您的位置:首页 > 其它

alsa学习--5.用ffmpeg将mp3转为wav

2016-09-01 14:11 627 查看
一. 
1. 利用ffmpeg将mp3解码为pcm,再在pcm数据前面加上wav头,就得到一个完整的wav文件
2. 代码

#include "utils.h"

#include <stdint.h>

#include <libavutil/avutil.h>
#include <libavutil/attributes.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/file.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>

#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>

#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000

 // The three structs below describe the layout of a WAV file header.

/*
 * RIFF container header: the first 12 bytes of a WAV file.
 *
 * Fixed-width types are used because this struct is written to disk verbatim;
 * its layout must not depend on the platform's idea of `unsigned int`.
 */
typedef struct {
    uint32_t magic;   /* chunk id, 'RIFF' */
    uint32_t length;  /* number of bytes following this field (file length - 8) */
    uint32_t type;    /* form type, 'WAVE' */
} WaveHeader;

/*
 * Payload of the 'fmt ' chunk for PCM audio (16 bytes).
 *
 * Fixed-width types are used because this struct is written to disk verbatim.
 */
typedef struct {
    uint16_t format;      /* audio format tag; 0x0001 means PCM */
    uint16_t channels;    /* number of interleaved channels */
    uint32_t sample_fq;   /* sample rate in Hz */
    uint32_t byte_p_sec;  /* bytes per second = sample rate * channels * bytes per sample */
    uint16_t byte_p_spl;  /* block align: bytes per sample frame (all channels) */
    uint16_t bit_p_spl;   /* bits per single-channel sample, e.g. 8 or 16 */
} WaveFmtBody;

/*
 * Generic RIFF sub-chunk header (8 bytes); precedes both the 'fmt ' payload
 * and the 'data' payload.
 *
 * Fixed-width types are used because this struct is written to disk verbatim.
 */
typedef struct {
    uint32_t type;    /* chunk id, e.g. 'fmt ' or 'data' */
    uint32_t length;  /* size of the chunk payload in bytes */
} WaveChunkHeader;

/*
 * Pack four characters into a 32-bit chunk tag with `a` in the lowest byte,
 * so the tag reads "abcd" when the value is stored on a little-endian host.
 *
 * Each operand is narrowed to a byte and widened to an unsigned 32-bit value
 * before shifting: shifting a signed int so that a bit lands in the sign
 * position (any `d` >= 0x80) is undefined behaviour and would produce a
 * negative tag.
 */
#define COMPOSE_ID(a,b,c,d) \
    ( (uint32_t)(unsigned char)(a)         | \
     ((uint32_t)(unsigned char)(b) << 8)   | \
     ((uint32_t)(unsigned char)(c) << 16)  | \
     ((uint32_t)(unsigned char)(d) << 24))

/* Chunk tags used by the WAV header writer. */
#define WAV_RIFF COMPOSE_ID('R','I','F','F')
#define WAV_WAVE COMPOSE_ID('W','A','V','E')
#define WAV_FMT COMPOSE_ID('f','m','t',' ')
#define WAV_DATA COMPOSE_ID('d','a','t','a')

int insert_wave_header(FILE* fp, long
data_len)

{

    int len; 

    WaveHeader* header;

    WaveChunkHeader* chunk;

    WaveFmtBody* body;

    

    fseek(fp, 0, SEEK_SET);       
//写到wav文件的开始处

    

    len = sizeof(WaveHeader)+sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2;

    char* buf = (char*)malloc(len);

    header = (WaveHeader*)buf;

    header->magic = WAV_RIFF;

    header->length = data_len + sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2 + 4;

    header->type = WAV_WAVE;

   

    chunk = buf+sizeof(WaveHeader);

    chunk->type = WAV_FMT;

    chunk->length = 16;

    body = buf+sizeof(WaveHeader)+sizeof(WaveChunkHeader);

    body->format = (u_short)0x0001;     
//编码方式为pcm

    body->channels = (u_short)0x02;     
//声道数为2

    body->sample_fq = 44100;            
//采样频率为44.1k 

    body->byte_p_sec = 176400;          
//每秒所需字节数 44100*2*2=采样频率*声道*采样位数 

    body->byte_p_spl = (u_short)0x4;    
//对齐无意义

    body->bit_p_spl = (u_short)16;      
//采样位数16bit=2Byte

    chunk = buf+sizeof(WaveHeader)+sizeof(WaveChunkHeader)+sizeof(WaveFmtBody);

    chunk->type = WAV_DATA;

    chunk->length = data_len;

    

    fwrite(buf, 1, len, fp);

    free(buf);

    return 0;

}

/* Demuxing, decoding and resampling state shared by init_ffmpeg() and main(). */
typedef struct {
    int              videoindex;   /* value returned by av_find_best_stream() for video */
    int              sndindex;     /* value returned by av_find_best_stream() for audio */
    AVFormatContext *pFormatCtx;   /* demuxer context of the opened input file */
    AVCodecContext  *sndCodecCtx;  /* codec context of the selected audio stream */
    AVCodec         *sndCodec;     /* decoder found for sndCodecCtx->codec_id */
    SwrContext      *swr_ctx;      /* resampler, created in main() on the first decoded frame */
    /* 16-byte aligned buffer that receives the resampler output */
    DECLARE_ALIGNED(16, uint8_t, audio_buf)[AVCODEC_MAX_AUDIO_FRAME_SIZE * 4];
} AudioState;

/*
 * Open `filepath`, locate its best audio stream and open a decoder for it.
 *
 * On success the following fields of `is` are filled in: pFormatCtx,
 * videoindex (-1 when the file has no video stream), sndindex, sndCodecCtx
 * and sndCodec.
 *
 * is:       state object to fill in; must not be NULL.
 * filepath: path or URL of the input file; must not be NULL.
 *
 * Returns 0 on success. Returns -1 when an argument is NULL, the input cannot
 * be opened or probed, no audio stream exists, or no decoder can be opened.
 * On failure the input is closed again and pFormatCtx / sndCodecCtx are NULL.
 */
int init_ffmpeg(AudioState* is, char* filepath)
{
    int idx;

    if (is == NULL) {
        return -1;
    }

    is->videoindex  = -1;
    is->sndindex    = -1;
    is->pFormatCtx  = NULL;
    is->sndCodecCtx = NULL;
    is->sndCodec    = NULL;

    if (NULL == filepath) {
        dbmsg("input file is NULL");
        return -1;
    }

    /* No libavfilter registration: this program builds no filter graph. */
    avcodec_register_all();
    av_register_all();

    /* With *ps == NULL, avformat_open_input() allocates the context itself
     * and leaves it NULL again on failure. */
    if (avformat_open_input(&is->pFormatCtx, filepath, NULL, NULL) != 0) {
        return -1;
    }

    if (avformat_find_stream_info(is->pFormatCtx, NULL) < 0) {
        goto fail;
    }

    av_dump_format(is->pFormatCtx, 0, filepath, 0);

    /* wanted_stream_nb = -1 requests automatic selection. The result is a
     * stream index or a negative AVERROR code (not necessarily -1), so
     * normalise every failure to -1. */
    idx = av_find_best_stream(is->pFormatCtx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    is->videoindex = (idx < 0) ? -1 : idx;

    idx = av_find_best_stream(is->pFormatCtx, AVMEDIA_TYPE_AUDIO, -1, is->videoindex, NULL, 0);
    is->sndindex = (idx < 0) ? -1 : idx;

    dbmsg("videoindex=%d, sndindex=%d", is->videoindex, is->sndindex);

    if (is->sndindex < 0) {
        dbmsg("no audio stream found");
        goto fail;
    }

    is->sndCodecCtx = is->pFormatCtx->streams[is->sndindex]->codec;
    is->sndCodec    = avcodec_find_decoder(is->sndCodecCtx->codec_id);
    if (is->sndCodec == NULL) {
        dbmsg("Codec not found");
        goto fail;
    }

    if (avcodec_open2(is->sndCodecCtx, is->sndCodec, NULL) < 0) {
        goto fail;
    }

    return 0;

fail:
    /* The codec context is owned by the stream, which is released together
     * with the format context; do not leave a dangling pointer behind. */
    is->sndCodecCtx = NULL;
    is->sndCodec    = NULL;
    is->sndindex    = -1;
    avformat_close_input(&is->pFormatCtx);
    return -1;
}

int main(int argc, char **argv)

{

    int ret;

    FILE* fp; 

    int file_data_size = 0;                //这儿注意一个问题: 变量用时一定要初始化,否则会出现异常

    int len1,len2, data_size, got_frame;

    AVPacket *packet = av_mallocz(sizeof(AVPacket));

    AVFrame *frame = av_frame_alloc();

    AudioState* is = (AudioState*) av_mallocz(sizeof(AudioState));

    uint8_t *out[] = { is->audio_buf };

    fp = fopen("./test.wav", "wb+");

    len1 = sizeof(WaveHeader)+sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2;

    fseek(fp,len1, SEEK_SET);      //在写之前先预留出wav的header,即44个字节

    dbmsg("len1=%d",len1);

     

    //第1步初始化ffmpeg,并用ffmpeg解码,最后转为pcm格式

    if( (ret=init_ffmpeg(is, argv[1])) != 0)            //1.1
初始化ffmpeg

    {

        dbmsg("init_ffmpeg error");

        return -1;

    }

    while( (av_read_frame(is->pFormatCtx, packet)>=0) )    //1.2
循环读取mp3文件中的数据帧

    { 

        if(packet->stream_index != is->sndindex)

            continue;

        if((ret=avcodec_decode_audio4(is->sndCodecCtx, frame, &got_frame, packet)) < 0) //1.3
解码数据帧

        {

            dbmsg("file eof");

            break;

        }

        if(got_frame <= 0) /* No
data yet, get more frames */

            continue;

        data_size = av_samples_get_buffer_size(NULL, is->sndCodecCtx->channels, frame->nb_samples, is->sndCodecCtx->sample_fmt, 1);

        //1.4下面将ffmpeg解码后的数据帧转为我们需要的数据(关于"需要的数据"下面有解释)

        if(NULL==is->swr_ctx)

        {

            if(is->swr_ctx != NULL)

                swr_free(&is->swr_ctx);

            dbmsg("frame: channnels=%d,format=%d, sample_rate=%d", frame->channels, frame->format, frame->sample_rate);

            is->swr_ctx = swr_alloc_set_opts(NULL, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, 44100, av_get_default_channel_layout(frame->channels), frame->format, frame->sample_rate, 0, NULL);

            if(is->swr_ctx == NULL)

            {

                dbmsg("swr_ctx == NULL");

            }

            swr_init(is->swr_ctx);

        }

        len2 = swr_convert(is->swr_ctx, out, 44100,(const uint8_t **)frame->extended_data, frame->nb_samples);

        file_data_size += len2;

        //1.5 数据格式转换完成后就写到文件中 

        fwrite((short *)is->audio_buf, sizeof(short), (size_t) len2* 2, fp);

    }

    file_data_size *= 4;

    dbmsg("file_data_size=%d", file_data_size);

    //第2步添加上wav的头

    ret = insert_wave_header(fp, file_data_size);

    av_free_packet(packet);

    av_free(frame);

    avcodec_close(is->sndCodecCtx);

    avformat_close_input(&is->pFormatCtx);

    fclose(fp);

    return 0;

}

2.运行结果

cong@msi:/work/ffmpeg/test/alsa/testalsa/5mp3towav$
make run

export LD_LIBRARY_PATH=/work/ffmpeg/out/lib/ \

    && ./mp3towav /work/ffmpeg/test/resource//test.mp3

mp3towav.c:main[150]: len1=44

[mp3 @ 0x14d3620] Skipping 0 bytes of junk at 197687.

libavutil/crc.c:av_crc_init[313]: 

[mp3 @ 0x14d3620] Estimating duration from bitrate, this may be inaccurate

Input #0, mp3, from '(null)':

  Metadata:

    artist : 佚名

    title : 法国国歌 马赛曲

    TYER : 2013-10-26

  Duration: 00:03:28.20, start: 0.000000, bitrate: 199
kb/s

    Stream #0:0: Audio: mp3, 44100
Hz, stereo, s16p, 192 kb/s

    Stream #0:1: Video: mjpeg, yuvj420p(pc, bt470bg/unknown/unknown), 600x600 [SAR
1:1 DAR 1:1], 90k
tbr, 90k tbn, 90k tbc

    Metadata:

      title : e

      comment : Cover (front)

mp3towav.c:init_ffmpeg[120]: videoindex=-1381258232, sndindex=0

mp3towav.c:main[173]: frame: channnels=2,format=6, sample_rate=44100

mp3towav.c:main[186]: file_data_size=36725760

ls查看

cong@msi:/work/ffmpeg/test/alsa/testalsa/5mp3towav$
ls -l

total 36064

-rw-rw-r-- 1
cong cong 885 Sep 11 11:25 Makefile

-rwxrwxr-x 1 cong cong 64126 Sep 11 11:44 mp3towav

-rw-rw-r-- 1
cong cong 6183 Sep 11 11:24 mp3towav.c

-rw-rw-r-- 1
cong cong 115344 Sep 11 11:44 mp3towav.o

-rw-rw-r-- 1
cong cong 36725804 Sep 11 11:44 test.wav

-rw-rw-r-- 1
cong cong 333 Sep 9 11:31 utils.h

3. 说明
mp3towav.c:main[173]: AV_CH_LAYOUT_STEREO=3, AV_SAMPLE_FMT_S16=1, freq=44100
mp3towav.c:main[174]: frame: channnels=2, default_layout=3, format=6, sample_rate=44100

ffmpeg中:include/libavutil/samplefmt.h

/* Excerpt of FFmpeg's sample-format enumeration (libavutil/samplefmt.h). */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits (value 1): the interleaved PCM format written to the WAV file
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar (value 6): the format the decoder reported in the run above
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

interleaved --> 理解为交叉存取 --> AV_SAMPLE_FMT_S16中两个声道的采样是交叉存储的
planar --> 理解为平面存取 --> AV_SAMPLE_FMT_S16P是先存一个声道的全部数据,再存另一个声道的数据

AV_SAMPLE_FMT_S16P is planar signed 16 bit audio, i.e. 2 bytes for each sample which is same for AV_SAMPLE_FMT_S16.

The only difference is in AV_SAMPLE_FMT_S16 samples of each channel are interleaved i.e. if you have two channel audio then the samples buffer will look like

c1 c1 c2 c2 c1 c1 c2 c2...                        -->AV_SAMPLE_FMT_S16的数据组织方式

where c1 is a sample for channel1 and c2 is sample for channel2.

while for one frame of planar audio you will have something like

c1 c1 c1 c1 .... c2 c2 c2 c2 ..                -->AV_SAMPLE_FMT_S16P的数据组织方式

now how is it stored in AVFrame:

for planar audio:

data[i] will contain the data of channel i (assuming channel 0 is first channel).

however, if you have more than 8 channels, then the data for the rest of the channels can be found in the extended_data attribute of AVFrame.

for non-planar audio

data[0] will contain the data for all channels in an interleaved manner.

参考文章:
What is the difference between AV_SAMPLE_FMT_S16P and AV_SAMPLE_FMT_S16? http://stackoverflow.com/questions/18888986/what-is-the-difference-between-av-sample-fmt-s16p-and-av-sample-fmt-s16

4. 代码打包


5mp3towav.rar
(下载后改名为5mp3towav.tar.gz)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: