您的位置:首页 > 其它

MP4音频解码信息(转帖加注释)

2012-01-29 11:05 363 查看
http://blog.csdn.net/linzhiji/article/details/5840031

注释:

1。3gp和MP4中的AAC的私有数据保存在esds的0x05标签的数据,

结构为 05 + 长度 + 内容。

将长度赋值给 extradatasize

将内容赋值给 extradata

长度的计算函数在ffmpeg中的static int mp4_read_descr_len(ByteIOContext *pb)

2。avc/h264的extradata和extradatasize信息在avcc atom中,将avcc atom去掉长度和type(共8个字节)后的长度赋值给extradatasize,内容赋值给extradata。

MP4文件格式分为头部和数据两部分,头部是由许多被称作Atom的结构单元嵌套或排列而成,数据部分则完全为实际数据不包含元信息,因此具体解码时音视频帧的位置和大小都要在头部获取。详细内容见以下链接:

http://wqyuwss.52rd.net

这里总结下音频解码信息获取的一些经验,当然详细内容需要查看quick time file format的文档。

MP4的音频解码信息保存在如下嵌套的Atom中:moov{trak{mdia{minf{smhd, stbl{stsd}}}}}(smhd与stbl同为minf的子Atom,minf中含有smhd的trak即为音频轨)

stsd可能包括多个音频信息的描述,结构如下:

/* One sample-description entry of the stsd atom, as laid out in the file. */
typedef struct stsdtable

{

unsigned int size;//size of the atom

char format[4];//audio codec format (see the format table below)

int res1;

int ref;

short version;//version

short pad1;

int pad2;

short channels;//number of channels

short bitspersample;

short compress_id;

short res2;

short samplerate1;//sample rate

short samplerate2;

//{if(version==1)

int sampleperpacket;

int bytesperpacket;

int bytesperframe;

int bytespersample;

//}

} stsdtable;


其中format对应音频编码格式:

PCM_S32BE, in32

PCM_S32LE, in32

PCM_S24BE, in24

PCM_S24LE, in24

PCM_S16BE, twos // 16 bits //

PCM_S16LE, sowt //

PCM_S16LE, lpcm

PCM_F32BE, fl32

PCM_F64BE, fl64

PCM_S8, sowt

PCM_U8, raw // 8 bits unsigned

PCM_U8, NONE // uncompressed

PCM_MULAW, ulaw //

PCM_ALAW, alaw //

ADPCM_IMA_QT, ima4 // IMA-4 ADPCM //

MACE3, MAC3 // Macintosh Audio Compression and Expansion 3:1 ///

MACE6, MAC6 // Macintosh Audio Compression and Expansion 6:1 //

MP3, .mp3 // MPEG layer 3; sample files at http://www.3ivx.com/showcase.html use this tag //

MP3, 0x6D730055 // MPEG layer 3 //

OGG_VORBIS, OggS //// sample files at http://heroinewarrior.com/xmovie.php3 use this tag //

AAC, mp4a // MPEG-4 AAC //

AC3, ac-3 // ETSI TS 102 366 Annex F //

AMR_NB, samr // AMR-NB 3gp //

AMR_WB, sawb // AMR-WB 3gp//

GSM, agsm

ALAC, alac // Apple Lossless //

QCELP, Qclp

QCELP, sqcp // ISO Media fourcc //

QDM2, QDM2 // QDM2 //

DVAUDIO, vdva

DVAUDIO, dvca

WMAV2, WMA2

这个获取比较简单,下面是解码私有数据的获取:

这些解码私有数据也保存在Atom中,通常在上面结构体的后面,有esds、frma、mp4a、wave。AAC的私有数据保存在esds的0x05标签的数据,QDM2的则是"wave"Atom的数据部分(以下按顺序分析):

4字节 长度

4字节 "esds" or "m4ds" 标志

4字节 版本标识

1字节 ES描述类型标签 0x03

--3字节 扩展描述类型标签 可能没有

1字节 描述类型长度

2字节 ES ID

1字节 流优先级

1字节 解码配置描述类型标签 0x04

--3字节 扩展描述类型标签 可能没有

1字节 描述类型长度

1字节 描述对象ID

1字节

3字节

4字节

4字节

1字节 解码特定信息(DecoderSpecificInfo)描述类型标签 0x05

--3字节 扩展描述类型标签 可能没有

1字节 长度

N字节 解码私有数据(N为上面的长度,即extradata)

1字节 0x06

0x06不再分析

下面是一个例子:

长度 标签

00015218h: 00 00 00 10 73 6D 68 64 00 00 00 00 00 00 00 00 ; ....smhd........

00015228h: 00 00 00 24 64 69 6E 66 00 00 00 1C 64 72 65 66 ; ...$dinf....dref

00015238h: 00 00 00 00 00 00 00 01 00 00 00 0C 75 72 6C 20 ; ............url

00015248h: 00 00 00 01 00 02 C0 97 73 74 62 6C 00 00 00 5B ; ........stbl...[

00015258h: 73 74 73 64 00 00 00 00 00 00 00 01 00 00 00 4B ; stsd...........K

00015268h: 6D 70 34 61 00 00 00 00 00 00 00 01 00 00 00 00 ; mp4a............

00015278h: 00 00 00 00 00 01 00 10 00 00 00 00 7D 00 00 00 ; ............}...

00015288h: 00 00 00 27 65 73 64 73 00 00 00 00 03 19 00 00 ; ...'esds........

00015298h: 00 04 11 40 15 00 00 D2 00 00 BB 88 00 00 7D 00 ; ...@..........}.

000152a8h: 05 02 12 88 06 01 02 ; .......

0x12 0x88即私有数据(对应ffmpeg中AVCodecContext.extradata)

下面是mp4音频部分分析的代码:

//MP4Analyze.h

/*
 * Byte type used throughout the parser.
 * Declared as a typedef rather than a macro: a macro named uint8_t rewrites
 * the system's own "typedef ... uint8_t;" when <stdint.h> (pulled in by
 * <netinet/in.h>) is included after this header, which breaks compilation.
 * Redeclaring the typedef with the same underlying type is harmless.
 */
typedef unsigned char uint8_t;

/****** atom tags ******/
/*
 * Four-character atom tags, stored as NUL-terminated byte strings so they can
 * be handed to seek_tag()/strncmp(), which compare the first 4 bytes only.
 * They are `static` because this is a header: without internal linkage every
 * translation unit including it would emit its own external definition and the
 * link would fail with duplicate symbols.
 */
static uint8_t moov[] = "moov";
static uint8_t trak[] = "trak";
static uint8_t mdia[] = "mdia";
static uint8_t minf[] = "minf";
static uint8_t stbl[] = "stbl";
static uint8_t stsd[] = "stsd";
static uint8_t stsc[] = "stsc";
static uint8_t stsz[] = "stsz";
static uint8_t stco[] = "stco";
static uint8_t ftyp[] = "ftyp";
static uint8_t mdat[] = "mdat";

/*
 * Atom header record.
 * get_audio_info() overlays this struct on the raw bytes of an stsd atom
 * (cast from uint8_t*) and reads `size` and `num_of_entries` through ntohl(),
 * so the first four members must keep this order and width.
 */
typedef struct Atom

{

unsigned int size;            /* atom size as stored in the file (converted with ntohl by the reader) */

uint8_t tag[4];               /* bytes 4..7 of the atom: the 4-character tag that seek_tag() compares */

int ver_flag;                 /* presumably the version/flags word -- not read by the visible code */

unsigned int num_of_entries;  /* entry count as stored in the file (converted with ntohl by the reader) */

unsigned int pos;             /* not referenced by the code visible in this file */

uint8_t *data;                /* not referenced by the code visible in this file */

} Atom;

/**** audio format tags ****/
/*
 * Four-character codes of audio sample descriptions and of the atoms that may
 * follow them. `static` for the same reason as the atom tags: the header must
 * not emit external definitions in every translation unit that includes it.
 *
 * Two values are corrected here (the variable names are kept so existing
 * references still compile):
 *   smar -> "samr": the AMR narrowband code is "samr", not "smar"
 *   fram -> "frma": the format atom found next to "wave" is "frma", not "fram"
 */
static uint8_t kmp3[] = {0x6D,0x73,0x00,0x55}; /* MPEG layer 3, numeric tag 0x6D730055 */
static uint8_t fmp3[] = ".mp3";                /* MPEG layer 3 */
static uint8_t raw[]  = "raw ";                /* 8-bit unsigned PCM */
static uint8_t wave[] = "wave";
static uint8_t mp4a[] = "mp4a";                /* MPEG-4 AAC */
static uint8_t enca[] = "enca";                /* encrypted to ISO/IEC 14496-12 or 3GPP standards */
static uint8_t smar[] = "samr";                /* AMR narrowband (3gp) */
static uint8_t sawb[] = "sawb";                /* AMR wideband (3gp) */
static uint8_t m4ds[] = "m4ds";                /* alternative tag of the elementary-stream descriptor atom (see esds) */
static uint8_t esds[] = "esds";                /* elementary-stream descriptor atom */
static uint8_t fram[] = "frma";

/*** We may not need these ***/
/*
 * Pack four bytes into a little-endian 32-bit tag: a is the lowest byte,
 * d the highest. Every argument is parenthesised so that expressions can be
 * passed safely, and the arithmetic is done in unsigned int so that a top
 * byte >= 0x80 does not overflow a signed int when shifted by 24.
 */
#define MKTAG(a,b,c,d) ((unsigned int)(unsigned char)(a) | \
                        ((unsigned int)(unsigned char)(b) << 8) | \
                        ((unsigned int)(unsigned char)(c) << 16) | \
                        ((unsigned int)(unsigned char)(d) << 24))

/*
 * Pairs a codec identifier with a container tag.
 * Only referenced by the commented-out codec_get_tag() in the visible code,
 * which returns `tag` for the entry whose `id` matches.
 */
typedef struct AVCodecTag {

int id;            /* codec identifier */

unsigned int tag;  /* tag value associated with that codec */

} AVCodecTag;

typedef struct stsdtable

{

unsigned int size;

char format[4];

int res1;

int ref;

short version;

short pad1;

int pad2;

short channels;

short bitspersample;

short compress_id;

short res2;

short samplerate1;

short samplerate2;

//{if(version==1)

int sampleperpacket;

int bytesperpacket;

int bytesperframe;

int bytespersample;

//}

} stsdtable;

/***** result is stored here ******/

/*
 * Per-sample record produced by get_packet_offset(), which stores one of
 * these in a map keyed by the sample's offset.
 */
typedef struct sampletable

{

unsigned int size;      /* sample size, read from the stsz table */

unsigned int id_of_sd;  /* third 32-bit field of the stsc entry the sample belongs to */

} sampletable;

//MP4Analyze.cpp

#include "MP4Analyze.h"

#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#ifdef WIN32

#include <winsock2.h>

#pragma comment(lib, "Ws2_32.lib")

#pragma warning (disable:4786)

#endif

#ifdef __GNUG__

#include <netinet/in.h>

#endif

using namespace std;

/**

*** MP4 files may contain an 8-byte "wide" atom; note that it is not handled here yet.

**/

/*
 * Check whether a buffer looks like the start of a mov/mp4/3gp file.
 *
 * The first atom's tag (bytes 4..7 of the buffer) must be "moov", "ftyp" or
 * "mdat".
 *
 * data: start of the file contents
 * size: number of valid bytes in data
 * returns 0 when the tag matches, -1 otherwise (including when data is NULL
 *         or fewer than 8 bytes are available, since the tag cannot be read)
 */
int check_format(uint8_t *data, int size)
{
    /* An atom header is a 4-byte size followed by a 4-byte tag. */
    if (data == NULL || size < 8)
        return -1;

    const char *first_tag = (const char *)(data + 4);

    if (strncmp((const char *)moov, first_tag, 4) == 0 ||
        strncmp((const char *)ftyp, first_tag, 4) == 0 ||
        strncmp((const char *)mdat, first_tag, 4) == 0)
        return 0;

    return -1;
}

/*
 * Read a big-endian unsigned integer.
 *
 * data: first (most significant) byte
 * size: number of bytes to consume; a value <= 0 yields 0
 * returns the accumulated value; bytes beyond the width of unsigned int are
 *         shifted out at the top
 */
unsigned int get_size(const uint8_t *data, int size)
{
    unsigned int value = 0;
    int remaining = size;

    while (remaining > 0)
    {
        value = (value << 8) + *data;
        ++data;
        --remaining;
    }

    return value;
}

/*
 * Find the first atom with the given tag among the sibling atoms stored
 * back-to-back in a buffer.
 *
 * tag:   4-byte atom tag to look for (only the first 4 bytes are compared)
 * data:  start of the first atom
 * size1: number of valid bytes in data
 * pos:   out - receives a pointer to the atom's payload (just past the
 *        8-byte size/tag header)
 * size2: out - receives the payload size; when the atom claims more bytes than
 *        the buffer holds, a warning is printed and the value is clamped to
 *        the bytes that are really present, so callers never walk off the end
 * returns the offset of the atom header from data[0], or -1 when the tag is
 *         not found, an argument is NULL, or a malformed atom (declared size
 *         below 8, which includes the unsupported "to end of file" and
 *         64-bit-size encodings) stops the walk.
 */
int seek_tag(uint8_t tag[],uint8_t *data, unsigned int size1,uint8_t **pos,unsigned int *size2)
{
    const unsigned int header_len = 8;   /* 4-byte size + 4-byte tag */

    if (data == NULL || tag == NULL || pos == NULL || size2 == NULL)
        return -1;

    if (size1 < header_len)
        return -1;

    unsigned int offset = 0;

    /* Invariant: offset <= size1, so size1 - offset cannot wrap. */
    while (size1 - offset >= header_len)
    {
        uint8_t *atom = data + offset;
        const unsigned int avail = size1 - offset;
        const unsigned int atom_size = get_size(atom, 4);

        /* Without a sane size we can neither report nor skip this atom. */
        if (atom_size < header_len)
            return -1;

        if (strncmp((const char *)atom + 4, (const char *)tag, 4) == 0)
        {
            unsigned int payload = atom_size - header_len;

            printf("find :%c%c%c%c\n", tag[0], tag[1], tag[2], tag[3]);

            if (atom_size > avail)
            {
                printf("warning: the atom may be not complete!\n");
                payload = avail - header_len;
            }

            *pos = atom + header_len;
            *size2 = payload;
            return (int)offset;
        }

        /* The next sibling would start beyond the buffer. */
        if (atom_size > avail)
            return -1;

        offset += atom_size;
    }

    return -1;
}

/*** elementary stream descriptor analyse ***/

/*

unsigned int codec_get_tag(const AVCodecTag *tags, int id)

{

while (tags->id != CODEC_ID_NONE) {

if (tags->id == id)

return tags->tag;

tags++;

}

return 0;

}

/* may not need analyse

int esds_analyze(uint8_t *data, unsigned int size)

{

return 0;

}

*/

/*version == 2 ??? reffer to ffmpeg source mov.c line 943

if (format == MKTAG('l','p','c','m'))

st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags);

*/

/*
 * Collect the audio sample-description entries of an stsd atom.
 *
 * data:   start of the stsd atom INCLUDING its 8-byte size/tag header
 * size:   payload size of the atom, i.e. atom size minus the 8-byte header.
 *         This is the convention of the caller in this file
 *         (get_audio_info(datapos-8, tag_size, ...)), so size + 8 bytes are
 *         readable from data.
 * stable: receives one stsdtable per entry. Members are stored exactly as they
 *         appear in the file (network byte order); convert with ntohl()/ntohs()
 *         when using them. For version-0 entries the four trailing members are
 *         zero, because they are not present in the file.
 * returns stable
 *
 * Parsing stops quietly at the first entry that does not fit in the buffer or
 * whose declared size is too small to step over.
 */
vector<stsdtable>& get_audio_info(uint8_t *data, unsigned int size, vector<stsdtable>& stable)//stsd
{
    const unsigned int atom_header_len = 16;                  /* size, tag, version/flags, entry count */
    const unsigned int child_header_len = 8;                  /* size + tag of a nested atom */
    const unsigned int entry_v1_len = sizeof(stsdtable);
    const unsigned int entry_v0_len = sizeof(stsdtable) - 16; /* without the four version-1 ints */

    if (data == NULL || size > 0xFFFFFFFFu - 8)
        return stable;

    const unsigned int avail = size + 8;
    if (avail < atom_header_len)
        return stable;

    /* Read the header bytewise instead of casting to Atom*: no alignment or
       struct-padding assumptions are needed. */
    const unsigned int entry_count = get_size(data + 12, 4);

    printf("size : %u\n", get_size(data, 4));
    printf("num_entr: %u\n", entry_count);

    unsigned int offset = atom_header_len;

    for (unsigned int i = 0; i < entry_count; ++i)
    {
        if (offset > avail || avail - offset < entry_v0_len)
            break;

        uint8_t *entry_pos = data + offset;

        /* Copy into a zeroed local: avoids misaligned access and never reads
           the version-1 tail unless it is really part of the entry. */
        stsdtable entry;
        memset(&entry, 0, sizeof(entry));
        memcpy(&entry, entry_pos, entry_v0_len);

        unsigned int header_len = entry_v0_len;
        if (ntohs(entry.version) != 0)
        {
            /* NOTE(review): every non-zero version is treated as version 1;
               version 2 uses a different layout per the remark above this
               function -- confirm before relying on it. */
            if (avail - offset < entry_v1_len)
                break;
            memcpy(&entry, entry_pos, entry_v1_len);
            header_len = entry_v1_len;
        }

        const unsigned int entry_size = ntohl(entry.size);

        stable.push_back(entry); /* stored in network order, convert on use */

        /***************/
        printf("--tablesize: %u\n", entry_size);
        /* format is 4 characters without a terminator, hence the precision */
        printf("--format : %.4s\n", entry.format);
        printf("--version : %d\n", ntohs(entry.version));
        printf("--channels: %d\n", ntohs(entry.channels));
        printf("--bitpersam: %d\n", ntohs(entry.bitspersample));
        printf("--IDcompress: %d\n", ntohs(entry.compress_id));
        printf("--samplerate: %d.%d\n", ntohs(entry.samplerate1), ntohs(entry.samplerate2));
        /**************/

        /* A nested atom (esds, wave, ...) follows the fixed part when the
           entry is large enough to hold at least one atom header. */
        if (entry_size >= header_len + child_header_len &&
            avail - offset >= header_len + child_header_len)
        {
            const uint8_t *child = entry_pos + header_len;

            printf("----atom size:%u\n", get_size(child, 4));
            printf("----atom name:%c%c%c%c\n", child[4], child[5], child[6], child[7]);

            /* The tag lives in bytes 4..7 of the nested atom. */
            if (strncmp((const char *)child + 4, (const char *)esds, 4) == 0)
            {
                //handle esds
            }
        }

        /* Step to the next entry by this entry's own declared size. */
        if (entry_size < entry_v0_len || entry_size > avail - offset)
            break;
        offset += entry_size;
    }

    return stable;
}

/*
 * Build the table of sample (audio frame) offsets and sizes of one track.
 *
 * STBL:  three pointers into the sample table atoms, as prepared by the caller
 *        in this file:
 *          STBL[0] first stsc entry    (entry count in the 4 bytes before it)
 *          STBL[1] first stsz entry    (sample count in the 4 bytes before it,
 *                                       fixed sample size in the 4 bytes
 *                                       before that)
 *          STBL[2] first stco entry    (entry count in the 4 bytes before it)
 * table: receives one sampletable per sample, keyed by the sample's offset
 * returns table
 *
 * Each stsc entry is (first chunk, samples per chunk, sample description id)
 * and applies to every chunk up to the next entry's first chunk; the last
 * entry applies up to the last chunk. Chunk numbers are 1-based. When the stsz
 * fixed sample size is non-zero, every sample has that size and no per-sample
 * entries are read.
 * Inconsistent tables (chunk number 0 or beyond the stco table, more samples
 * than stsz declares) are truncated instead of being read out of bounds.
 */
map<unsigned int,sampletable> & get_packet_offset(uint8_t *STBL[], map<unsigned int,sampletable>& table)
{
    //table.insert(pair<long,sampletable>(1,sample));

    if (STBL == NULL || STBL[0] == NULL || STBL[1] == NULL || STBL[2] == NULL)
        return table;

    const unsigned int num_sam_to_chunk = get_size(STBL[0] - 4, 4); //stsc
    const unsigned int fixed_sample_size = get_size(STBL[1] - 8, 4); //stsz
    const unsigned int num_sample = get_size(STBL[1] - 4, 4);       //stsz
    const unsigned int num_chunk = get_size(STBL[2] - 4, 4);        //stco

    const uint8_t *stsc_entry = STBL[0];
    const uint8_t *sample_size_pos = STBL[1];
    const uint8_t *chunk_offsets = STBL[2];

    unsigned int samples_done = 0;
    sampletable sample;

    printf("number of stsc entries:%u \nnumber of sample size:%u \nnumber of chunk offset:%u\n",
           num_sam_to_chunk, num_sample, num_chunk);

    for (unsigned int i = 0; i < num_sam_to_chunk; ++i, stsc_entry += 12)
    {
        const bool last_entry = (i + 1 == num_sam_to_chunk);
        const unsigned int first_chunk = get_size(stsc_entry, 4);
        const unsigned int samples_per_chunk = get_size(stsc_entry + 4, 4);

        /* Only peek at the following entry when there is one; the last entry
           runs to the end of the chunk table. */
        const unsigned int next_first_chunk =
            last_entry ? num_chunk + 1 : get_size(stsc_entry + 12, 4);

        sample.id_of_sd = get_size(stsc_entry + 8, 4);

        printf("chunk_index:(%u---%u)\n", first_chunk, next_first_chunk);

        /* Chunk numbers are 1-based; 0 would index before the stco table. */
        if (first_chunk == 0)
            break;

        for (unsigned int chunk = first_chunk;
             chunk <= num_chunk && (last_entry || chunk < next_first_chunk);
             ++chunk)
        {
            printf("chunk_index:%u sample num:%u\n", chunk, samples_per_chunk);

            unsigned int offset = get_size(chunk_offsets + (size_t)(chunk - 1) * 4, 4);

            for (unsigned int j = 0;
                 j < samples_per_chunk && samples_done < num_sample;
                 ++j, ++samples_done)
            {
                if (fixed_sample_size != 0)
                {
                    sample.size = fixed_sample_size;
                }
                else
                {
                    sample.size = get_size(sample_size_pos, 4);
                    sample_size_pos += 4;
                }

                printf("--sample offset:%u %x size:%u\n", offset, offset, sample.size);

                table.insert(pair<unsigned int,sampletable>(offset, sample));

                offset += sample.size;
            }
        }
    }

    return table;
}

int seek_audio_atom( uint8_t *data1, unsigned int size1)

{

uint8_t tag[] = "mdiaminfsmhd";

uint8_t *datapos;

unsigned int tag_size;

uint8_t *data;

unsigned int size;

int offset_of_atom = 0;

if((offset_of_atom = seek_tag(moov, data1, size1, &data, &size)) == -1)

return -1;

if(offset_of_atom + size >size1)

{ //some handles

printf("moov atom is not complete,need more data");

}

data1 = data;

size1 = size;

uint8_t *nexttrak = data;

unsigned int traksize = size;

int i=0;

while(1)

{

printf("-----/n");

if(seek_tag(trak, nexttrak, traksize, &datapos, &tag_size) != -1)

{

nexttrak = datapos + tag_size;

if(size1 < (nexttrak - data1))

return -1;

traksize = size1 - (nexttrak - data1);

data = datapos;

size = tag_size;

}

else

{

return -1;

}

i=0;

while(i<3)

{

if(seek_tag(tag+i*4, data, size, &datapos, &tag_size) != -1)

{

if(i==2)

break;

data = datapos;

size = tag_size;

++i;

}

else

{

break;

}

}

if(strncmp("smhd",(char*)(datapos-4),4) == 0)

{

if(seek_tag(stbl, data, size, &datapos, &tag_size)!= -1)

{

printf("—find audio stbl—!/n");

data = datapos;

size = tag_size;

if(seek_tag(stsd, data, size, &datapos, &tag_size) != -1)

{

vector<stsdtable> stable; //音频信息

get_audio_info(datapos-8, tag_size,stable);

}

uint8_t *STBL[3] ={NULL,NULL,NULL};//

uint8_t *datapos1;

unsigned int tag_size1;//

if(seek_tag(stsc, data, size, &datapos1, &tag_size1) != -1)

{

STBL[0] = datapos1 + 8;

}

uint8_t *datapos2;

unsigned int tag_size2;

if(seek_tag(stsz, data, size, &datapos2, &tag_size2) != -1)

{

STBL[1] = datapos2 + 12;

}

uint8_t *datapos3;

unsigned int tag_size3;

if(seek_tag(stco, data, size, &datapos3, &tag_size3) != -1)

{

STBL[2] = datapos3 + 8;

}

if(STBL[0] && STBL[1] && STBL[2] )

{

map<unsigned int,sampletable> postable;//音频帧信息

get_packet_offset(STBL,postable);

}

}

return 0;

}

}

return -1;

}

int main(char arg, char *argv[])

{

FILE *mp4;

cout<<"please input the file name :"<<endl;

string filename;

cin>>filename;

mp4 = fopen(filename.c_str(),"rb");

uint8_t buffer[300000];

fread(buffer,1,300000,mp4);

seek_audio_atom((uint8_t*)buffer,300000);

fclose(mp4);

return 0;

}


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: