您的位置:首页 > 其它

x264 - x264_mb_encode_i16x16

2014-06-19 20:54 369 查看
/* All encoding functions must output the correct CBP and NNZ values.

* The entropy coding functions will check CBP first, then NNZ, before

* actually reading the DCT coefficients. NNZ still must be correct even

* if CBP is zero because of the use of NNZ values for context selection.

* "NNZ" need only be 0 or 1 rather than the exact coefficient count because

* that is only needed in CAVLC, and will be calculated by CAVLC's residual

* coding and stored as necessary. */

/* This means that decimation can be done merely by adjusting the CBP and NNZ

* rather than memsetting the coefficients. */

/* Encode plane `p` of an intra 16x16 macroblock using quantizer `i_qp`.
 *
 * The prediction selected by h->mb.i_intra16x16_pred_mode is first written
 * into the plane's fdec buffer.  In lossless mode every 4x4 block then goes
 * through zigzagf.sub_4x4ac and the function returns early.  Otherwise the
 * residual is transformed, the first coefficient of each 4x4 block is moved
 * into a separate 4x4 DC block, AC and DC are quantized separately, and the
 * dequantized result is added back onto fdec.
 *
 * h     encoder context; read and updated in place.
 * p     plane index; 0 selects CQM_4IY, anything else CQM_4IC.
 * i_qp  index into the quant4_mf / quant4_bias tables, also handed to the
 *       dequant and trellis routines.
 *
 * State written here: h->mb.i_cbp_luma, h->mb.cache.non_zero_count,
 * h->dct.luma4x4, h->dct.luma16x16_dc and the fdec pixels. */
static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp )
{
    pixel *fenc = h->mb.pic.p_fenc[p];
    pixel *fdec = h->mb.pic.p_fdec[p];

    /* 16 blocks of 16 coefficients each, plus one block holding the 16 DCs. */
    ALIGNED_ARRAY_N( dctcoef, ac_coefs,[16],[16] );
    ALIGNED_ARRAY_N( dctcoef, dc_coefs,[16] );

    int nz;
    int ac_cbp = 0;
    int cqm_idx = p ? CQM_4IC : CQM_4IY;
    int pred_mode = h->mb.i_intra16x16_pred_mode;

    /* With decimation disabled the score starts at 9, which is already past
     * the "< 6" threshold tested below, so nothing gets decimated. */
    int score;
    if( h->mb.b_dct_decimate )
        score = 0;
    else
        score = 9;

    if( !h->mb.b_lossless )
        h->predict_16x16[pred_mode]( fdec );
    else
        x264_predict_lossless_16x16( h, p, pred_mode );

    if( h->mb.b_lossless )
    {
        for( int blk = 0; blk < 16; blk++ )
        {
            pixel *blk_enc = fenc + block_idx_xy_fenc[blk];
            pixel *blk_dec = fdec + block_idx_xy_fdec[blk];

            nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16*p+blk], blk_enc, blk_dec, &dc_coefs[block_idx_yx_1d[blk]] );
            h->mb.cache.non_zero_count[x264_scan8[16*p+blk]] = nz;
            ac_cbp |= nz;
        }
        h->mb.i_cbp_luma |= ac_cbp * 0xf;
        h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = array_non_zero( dc_coefs, 16 );
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dc_coefs );
        return;
    }

    /* Start with every per-block NNZ flag of this plane cleared; only the
     * blocks that keep coefficients are set to 1 further down. */
    CLEAR_16x16_NNZ( p );

    h->dctf.sub16x16_dct( ac_coefs, fenc, fdec );

    if( h->mb.b_noise_reduction )
    {
        for( int blk = 0; blk < 16; blk++ )
            h->quantf.denoise_dct( ac_coefs[blk], h->nr_residual_sum[0], h->nr_offset[0], 16 );
    }

    /* Move coefficient 0 of every 4x4 block into the DC block (placed via
     * block_idx_xy_1d) and zero it in the AC block. */
    for( int blk = 0; blk < 16; blk++ )
    {
        dc_coefs[block_idx_xy_1d[blk]] = ac_coefs[blk][0];
        ac_coefs[blk][0] = 0;
    }

    if( h->mb.b_trellis )
    {
        for( int blk = 0; blk < 16; blk++ )
        {
            if( !x264_quant_4x4_trellis( h, ac_coefs[blk], cqm_idx, i_qp, ctx_cat_plane[DCT_LUMA_AC][p], 1, !!p, blk ) )
                continue;

            ac_cbp = 0xf;
            /* Store the quantized levels in scan order before dequantizing
             * the working copy in place. */
            h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+blk], ac_coefs[blk] );
            h->quantf.dequant_4x4( ac_coefs[blk], h->dequant4_mf[cqm_idx], i_qp );
            if( score < 6 )
                score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+blk] );
            h->mb.cache.non_zero_count[x264_scan8[16*p+blk]] = 1;
        }
    }
    else
    {
        /* Quantize four 4x4 blocks per call; the return value is used as a
         * bit mask of the blocks that still hold coefficients. */
        for( int i8x8 = 0; i8x8 < 4; i8x8++ )
        {
            nz = h->quantf.quant_4x4x4( &ac_coefs[i8x8*4], h->quant4_mf[cqm_idx][i_qp], h->quant4_bias[cqm_idx][i_qp] );
            if( !nz )
                continue;

            ac_cbp = 0xf;
            FOREACH_BIT( blk, i8x8*4, nz )
            {
                /* Store the quantized levels in scan order before
                 * dequantizing the working copy in place. */
                h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+blk], ac_coefs[blk] );
                h->quantf.dequant_4x4( ac_coefs[blk], h->dequant4_mf[cqm_idx], i_qp );
                if( score < 6 )
                    score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+blk] );
                h->mb.cache.non_zero_count[x264_scan8[16*p+blk]] = 1;
            }
        }
    }

    /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
    /* More useful with CAVLC, but still useful with CABAC. */
    if( score >= 6 )
        h->mb.i_cbp_luma |= ac_cbp;
    else
    {
        /* Decimated: drop all AC by clearing the flags, not the data. */
        CLEAR_16x16_NNZ( p );
        ac_cbp = 0;
    }

    /* The 16 DC terms are transformed and quantized as one 4x4 block. */
    h->dctf.dct4x4dc( dc_coefs );
    if( !h->mb.b_trellis )
        nz = h->quantf.quant_4x4_dc( dc_coefs, h->quant4_mf[cqm_idx][i_qp][0]>>1, h->quant4_bias[cqm_idx][i_qp][0]<<1 );
    else
        nz = x264_quant_luma_dc_trellis( h, dc_coefs, cqm_idx, i_qp, ctx_cat_plane[DCT_LUMA_DC][p], 1, LUMA_DC+p );

    h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = nz;
    if( nz )
    {
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dc_coefs );

        /* output samples to fdec */
        h->dctf.idct4x4dc( dc_coefs );
        h->quantf.dequant_4x4_dc( dc_coefs, h->dequant4_mf[cqm_idx], i_qp );  /* XXX not inversed */

        /* Put the DC terms back into their 4x4 blocks, but only when the
         * full inverse transform below is going to run. */
        if( ac_cbp )
        {
            for( int blk = 0; blk < 16; blk++ )
                ac_coefs[blk][0] = dc_coefs[block_idx_xy_1d[blk]];
        }
    }

    /* put pixels to fdec */
    if( ac_cbp )
        h->dctf.add16x16_idct( fdec, ac_coefs );     /* AC kept: full path */
    else if( nz )
        h->dctf.add16x16_idct_dc( fdec, dc_coefs );  /* DC only */
}

/* Index layout of x264_scan8, used to address h->mb.cache.non_zero_count:
 *   entries  0..15  the 16 4x4 blocks of plane 0 (Y)
 *   entries 16..31  the 16 4x4 blocks of plane 1 (U)
 *   entries 32..47  the 16 4x4 blocks of plane 2 (V)
 *   entries 48..50  the DC slots for Y, U and V
 * so x264_scan8[16*p] is the cache position of block 0 of plane p.
 *
 * CLEAR_16x16_NNZ( p ) zeroes the non_zero_count flags of all 16 4x4 blocks
 * of plane p.  The cache is 8 entries wide and each plane's blocks occupy
 * four consecutive entries on four consecutive rows, so one store per row is
 * enough.  M32 is defined elsewhere; it is assumed to be a 32-bit (4 entry)
 * store -- confirm against its definition.
 *
 * The expansion site must have `h` in scope.  Comments inside the macro body
 * have to be block comments on continued lines: a `//` comment without a
 * trailing backslash would end the macro definition early. */
#define CLEAR_16x16_NNZ( p ) \
do\
{\
    /* cache row of block 0: covers blocks 0, 1, 4, 5 */\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 0*8] ) = 0;\
    /* cache row of block 2: covers blocks 2, 3, 6, 7 */\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 1*8] ) = 0;\
    /* cache row of block 8: covers blocks 8, 9, 12, 13 */\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 2*8] ) = 0;\
    /* cache row of block 10: covers blocks 10, 11, 14, 15 */\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 3*8] ) = 0;\
} while(0)

/* Scan8 organization (cache position = column + row*8):
 *
 *       0  1  2  3  4  5  6  7
 *  0   DY        y  y  y  y  y
 *  1             y  Y  Y  Y  Y
 *  2             y  Y  Y  Y  Y
 *  3             y  Y  Y  Y  Y
 *  4             y  Y  Y  Y  Y
 *  5   DU        u  u  u  u  u
 *  6             u  U  U  U  U
 *  7             u  U  U  U  U
 *  8             u  U  U  U  U
 *  9             u  U  U  U  U
 * 10   DV        v  v  v  v  v
 * 11             v  V  V  V  V
 * 12             v  V  V  V  V
 * 13             v  V  V  V  V
 * 14             v  V  V  V  V
 *
 * Upper-case Y/U/V are the positions listed in x264_scan8 below.
 * DY/DU/DV are for luma/chroma DC.
 * Lower-case y/u/v are not indexed by this table; presumably they are the
 * slots for neighbouring blocks -- confirm against the cache loading code.
 */

/* Indices into x264_scan8 of the DC entries that follow the 3*16 block entries. */
#define LUMA_DC   48
#define CHROMA_DC 49

/* Maps a block index (16*plane + 4x4 block number, or a DC index) to its
 * position in the 8-entry-wide cache.  Each group of four entries is one
 * 8x8 quadrant: two blocks on one cache row, then two on the next. */
static const uint8_t x264_scan8[16*3 + 3] =
{
    /* plane 0 (Y): cache rows 1..4 */
     1*8+4,  1*8+5,  2*8+4,  2*8+5,
     1*8+6,  1*8+7,  2*8+6,  2*8+7,
     3*8+4,  3*8+5,  4*8+4,  4*8+5,
     3*8+6,  3*8+7,  4*8+6,  4*8+7,

    /* plane 1 (U): cache rows 6..9 */
     6*8+4,  6*8+5,  7*8+4,  7*8+5,
     6*8+6,  6*8+7,  7*8+6,  7*8+7,
     8*8+4,  8*8+5,  9*8+4,  9*8+5,
     8*8+6,  8*8+7,  9*8+6,  9*8+7,

    /* plane 2 (V): cache rows 11..14 */
    11*8+4, 11*8+5, 12*8+4, 12*8+5,
    11*8+6, 11*8+7, 12*8+6, 12*8+7,
    13*8+4, 13*8+5, 14*8+4, 14*8+5,
    13*8+6, 13*8+7, 14*8+6, 14*8+7,

    /* DY, DU, DV: column 0 of rows 0, 5 and 10 */
     0*8+0,  5*8+0, 10*8+0
};


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: