您的位置:首页 > 其它

x264_me_search_ref函数分析

2014-02-25 11:04 465 查看
/* Full-pel motion search for one partition against one reference, followed by
 * optional sub-pel refinement.
 *
 * h                encoder context; this function reads h->param.analyse.i_me_range,
 *                  h->mb.mv_min_fpel / mv_max_fpel, h->mb.i_subpel_refine,
 *                  h->mb.i_me_method and h->pixf.fpelcmp.
 * m                search state; reads i_pixel, i_stride[0], p_fenc[0], p_fref_w,
 *                  p_cost_mv and mvp; writes mv[0], mv[1], cost and cost_mv.
 * mvc, i_mvc       array of i_mvc extra candidate vectors ([0] = x, [1] = y), used
 *                  here as quarter-pel values (clipped against the qpel bounds or
 *                  passed to x264_predictor_roundclip). The low-refinement path
 *                  packs a 1-based candidate index into 4 bits and copies into a
 *                  16-entry array, so it presumably expects i_mvc <= 15 -- TODO
 *                  confirm against callers.
 * p_halfpel_thresh only forwarded to refine_subpel().
 *
 * COST_MV, COST_MV_HPEL, COST_MV_X3_DIR, COST_MV_X4_DIR, COPY1_IF_LT, BITS_MVD,
 * M32, pack16to32_mask and the tables hex2, mod6m1 and subpel_iterations are
 * defined outside this excerpt; comments about them describe their apparent role
 * as used here and should be checked against their definitions.
 *
 * NOTE(review): this listing is truncated -- see the note after the hexagon case. */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )

{

const int bw = x264_pixel_size[m->i_pixel].w;

const int bh = x264_pixel_size[m->i_pixel].h;

const int i_pixel = m->i_pixel;

const int stride = m->i_stride[0];

int i_me_range = h->param.analyse.i_me_range;

int bmx, bmy, bcost;

int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;

int omx, omy, pmx, pmy;

pixel *p_fenc = m->p_fenc[0];

pixel *p_fref_w = m->p_fref_w;

ALIGNED_ARRAY_16( pixel, pix,[16*16] );

int costs[16];

int mv_x_min = h->mb.mv_min_fpel[0];

int mv_y_min = h->mb.mv_min_fpel[1];

int mv_x_max = h->mb.mv_max_fpel[0];

int mv_y_max = h->mb.mv_max_fpel[1]; // the four bounds above are in full-pel units

int mv_x_min_qpel = mv_x_min << 2;

int mv_y_min_qpel = mv_y_min << 2;

int mv_x_max_qpel = mv_x_max << 2;

int mv_y_max_qpel = mv_y_max << 2; // shifted left by two: the same bounds in quarter-pel units

/* Special version of pack to allow shortcuts in CHECK_MVRANGE */

// Places mx in the upper 16 bits and my (masked to 15 bits) in the lower 16 bits.

#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))

uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );

uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;

// Range test on both components at once: when mx or my is outside [min,max], the matching
// sum (mv + -min) or difference (max - mv) is intended to go negative within its field and
// set bit 31 (x) or bit 14 (y). Masking with 0x80004000 picks up either bit, and the
// leading ! makes the macro true only when the vector is in range.

#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))

const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];

const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];// tables pre-offset by the MVP so they can be indexed directly by a quarter-pel MV component (see the cost_mv computation near the end)

uint32_t pmv;

bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );

bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );

pmx = ( bmx + 2 ) >> 2;// round the clipped quarter-pel MVP to full-pel

pmy = ( bmy + 2 ) >> 2;

bcost = COST_MAX;

/* try extra predictors if provided */

if( h->mb.i_subpel_refine >= 3 )// subpel refinement level 3 or above: compare candidates at sub-pel precision

{

pmv = pack16to32_mask(bmx,bmy);

if( i_mvc ) // only when extra candidates exist: score the clipped MVP itself first

COST_MV_HPEL( bmx, bmy );// presumably scores a sub-pel vector and updates bpred_mx/bpred_my/bpred_cost when it is cheaper -- confirm against the macro

for( int i = 0; i < i_mvc; i++ )// score every extra candidate

{

// Skip all-zero candidates and candidates whose packed value equals pmv.
if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )

{

int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );

int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );

COST_MV_HPEL( mx, my );

}

}

// Round the best sub-pel predictor to full-pel. With i_mvc == 0 none of the scoring
// above runs, so bpred_mx/bpred_my still hold their initial 0.
bmx = ( bpred_mx + 2 ) >> 2;//

bmy = ( bpred_my + 2 ) >> 2;

COST_MV( bmx, bmy );// score the full-pel starting point (presumably updates bcost/bmx/bmy)

}

else

{

/* check the MVP */

bmx = pmx;

bmy = pmy;

/* Because we are rounding the predicted motion vector to fullpel, there will be

* an extra MV cost in 15 out of 16 cases. However, when the predicted MV is

* chosen as the best predictor, it is often the case that the subpel search will

* result in a vector at or next to the predicted motion vector. Therefore, it is

* sensible to omit the cost of the MV from the rounded MVP to avoid unfairly

* biasing against use of the predicted motion vector. */

// Distortion only (no BITS_MVD term, per the note above) at the rounded MVP.

bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );

pmv = pack16to32_mask( bmx, bmy );

if( i_mvc > 0 )

{

ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );

x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );

bcost <<= 4;// free the low 4 bits to carry the 1-based index of the winning candidate

for( int i = 1; i <= i_mvc; i++ )

{

// NOTE(review): the second operand compares pmv (packed full-pel) against the
// unrounded mvc entry rather than mvc_fpel -- confirm this is intended.
if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )

{

int mx = mvc_fpel[i-1][0];

int my = mvc_fpel[i-1][1];

int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );

cost = (cost << 4) + i;

COPY1_IF_LT( bcost, cost );

}

}

if( bcost&15 )// low 4 bits non-zero: candidate number (bcost&15) won, so adopt its full-pel vector

{

bmx = mvc_fpel[(bcost&15)-1][0];

bmy = mvc_fpel[(bcost&15)-1][1];

}

bcost >>= 4;// drop the index bits to restore the plain cost

}

}

if( pmv )

COST_MV( 0, 0 );// pmv is non-zero: also try the zero motion vector

switch( h->mb.i_me_method )

{

case X264_ME_DIA:// diamond search

{

/* diamond search, radius 1 */

bcost <<= 4;

int i = i_me_range;

do

{

COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs );// fills costs[0..3] for offsets (0,-1), (0,1), (-1,0), (1,0) -- presumably relative to (bmx,bmy)

COPY1_IF_LT( bcost, (costs[0]<<4)+1 );

COPY1_IF_LT( bcost, (costs[1]<<4)+3 );

COPY1_IF_LT( bcost, (costs[2]<<4)+4 );

COPY1_IF_LT( bcost, (costs[3]<<4)+12 );

if( !(bcost&15) )// no neighbour beat the centre: stop early

break;

bmx -= (bcost<<28)>>30;// the tags 1, 3, 4, 12 encode the step: bits 3..2 hold -dx and bits 1..0 hold -dy as signed 2-bit fields;

// moving a field to the top of the word and shifting back down by 30 sign-extends it to -1, 0 or +1.
// NOTE(review): this relies on arithmetic right shift of a negative int and on left-shifting
// into the sign bit, neither of which the C standard guarantees.

bmy -= (bcost<<30)>>30;

bcost &= ~15;// clear the direction tag, keeping the cost in the upper bits

} while( --i && CHECK_MVRANGE(bmx, bmy) );

bcost >>= 4;

break;

}

case X264_ME_HEX:

{

me_hex2: // hexagon search

/* hexagon search, radius 2 */

#if 0

for( int i = 0; i < i_me_range/2; i++ )

{

omx = bmx; omy = bmy;

COST_MV( omx-2, omy );

COST_MV( omx-1, omy+2 );

COST_MV( omx+1, omy+2 );

COST_MV( omx+2, omy );

COST_MV( omx+1, omy-2 );

COST_MV( omx-1, omy-2 );

if( bmx == omx && bmy == omy )

break;

if( !CHECK_MVRANGE(bmx, bmy) )

break;

}

#else

/* equivalent to the above, but eliminates duplicate candidates */

/* hexagon */

COST_MV_X3_DIR( -2,0, -1, 2, 1, 2, costs );// costs[0..2]: offsets (-2,0), (-1,2), (1,2)

COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+3 );// costs[3..5]: offsets (2,0), (1,-2), (-1,-2)

bcost <<= 3;

COPY1_IF_LT( bcost, (costs[0]<<3)+2 );// keep the minimum; the low 3 bits tag which point won (2..7)

COPY1_IF_LT( bcost, (costs[1]<<3)+3 );

COPY1_IF_LT( bcost, (costs[2]<<3)+4 );

COPY1_IF_LT( bcost, (costs[3]<<3)+5 );

COPY1_IF_LT( bcost, (costs[4]<<3)+6 );

COPY1_IF_LT( bcost, (costs[5]<<3)+7 );

if( bcost&7 )// one of the six hexagon points beat the centre

{

int dir = (bcost&7)-2;// dir = 0..5: index of the winning point in the order evaluated above

// (the tags started at 2, hence the -2)

bmx += hex2[dir+1][0];// hex2 is indexed with a +1 offset; presumably the table carries an extra leading entry -- confirm against its definition

bmy += hex2[dir+1][1];

/* half hexagon, not overlapping the previous iteration */

for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )

{

COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],

hex2[dir+1][0], hex2[dir+1][1],

hex2[dir+2][0], hex2[dir+2][1],

costs );// only three new points per iteration (per the "half hexagon" note above, the rest overlap the previous iteration);

// they are hex2 entries dir, dir+1 and dir+2, i.e. the last step direction and its two neighbours in the table

bcost &= ~7;// clear the 3-bit tag

COPY1_IF_LT( bcost, (costs[0]<<3)+1 );

COPY1_IF_LT( bcost, (costs[1]<<3)+2 );

COPY1_IF_LT( bcost, (costs[2]<<3)+3 );

if( !(bcost&7) )// the centre is still the cheapest: stop

break;

dir += (bcost&7)-2;

dir = mod6m1[dir+1];

bmx += hex2[dir+1][0];// step to the new centre

bmy += hex2[dir+1][1];

}

}

bcost >>= 3;

break;

// NOTE(review): this excerpt appears to be truncated here. The #if/#else above has no
// matching #endif, and neither the X264_ME_HEX case block nor the switch is closed, so
// as written the code below follows the `break` inside the case block. Restore the
// omitted part from the full source before compiling.

/* -> qpel mv */

if( bpred_cost < bcost ) // the best sub-pel predictor beats the full-pel search result: keep the predictor (bpred_mx/bpred_my are stored without the <<2 used below)

{

m->mv[0] = bpred_mx;

m->mv[1] = bpred_my;

m->cost = bpred_cost;

}

else

{

m->mv[0] = bmx << 2; // otherwise use the full-pel search result, converted to quarter-pel units

m->mv[1] = bmy << 2;

m->cost = bcost;

}

/* compute the real cost */

m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];

if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 ) // on the low-refinement path the rounded MVP was scored without its MV cost (see the note in that branch), so add it now

m->cost += m->cost_mv;

/* subpel refine */

if( h->mb.i_subpel_refine >= 2 ) // half-pel and quarter-pel refinement

{

int hpel = subpel_iterations[h->mb.i_subpel_refine][2];

int qpel = subpel_iterations[h->mb.i_subpel_refine][3];

refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );

}

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: