x264_me_search_ref函数分析
2014-02-25 11:04
465 查看
/*
 * x264_me_search_ref: motion search for one block against one reference.
 *
 * Picks a full-pel starting point from the predicted mv (m->mvp) and the
 * optional candidate list, runs the full-pel search pattern selected by
 * h->mb.i_me_method, then stores the result and optionally calls
 * refine_subpel().
 *
 *   h                - encoder context; this function reads
 *                      h->param.analyse.i_me_range, h->mb.mv_min_fpel,
 *                      h->mb.mv_max_fpel, h->mb.i_subpel_refine,
 *                      h->mb.i_me_method and h->pixf.fpelcmp.
 *   m                - search state; reads i_pixel, i_stride[0], p_fenc[0],
 *                      p_fref_w, p_cost_mv and mvp; writes mv, cost and cost_mv.
 *   mvc, i_mvc       - extra candidate vectors and their count (may be 0).
 *                      They are clipped against the quarter-pel bounds below,
 *                      so they are treated as quarter-pel vectors.
 *   p_halfpel_thresh - passed through unchanged to refine_subpel().
 *
 * NOTE(review): this listing is incomplete. The COST_MV*, COPY1_IF_LT,
 * BITS_MVD, M32 and pack16to32_mask macros and the hex2 / mod6m1 tables are
 * defined elsewhere, and the switch statement below is cut off part-way
 * through (see the note in the X264_ME_HEX case).
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
const int i_pixel = m->i_pixel;
const int stride = m->i_stride[0];
int i_me_range = h->param.analyse.i_me_range;
int bmx, bmy, bcost;
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
int omx, omy, pmx, pmy;
pixel *p_fenc = m->p_fenc[0];
pixel *p_fref_w = m->p_fref_w;
ALIGNED_ARRAY_16( pixel, pix,[16*16] );
int costs[16];
int mv_x_min = h->mb.mv_min_fpel[0];
int mv_y_min = h->mb.mv_min_fpel[1];
int mv_x_max = h->mb.mv_max_fpel[0];
int mv_y_max = h->mb.mv_max_fpel[1]; // the four bounds above are in full-pel units
int mv_x_min_qpel = mv_x_min << 2;
int mv_y_min_qpel = mv_y_min << 2;
int mv_x_max_qpel = mv_x_max << 2;
int mv_y_max_qpel = mv_y_max << 2; // shifted left by two: the same bounds in quarter-pel units
/* Special version of pack to allow shortcuts in CHECK_MVRANGE */
// puts mx in the upper 16 bits and the low 15 bits of my in the lower half
#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
// CHECK_MVRANGE is nonzero only when neither (packed mv + mv_min) nor (mv_max - packed mv)
// has bit 31 or bit 14 set. Per the original note, those bits act as per-component sign bits
// that become 1 when mx or my falls outside [mv_min, mv_max].
#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))
const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; // biased by mvp so the tables can be indexed with an absolute mv component; the original note says mvp is in quarter-pel units
uint32_t pmv;
bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );
bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );
pmx = ( bmx + 2 ) >> 2; // round the clipped predictor (bmx, bmy) to integer precision
pmy = ( bmy + 2 ) >> 2;
bcost = COST_MAX;
/* try extra predictors if provided */
if( h->mb.i_subpel_refine >= 3 ) // subpel refinement level 3 or higher: test the predictors at sub-pel precision
{
pmv = pack16to32_mask(bmx,bmy);
if( i_mvc ) // per the original note, COST_MV_HPEL compares the cost at (mx, my) with bpred_cost and, if lower, records it in bpred_mx / bpred_my (macro not shown here)
COST_MV_HPEL( bmx, bmy ); // per the original note: locates the position for the quarter-pel mv, then computes SATD
for( int i = 0; i < i_mvc; i++ ) // evaluate every candidate in mvc
{
if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
{
int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
COST_MV_HPEL( mx, my );
}
}
bmx = ( bpred_mx + 2 ) >> 2; // round the best predictor back to integer precision
bmy = ( bpred_my + 2 ) >> 2;
COST_MV( bmx, bmy ); // cost at the rounded position (bmx, bmy), which becomes the starting point of the search below
}
else
{
/* check the MVP */
bmx = pmx;
bmy = pmy;
/* Because we are rounding the predicted motion vector to fullpel, there will be
* an extra MV cost in 15 out of 16 cases. However, when the predicted MV is
* chosen as the best predictor, it is often the case that the subpel search will
* result in a vector at or next to the predicted motion vector. Therefore, it is
* sensible to omit the cost of the MV from the rounded MVP to avoid unfairly
* biasing against use of the predicted motion vector. */
// cost at (bmx, bmy), without any mv-cost term
bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
pmv = pack16to32_mask( bmx, bmy );
if( i_mvc > 0 )
{
ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
bcost <<= 4; // free the low 4 bits: each candidate cost carries its index i there, so one comparison tracks both cost and winner
for( int i = 1; i <= i_mvc; i++ )
{
// NOTE(review): the second test compares pmv with the unrounded mvc entry rather than mvc_fpel - confirm this is intended
if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
{
int mx = mvc_fpel[i-1][0];
int my = mvc_fpel[i-1][1];
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
cost = (cost << 4) + i;
COPY1_IF_LT( bcost, cost );
}
}
if( bcost&15 ) // nonzero low 4 bits: candidate number (bcost&15) won, so update bmx, bmy
{
bmx = mvc_fpel[(bcost&15)-1][0];
bmy = mvc_fpel[(bcost&15)-1][1];
}
bcost >>= 4; // drop the index bits to get the plain cost back
}
}
if( pmv )
COST_MV( 0, 0 ); // the packed predictor is nonzero, so also try the zero vector
switch( h->mb.i_me_method )
{
case X264_ME_DIA: // diamond search
{
/* diamond search, radius 1 */
bcost <<= 4;
int i = i_me_range;
do
{
COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs ); // costs of the four neighbours at offsets (0,-1), (0,1), (-1,0), (1,0)
COPY1_IF_LT( bcost, (costs[0]<<4)+1 );
COPY1_IF_LT( bcost, (costs[1]<<4)+3 );
COPY1_IF_LT( bcost, (costs[2]<<4)+4 );
COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
if( !(bcost&15) ) // no neighbour beat the centre: stop early
break;
bmx -= (bcost<<28)>>30; // the tags 1, 3, 4, 12 encode the offsets: bits 3..2 hold -dx and bits 1..0 hold -dy as 2-bit signed values.
// Shifting the tag to the top of the word and back down by 30 extracts one field; NOTE(review): this relies on an arithmetic right shift of a signed int
bmy -= (bcost<<30)>>30;
bcost &= ~15; // clear the low 4 tag bits
} while( --i && CHECK_MVRANGE(bmx, bmy) );
bcost >>= 4;
break;
}
case X264_ME_HEX:
{
me_hex2: // hexagon search
/* hexagon search, radius 2 */
#if 0
for( int i = 0; i < i_me_range/2; i++ )
{
omx = bmx; omy = bmy;
COST_MV( omx-2, omy );
COST_MV( omx-1, omy+2 );
COST_MV( omx+1, omy+2 );
COST_MV( omx+2, omy );
COST_MV( omx+1, omy-2 );
COST_MV( omx-1, omy-2 );
if( bmx == omx && bmy == omy )
break;
if( !CHECK_MVRANGE(bmx, bmy) )
break;
}
#else
/* equivalent to the above, but eliminates duplicate candidates */
/* hexagon */
COST_MV_X3_DIR( -2,0, -1, 2, 1, 2, costs ); // costs of three of the six hexagon points: offsets (-2,0), (-1,2), (1,2)
COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+3 ); // costs of the other three: offsets (2,0), (1,-2), (-1,-2)
bcost <<= 3;
COPY1_IF_LT( bcost, (costs[0]<<3)+2 ); // keep the cheapest; the low 3 bits tag which point won (2..7)
COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
if( bcost&7 ) // one of the six points beat the centre
{
int dir = (bcost&7)-2; // dir = index 0..5 of the winning point, in the order evaluated above
// (minus 2 because the tags above start at 2)
bmx += hex2[dir+1][0]; // +1 because, per the original note, the first hex2 entry is not point 0 (table not shown here)
bmy += hex2[dir+1][1];
/* half hexagon, not overlapping the previous iteration */
for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
{
COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
hex2[dir+1][0], hex2[dir+1][1],
hex2[dir+2][0], hex2[dir+2][1],
costs ); // continue the hexagon search with only three new points; per the original note the other three were already evaluated,
// and the three start one position before dir, which is why hex2 is indexed with the +1 shift
bcost &= ~7; // clear the low 3 tag bits
COPY1_IF_LT( bcost, (costs[0]<<3)+1 );
COPY1_IF_LT( bcost, (costs[1]<<3)+2 );
COPY1_IF_LT( bcost, (costs[2]<<3)+3 );
if( !(bcost&7) ) // the centre is still the cheapest: leave the loop
break;
dir += (bcost&7)-2;
dir = mod6m1[dir+1];
bmx += hex2[dir+1][0]; // move the centre (bmx, bmy) by the winning offset
bmy += hex2[dir+1][1];
}
}
bcost >>= 3;
break;
// NOTE(review): the listing appears to be truncated here. The #endif for the #if 0 / #else above,
// the closing braces of this case block and of the switch, and any further cases are missing.
// The code below looks like the function's epilogue that should follow the switch.
/* -> qpel mv */
if( bpred_cost < bcost ) // the best sub-pel predictor (bpred_*) is cheaper than the full-pel search result: keep the predictor
{
m->mv[0] = bpred_mx;
m->mv[1] = bpred_my;
m->cost = bpred_cost;
}
else
{
m->mv[0] = bmx << 2; // otherwise use the full-pel search result, converted to quarter-pel units
m->mv[1] = bmy << 2;
m->cost = bcost;
}
/* compute the real cost */
m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 ) // the result is the rounded predictor, whose cost was taken without the mv term above, so add it now
m->cost += m->cost_mv;
/* subpel refine */
if( h->mb.i_subpel_refine >= 2 ) // half-pel and quarter-pel refinement, with iteration counts taken from subpel_iterations
{
int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
}
}
/*
 * NOTE(review): this block repeats the body of x264_me_search_ref that appears
 * earlier in this file, but without the function signature line; it looks like
 * a duplicated paste. The names h, m, mvc, i_mvc and p_halfpel_thresh used
 * below are the parameters of that function.
 *
 * The body picks a full-pel starting point from the predicted mv (m->mvp) and
 * the optional candidate list, runs the full-pel search pattern selected by
 * h->mb.i_me_method, then writes m->mv, m->cost and m->cost_mv and optionally
 * calls refine_subpel().
 *
 * The COST_MV*, COPY1_IF_LT, BITS_MVD, M32 and pack16to32_mask macros and the
 * hex2 / mod6m1 tables are defined elsewhere, and the switch statement is cut
 * off part-way through (see the note in the X264_ME_HEX case).
 */
{
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
const int i_pixel = m->i_pixel;
const int stride = m->i_stride[0];
int i_me_range = h->param.analyse.i_me_range;
int bmx, bmy, bcost;
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
int omx, omy, pmx, pmy;
pixel *p_fenc = m->p_fenc[0];
pixel *p_fref_w = m->p_fref_w;
ALIGNED_ARRAY_16( pixel, pix,[16*16] );
int costs[16];
int mv_x_min = h->mb.mv_min_fpel[0];
int mv_y_min = h->mb.mv_min_fpel[1];
int mv_x_max = h->mb.mv_max_fpel[0];
int mv_y_max = h->mb.mv_max_fpel[1]; // the four bounds above are in full-pel units
int mv_x_min_qpel = mv_x_min << 2;
int mv_y_min_qpel = mv_y_min << 2;
int mv_x_max_qpel = mv_x_max << 2;
int mv_y_max_qpel = mv_y_max << 2; // shifted left by two: the same bounds in quarter-pel units
/* Special version of pack to allow shortcuts in CHECK_MVRANGE */
// puts mx in the upper 16 bits and the low 15 bits of my in the lower half
#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
// CHECK_MVRANGE is nonzero only when neither (packed mv + mv_min) nor (mv_max - packed mv)
// has bit 31 or bit 14 set. Per the original note, those bits act as per-component sign bits
// that become 1 when mx or my falls outside [mv_min, mv_max].
#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))
const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; // biased by mvp so the tables can be indexed with an absolute mv component; the original note says mvp is in quarter-pel units
uint32_t pmv;
bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );
bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );
pmx = ( bmx + 2 ) >> 2; // round the clipped predictor (bmx, bmy) to integer precision
pmy = ( bmy + 2 ) >> 2;
bcost = COST_MAX;
/* try extra predictors if provided */
if( h->mb.i_subpel_refine >= 3 ) // subpel refinement level 3 or higher: test the predictors at sub-pel precision
{
pmv = pack16to32_mask(bmx,bmy);
if( i_mvc ) // per the original note, COST_MV_HPEL compares the cost at (mx, my) with bpred_cost and, if lower, records it in bpred_mx / bpred_my (macro not shown here)
COST_MV_HPEL( bmx, bmy ); // per the original note: locates the position for the quarter-pel mv, then computes SATD
for( int i = 0; i < i_mvc; i++ ) // evaluate every candidate in mvc
{
if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
{
int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
COST_MV_HPEL( mx, my );
}
}
bmx = ( bpred_mx + 2 ) >> 2; // round the best predictor back to integer precision
bmy = ( bpred_my + 2 ) >> 2;
COST_MV( bmx, bmy ); // cost at the rounded position (bmx, bmy), which becomes the starting point of the search below
}
else
{
/* check the MVP */
bmx = pmx;
bmy = pmy;
/* Because we are rounding the predicted motion vector to fullpel, there will be
* an extra MV cost in 15 out of 16 cases. However, when the predicted MV is
* chosen as the best predictor, it is often the case that the subpel search will
* result in a vector at or next to the predicted motion vector. Therefore, it is
* sensible to omit the cost of the MV from the rounded MVP to avoid unfairly
* biasing against use of the predicted motion vector. */
// cost at (bmx, bmy), without any mv-cost term
bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
pmv = pack16to32_mask( bmx, bmy );
if( i_mvc > 0 )
{
ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
bcost <<= 4; // free the low 4 bits: each candidate cost carries its index i there, so one comparison tracks both cost and winner
for( int i = 1; i <= i_mvc; i++ )
{
// NOTE(review): the second test compares pmv with the unrounded mvc entry rather than mvc_fpel - confirm this is intended
if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
{
int mx = mvc_fpel[i-1][0];
int my = mvc_fpel[i-1][1];
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
cost = (cost << 4) + i;
COPY1_IF_LT( bcost, cost );
}
}
if( bcost&15 ) // nonzero low 4 bits: candidate number (bcost&15) won, so update bmx, bmy
{
bmx = mvc_fpel[(bcost&15)-1][0];
bmy = mvc_fpel[(bcost&15)-1][1];
}
bcost >>= 4; // drop the index bits to get the plain cost back
}
}
if( pmv )
COST_MV( 0, 0 ); // the packed predictor is nonzero, so also try the zero vector
switch( h->mb.i_me_method )
{
case X264_ME_DIA: // diamond search
{
/* diamond search, radius 1 */
bcost <<= 4;
int i = i_me_range;
do
{
COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs ); // costs of the four neighbours at offsets (0,-1), (0,1), (-1,0), (1,0)
COPY1_IF_LT( bcost, (costs[0]<<4)+1 );
COPY1_IF_LT( bcost, (costs[1]<<4)+3 );
COPY1_IF_LT( bcost, (costs[2]<<4)+4 );
COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
if( !(bcost&15) ) // no neighbour beat the centre: stop early
break;
bmx -= (bcost<<28)>>30; // the tags 1, 3, 4, 12 encode the offsets: bits 3..2 hold -dx and bits 1..0 hold -dy as 2-bit signed values.
// Shifting the tag to the top of the word and back down by 30 extracts one field; NOTE(review): this relies on an arithmetic right shift of a signed int
bmy -= (bcost<<30)>>30;
bcost &= ~15; // clear the low 4 tag bits
} while( --i && CHECK_MVRANGE(bmx, bmy) );
bcost >>= 4;
break;
}
case X264_ME_HEX:
{
me_hex2: // hexagon search
/* hexagon search, radius 2 */
#if 0
for( int i = 0; i < i_me_range/2; i++ )
{
omx = bmx; omy = bmy;
COST_MV( omx-2, omy );
COST_MV( omx-1, omy+2 );
COST_MV( omx+1, omy+2 );
COST_MV( omx+2, omy );
COST_MV( omx+1, omy-2 );
COST_MV( omx-1, omy-2 );
if( bmx == omx && bmy == omy )
break;
if( !CHECK_MVRANGE(bmx, bmy) )
break;
}
#else
/* equivalent to the above, but eliminates duplicate candidates */
/* hexagon */
COST_MV_X3_DIR( -2,0, -1, 2, 1, 2, costs ); // costs of three of the six hexagon points: offsets (-2,0), (-1,2), (1,2)
COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+3 ); // costs of the other three: offsets (2,0), (1,-2), (-1,-2)
bcost <<= 3;
COPY1_IF_LT( bcost, (costs[0]<<3)+2 ); // keep the cheapest; the low 3 bits tag which point won (2..7)
COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
if( bcost&7 ) // one of the six points beat the centre
{
int dir = (bcost&7)-2; // dir = index 0..5 of the winning point, in the order evaluated above
// (minus 2 because the tags above start at 2)
bmx += hex2[dir+1][0]; // +1 because, per the original note, the first hex2 entry is not point 0 (table not shown here)
bmy += hex2[dir+1][1];
/* half hexagon, not overlapping the previous iteration */
for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
{
COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
hex2[dir+1][0], hex2[dir+1][1],
hex2[dir+2][0], hex2[dir+2][1],
costs ); // continue the hexagon search with only three new points; per the original note the other three were already evaluated,
// and the three start one position before dir, which is why hex2 is indexed with the +1 shift
bcost &= ~7; // clear the low 3 tag bits
COPY1_IF_LT( bcost, (costs[0]<<3)+1 );
COPY1_IF_LT( bcost, (costs[1]<<3)+2 );
COPY1_IF_LT( bcost, (costs[2]<<3)+3 );
if( !(bcost&7) ) // the centre is still the cheapest: leave the loop
break;
dir += (bcost&7)-2;
dir = mod6m1[dir+1];
bmx += hex2[dir+1][0]; // move the centre (bmx, bmy) by the winning offset
bmy += hex2[dir+1][1];
}
}
bcost >>= 3;
break;
// NOTE(review): the listing appears to be truncated here. The #endif for the #if 0 / #else above,
// the closing braces of this case block and of the switch, and any further cases are missing.
// The code below looks like the function's epilogue that should follow the switch.
/* -> qpel mv */
if( bpred_cost < bcost ) // the best sub-pel predictor (bpred_*) is cheaper than the full-pel search result: keep the predictor
{
m->mv[0] = bpred_mx;
m->mv[1] = bpred_my;
m->cost = bpred_cost;
}
else
{
m->mv[0] = bmx << 2; // otherwise use the full-pel search result, converted to quarter-pel units
m->mv[1] = bmy << 2;
m->cost = bcost;
}
/* compute the real cost */
m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 ) // the result is the rounded predictor, whose cost was taken without the mv term above, so add it now
m->cost += m->cost_mv;
/* subpel refine */
if( h->mb.i_subpel_refine >= 2 ) // half-pel and quarter-pel refinement, with iteration counts taken from subpel_iterations
{
int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
}
}
相关文章推荐
- x264中x264_me_search_ref的被调用分析
- x264中x264_me_search_ref的被调用分析
- x264 - x264_me_search_ref
- x264代码剖析(十一):核心算法之宏块分析函数x264_macroblock_analyse()
- STL(九):函数适配器bind2nd 、mem_fun_ref 源码分析、函数适配器应用举例
- x264源码分析 -- get_ref
- x264_median 函数分析
- x264中的提炼半像素函数——refine_subpel 函数分析
- C++之STL(九):函数适配器bind2nd 、mem_fun_ref 源码分析、函数适配器应用举例
- x264_macroblock_cache_load()函数分析
- 从零开始学C++之STL(九):函数适配器bind2nd 、mem_fun_ref 源码分析、函数适配器应用举例
- x264_macroblock_cache_load()函数分析
- x264中16x16帧内预测模式函数分析
- 函数适配器bind2nd 、mem_fun_ref 源码分析、函数适配器应用举例
- 从零开始学C++之STL(九):函数适配器bind2nd 、mem_fun_ref 源码分析、函数适配器应用举例
- (转)x264源码分析(1):main、parse、encode、x264_encoder_open函数代码分析
- x264代码剖析(十一):核心算法之宏块分析函数x264_macroblock_analyse()
- 破解 Rith's CrackMe #1(对比IDA查看动态分析中的MFC函数名)
- x264_mb_predict_mv_16x16 函数分析
- ubifs mount 函数分析