void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
{
const int i_pixel = m->i_pixel;
+ const unsigned int i_me_range = h->param.analyse.i_me_range; /* NOTE(review): unsigned, so int/unsigned mixes below (loop bounds, "bmx - i_me_range" in the ESA case) are evaluated in unsigned arithmetic -- confirm intended */
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
int bmx, bmy, bcost;
- int omx, omy;
+ int omx, omy, pmx, pmy; /* omx/omy: origin of the current search step (also the ESA loop counters); pmx/pmy: clipped predictor, set together with bmx/bmy below */
uint8_t *p_fref = m->p_fref[0];
- int i_iter;
+ int i, j;
- const int mv_x_min = h->mb.mv_min_fpel[0];
- const int mv_y_min = h->mb.mv_min_fpel[1];
- const int mv_x_max = h->mb.mv_max_fpel[0];
- const int mv_y_max = h->mb.mv_max_fpel[1];
+ int mv_x_min = h->mb.mv_min_fpel[0]; /* the four limits are no longer const: they are widened by 8 after the predictor tests */
+ int mv_y_min = h->mb.mv_min_fpel[1];
+ int mv_x_max = h->mb.mv_max_fpel[0];
+ int mv_y_max = h->mb.mv_max_fpel[1];
const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
+ if( h->mb.i_me_method == X264_ME_UMH )
+ {
+ /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
+ p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
+ p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
+ }
- /* init with mvp */
- /* XXX: We don't need to clamp because the way diamond work, we will
- * never go outside padded picture, and predict mv won't compute vector
- * with componant magnitude greater.
- * XXX: if some vector can go outside, (accelerator, ....) you need to clip
- * them yourself */
- bmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
- bmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
- bcost = 1<<30;
+ bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max ); /* start at the predictor: rounded, scaled down by 4 (presumably qpel -> fullpel, going by the mv_*_fpel names) and clipped to the limits */
+ bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
+ bcost = COST_MAX; /* replaces the literal 1<<30 */
COST_MV( bmx, bmy );
/* I don't know why this helps */
bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];
/* try extra predictors if provided */
- for( i_iter = 0; i_iter < i_mvc; i_iter++ )
+ for( i = 0; i < i_mvc; i++ )
{
- const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, mv_x_min, mv_x_max );
- const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, mv_y_min, mv_y_max );
+ const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
+ const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
if( mx != bmx || my != bmy )
COST_MV( mx, my );
}
COST_MV( 0, 0 );
- if( h->mb.i_subpel_refine >= 2 )
+ mv_x_max += 8; /* NOTE(review): all four limits are widened by 8 from here on; presumably relies on frame padding -- confirm */
+ mv_y_max += 8;
+ mv_x_min -= 8;
+ mv_y_min -= 8;
+
+ switch( h->mb.i_me_method ) /* NOTE(review): no default case; a method not listed here goes straight to refine_subpel */
{
- /* hexagon search */
- /* Don't need to test mv_range each time, we won't go outside picture+padding */
- omx = bmx;
- omy = bmy;
- for( i_iter = 0; i_iter < 8; i_iter++ )
+ case X264_ME_DIA:
+ /* diamond search, radius 1: at most i_me_range steps, stopping early once the best mv stops moving */
+#define DIA1_ITER(mx, my)\
+ {\
+ omx = mx;\
+ omy = my;\
+ COST_MV( omx , omy-1 );\
+ COST_MV( omx , omy+1 );\
+ COST_MV( omx-1, omy );\
+ COST_MV( omx+1, omy );\
+ }
+
+ for( i = 0; i < i_me_range; i++ )
{
- COST_MV( omx-2, omy );
- COST_MV( omx-1, omy+2 );
- COST_MV( omx+1, omy+2 );
- COST_MV( omx+2, omy );
- COST_MV( omx+1, omy-2 );
- COST_MV( omx-1, omy-2 );
+ DIA1_ITER( bmx, bmy );
+ if( bmx == omx && bmy == omy )
+ break;
+ }
+ break;
+
+ case X264_ME_HEX:
+ /* hexagon search, radius 2: at most i_me_range/2 steps, stopping early once the best mv stops moving */
+#define HEX2_ITER(mx, my)\
+ {\
+ omx = mx;\
+ omy = my;\
+ COST_MV( omx-2, omy );\
+ COST_MV( omx-1, omy+2 );\
+ COST_MV( omx+1, omy+2 );\
+ COST_MV( omx+2, omy );\
+ COST_MV( omx+1, omy-2 );\
+ COST_MV( omx-1, omy-2 );\
+ }
+ for( i = 0; i < i_me_range/2; i++ )
+ {
+ HEX2_ITER( bmx, bmy );
if( bmx == omx && bmy == omy )
break;
- omx = bmx;
- omy = bmy;
}
-
/* square refine */
+ DIA1_ITER( bmx, bmy ); /* covers the 4 edge neighbours of the hexagon result; the 4 COST_MV calls below add the corners around the same origin */
COST_MV( omx-1, omy-1 );
- COST_MV( omx-1, omy );
COST_MV( omx-1, omy+1 );
- COST_MV( omx , omy-1 );
- COST_MV( omx , omy+1 );
COST_MV( omx+1, omy-1 );
- COST_MV( omx+1, omy );
COST_MV( omx+1, omy+1 );
- }
- else
- {
- /* diamond search */
- for( i_iter = 0; i_iter < 16; i_iter++ )
+ break;
+
+ case X264_ME_UMH:
+ /* Uneven-cross Multi-Hexagon-grid Search
+ * as in JM, except without early termination */
+
+ DIA1_ITER( pmx, pmy ); /* diamond around the clipped predictor */
+ if( pmx || pmy ) /* a diamond around (0,0) would repeat the previous one when the predictor is (0,0) */
+ DIA1_ITER( 0, 0 );
+ DIA1_ITER( bmx, bmy ); /* diamond around the best mv found so far */
+
+ if(i_pixel == PIXEL_4x4) /* 4x4 partitions skip the cross, 5x5 and hexagon-grid stages */
+ goto umh_small_hex;
+
+ /* cross: odd offsets only, below i_me_range horizontally and below i_me_range/2 vertically, each tested against the mv limits */
+ omx = bmx; omy = bmy;
+ for( i = 1; i < i_me_range; i+=2 )
+ {
+ if( omx + i <= mv_x_max )
+ COST_MV( omx + i, omy );
+ if( omx - i >= mv_x_min )
+ COST_MV( omx - i, omy );
+ }
+ for( i = 1; i < i_me_range/2; i+=2 )
+ {
+ if( omy + i <= mv_y_max )
+ COST_MV( omx, omy + i );
+ if( omy - i >= mv_y_min )
+ COST_MV( omx, omy - i );
+ }
+
+ /* 5x5 ESA: the 24 positions of the 5x5 square around the best mv (centre excluded) */
+ omx = bmx; omy = bmy;
+ for( i = 0; i < 24; i++ )
+ {
+ static const int square2_x[24] = {1,1,0,-1,-1,-1, 0, 1, 2,2,2,2,1,0,-1,-2,-2,-2,-2,-2,-1, 0, 1, 2};
+ static const int square2_y[24] = {0,1,1, 1, 0,-1,-1,-1,-1,0,1,2,2,2, 2, 2, 1, 0,-1,-2,-2,-2,-2,-2};
+ COST_MV( omx + square2_x[i], omy + square2_y[i] );
+ }
+ /* hexagon grid: 16-point pattern around the best mv, scaled by i = 1 .. i_me_range/4 */
+ omx = bmx; omy = bmy;
+ for( i = 1; i <= i_me_range/4; i++ )
{
- omx = bmx;
- omy = bmy;
- COST_MV( omx , omy-1 );
- COST_MV( omx , omy+1 );
- COST_MV( omx-1, omy );
- COST_MV( omx+1, omy );
+ int bounds_check = 4*i > X264_MIN4( mv_x_max-omx, mv_y_max-omy, omx-mv_x_min, omy-mv_y_min ); /* the pattern reaches 4*i in each axis; per-point limit tests are only needed when that exceeds the nearest limit */
+ for( j = 0; j < 16; j++ )
+ {
+ static const int hex4_x[16] = {0,-2,-4,-4,-4,-4,-4,-2, 0, 2, 4, 4,4,4,4,2};
+ static const int hex4_y[16] = {4, 3, 2, 1, 0,-1,-2,-3,-4,-3,-2,-1,0,1,2,3};
+ int mx = omx + hex4_x[j]*i;
+ int my = omy + hex4_y[j]*i;
+ if( !bounds_check || ( mx >= mv_x_min && mx <= mv_x_max
+ && my >= mv_y_min && my <= mv_y_max ) )
+ COST_MV( mx, my );
+ }
+ }
+umh_small_hex:
+ /* iterative search: hexagon steps, then diamond steps; each loop runs at most i_me_range times and stops once the best mv stops moving */
+ for( i = 0; i < i_me_range; i++ )
+ {
+ HEX2_ITER( bmx, bmy );
+ if( bmx == omx && bmy == omy )
+ break;
+ }
+ for( i = 0; i < i_me_range; i++ )
+ {
+ DIA1_ITER( bmx, bmy );
if( bmx == omx && bmy == omy )
break;
}
+ break;
+
+ case X264_ME_ESA: /* tests every position in the window of +/- i_me_range around the best mv, clamped to the mv limits */
+ {
+ const int min_x = X264_MAX( bmx - i_me_range, mv_x_min); /* NOTE(review): bmx - i_me_range (and bmx + i_me_range below) is an unsigned expression; verify X264_MAX / X264_MIN still pick the intended bound when the operands differ in sign */
+ const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
+ const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
+ const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
+ for( omy = min_y; omy <= max_y; omy++ )
+ for( omx = min_x; omx <= max_x; omx++ )
+ {
+ COST_MV( omx, omy );
+ }
+ }
+ break;
}
/* -> qpel mv */
refine_subpel( h, m, hpel, qpel );
}
-#define COST_MV( mx, my, dir ) \
+#define COST_MV( mx, my ) /* on improvement, records the candidate position in bmx/bmy (previously a direction index in bdir). NOTE(review): the statement that computes cost is not visible in this hunk */ \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
if( cost < bcost ) \
{ \
bcost = cost; \
- bdir = dir; \
+ bmx = mx; \
+ bmy = my; \
} \
}
int bmx = m->mv[0];
int bmy = m->mv[1];
+ int bcost = m->cost;
+
+ /* try the subpel component of the predicted mv if it's close to
+ * the result of the fullpel search */
+ if( hpel_iters )
+ {
+ int mx = X264_ABS(bmx - m->mvp[0]) < 4 ? m->mvp[0] : bmx;
+ int my = X264_ABS(bmy - m->mvp[1]) < 4 ? m->mvp[1] : bmy;
+ if( mx != bmx || my != bmy )
+ COST_MV( mx, my );
+ }
for( step = 2; step >= 1; step-- )
{
for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
{
- int bcost = 1<<30;
- int bdir = 0;
- COST_MV( bmx, bmy - step, 0 );
- COST_MV( bmx, bmy + step, 1 );
- COST_MV( bmx - step, bmy, 2 );
- COST_MV( bmx + step, bmy, 3 );
-
- if( bcost < m->cost )
- {
- m->cost = bcost;
- if( bdir == 0 ) bmy -= step;
- else if( bdir == 1 ) bmy += step;
- else if( bdir == 2 ) bmx -= step;
- else if( bdir == 3 ) bmx += step;
- }
- else break;
+ int omx = bmx;
+ int omy = bmy;
+ COST_MV( omx, omy - step );
+ COST_MV( omx, omy + step );
+ COST_MV( omx - step, omy );
+ COST_MV( omx + step, omy );
+ if( bmx == omx && bmy == omy )
+ break;
}
}
+ m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];