}
}
-static inline int x264_median( int a, int b, int c )
-{
- int min = a, max =a;
- if( b < min )
- min = b;
- else
- max = b; /* no need to do 'b > max' (more consuming than always doing affectation) */
-
- if( c < min )
- min = c;
- else if( c > max )
- max = c;
-
- return a + b + c - min - max;
-}
-
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] )
{
const int i8 = x264_scan8[idx];
}
}
-void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc )
+/* Temporal direct MV prediction for a whole B-16x16 macroblock.
+ * L0 refs/MVs are taken from the co-located partitions of the first L1
+ * reference (fref1[0]) and scaled by dist_scale_factor; each L1 MV is the
+ * residual mv_l0 - mv_col. Always succeeds, hence the constant return 1. */
+static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
{
-    int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
+    /* top-left 4x4 / 8x8 block index of this MB in the frame-wide arrays */
+    int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
+    int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
+    int i;
+
+    /* temporal direct uses L1 ref 0 for the entire MB */
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
+
+    /* FIXME: optimize per block size */
+    for( i = 0; i < 4; i++ )
+    {
+        /* (x8,y8) = top-left 4x4 coordinate of 8x8 partition i */
+        const int x8 = 2*(i%2);
+        const int y8 = 2*(i/2);
+        /* TODO: MapColToList0 */
+        const int i_ref = h->fref1[0]->ref[0][ i_mb_8x8 + x8/2 + y8 * h->mb.i_mb_stride ];
+
+        if( i_ref == -1 )
+        {
+            /* co-located block has no L0 ref: default to ref 0, zero MVs */
+            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, 0 );
+            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 0, 0, 0 );
+            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 1, 0, 0 );
+        }
+        else
+        {
+            const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
+            int x4, y4;
+
+            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref );
+
+            for( y4 = y8; y4 < y8+2; y4++ )
+                for( x4 = x8; x4 < x8+2; x4++ )
+                {
+                    const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * 4 * h->mb.i_mb_stride ];
+                    int mv_l0[2];
+                    /* scale co-located MV; "+128 >> 8" rounds to nearest */
+                    mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+                    mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
+                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+                }
+        }
+    }
+
+    return 1;
+}
+
+/* Spatial direct MV prediction for a B-16x16 macroblock.
+ * Per list, the ref is the minimum non-negative ref among neighbours
+ * A (left), B (top), C (top-right, falling back to top-left); the 16x16
+ * MV predictor supplies the MV. Afterwards, 4x4 blocks whose co-located
+ * L1 MV is near zero ("col_zero_flag") get their MVs forced to zero.
+ * Always succeeds, hence the constant return 1. */
+static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
+{
+    int ref[2];
+    int mv[2][2];
+    int i_list;
+    int i8, i4;
+    /* co-located L1 refs/MVs, addressed at this MB's 8x8/4x4 origin */
+    const int8_t *l1ref = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
+    const int16_t (*l1mv)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ];
+
+    for( i_list=0; i_list<2; i_list++ )
+    {
+        int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
+        int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
+        int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
+        /* top-right unavailable (-2): substitute top-left */
+        if( i_refc == -2 )
+            i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
+
+        /* minimum of the non-negative neighbour refs, or -1 if none */
+        ref[i_list] = i_refa;
+        if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
+            ref[i_list] = i_refb;
+        if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
+            ref[i_list] = i_refc;
+        if( ref[i_list] < 0 )
+            ref[i_list] = -1;
+    }
+
+    if( ref[0] < 0 && ref[1] < 0 )
+    {
+        /* no usable neighbour in either list: ref 0 with zero MV */
+        ref[0] =
+        ref[1] = 0;
+        mv[0][0] =
+        mv[0][1] =
+        mv[1][0] =
+        mv[1][1] = 0;
+    }
+    else
+    {
+        for( i_list=0; i_list<2; i_list++ )
+        {
+            if( ref[i_list] >= 0 )
+                x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
+            else
+                mv[i_list][0] = mv[i_list][1] = 0;
+        }
+    }
+
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
+    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
+    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );
+
+    /* col_zero_flag */
+    for( i8=0; i8<4; i8++ )
+    {
+        const int x8 = i8%2;
+        const int y8 = i8/2;
+        /* only applies where the co-located 8x8 block uses L1 ref 0 */
+        if( l1ref[ x8 + y8 * h->mb.i_b8_stride ] == 0 )
+        {
+            for( i4=0; i4<4; i4++ )
+            {
+                const int x4 = i4%2 + 2*x8;
+                const int y4 = i4/2 + 2*y8;
+                const int16_t *mvcol = l1mv[x4 + y4 * h->mb.i_b4_stride];
+                /* co-located MV within +-1 quarter-pel on both axes:
+                 * zero this block's MV in each list whose ref is 0 */
+                if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
+                {
+                    if( ref[0] == 0 )
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
+                    if( ref[1] == 0 )
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+                }
+            }
+        }
+    }
+
+    return 1;
+}
+
+/* Run direct MV prediction for the current B MB, choosing spatial or
+ * temporal mode from the slice header. On success, snapshot the resulting
+ * per-8x8 refs and the whole MV cache into direct_ref/direct_mv so that
+ * individual 8x8 partitions can be reloaded later (x264_mb_load_mv_direct8x8).
+ * Returns 0 if direct prediction is disabled/unavailable, nonzero otherwise. */
+int x264_mb_predict_mv_direct16x16( x264_t *h )
+{
+    int b_available;
+    if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
+        return 0;
+    else if( h->sh.b_direct_spatial_mv_pred )
+        b_available = x264_mb_predict_mv_direct16x16_spatial( h );
+    else
+        b_available = x264_mb_predict_mv_direct16x16_temporal( h );
+
+    /* cache ref & mv */
+    if( b_available )
+    {
+        int i, l;
+        for( l = 0; l < 2; l++ )
+            for( i = 0; i < 4; i++ )
+                h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]];
+        memcpy(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
+    }
+
+    return b_available;
+}
+/* Reload the cached direct-mode refs/MVs of 8x8 partition idx (0..3) into
+ * the MB cache — the inverse of the snapshot taken at the end of
+ * x264_mb_predict_mv_direct16x16. */
+void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
+{
+    const int x = 2*(idx%2);
+    const int y = 2*(idx/2);
+    int l;
+    x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
+    x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
+    for( l = 0; l < 2; l++ )
+    {
+        /* copy each row of two 4x4 MVs (4 x int16 = 64 bits) in one move;
+         * "+8" is the scan8 offset of the second row of the partition */
+        *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]] =
+            *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]];
+        *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]+8] =
+            *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]+8];
+    }
+}
+
+/* This just improves encoder performance, it's not part of the spec */
+void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2], int *i_mvc )
+{
+ int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
int i = 0;
+
+ /* temporal */
+ if( h->sh.i_type == SLICE_TYPE_B )
+ {
+ if( h->mb.cache.ref[i_list][x264_scan8[12]] == i_ref )
+ {
+ /* FIXME: use direct_mv to be clearer? */
+ int16_t *mvp = h->mb.cache.mv[i_list][x264_scan8[12]];
+ mvc[i][0] = mvp[0];
+ mvc[i][1] = mvp[1];
+ i++;
+ }
+ }
+
+ /* spatial */
if( h->mb.i_mb_x > 0 )
{
int i_mb_l = h->mb.i_mb_xy - 1;
- mvc[i][0] = mvr[i_mb_l][0];
- mvc[i][1] = mvr[i_mb_l][1];
- i++;
+ /* skip MBs didn't go through the whole search process, so mvr is undefined */
+ if( !IS_SKIP( h->mb.type[i_mb_l] ) )
+ {
+ mvc[i][0] = mvr[i_mb_l][0];
+ mvc[i][1] = mvr[i_mb_l][1];
+ i++;
+ }
}
if( h->mb.i_mb_y > 0 )
{
int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride;
- mvc[i][0] = mvr[i_mb_t][0];
- mvc[i][1] = mvr[i_mb_t][1];
- i++;
+ if( !IS_SKIP( h->mb.type[i_mb_t] ) )
+ {
+ mvc[i][0] = mvr[i_mb_t][0];
+ mvc[i][1] = mvr[i_mb_t][1];
+ i++;
+ }
- if( h->mb.i_mb_x > 0 )
+ if( h->mb.i_mb_x > 0 && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
{
mvc[i][0] = mvr[i_mb_t - 1][0];
mvc[i][1] = mvr[i_mb_t - 1][1];
i++;
}
- if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 )
+ if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 && !IS_SKIP( h->mb.type[i_mb_t + 1] ) )
{
mvc[i][0] = mvr[i_mb_t + 1][0];
mvc[i][1] = mvr[i_mb_t + 1][1];
{
const int i8 = x264_scan8[0]+x+8*y;
const int i_ref = h->mb.cache.ref[0][i8];
- const int mvx = h->mb.cache.mv[0][i8][0];
- const int mvy = h->mb.cache.mv[0][i8][1];
+ const int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
+ const int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
mvx, mvy, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
&h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
mvx, mvy, 2*width, 2*height );
}
{
const int i8 = x264_scan8[0]+x+8*y;
const int i_ref = h->mb.cache.ref[1][i8];
- const int mvx = h->mb.cache.mv[1][i8][0];
- const int mvy = h->mb.cache.mv[1][i8][1];
+ const int mvx = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
+ const int mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
mvx, mvy, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
&h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
mvx, mvy, 2*width, 2*height );
}
{
const int i8 = x264_scan8[0]+x+8*y;
- const int i_ref0 = h->mb.cache.ref[0][i8];
- const int mvx0 = h->mb.cache.mv[0][i8][0];
- const int mvy0 = h->mb.cache.mv[0][i8][1];
-
const int i_ref1 = h->mb.cache.ref[1][i8];
- const int mvx1 = h->mb.cache.mv[1][i8][0];
- const int mvy1 = h->mb.cache.mv[1][i8][1];
+ const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
+ const int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
DECLARE_ALIGNED( uint8_t, tmp[16*16], 16 );
- int i_mode = 0;
-
- if( width == 4 && height == 4 ) i_mode = PIXEL_16x16;
- else if( width == 4 && height == 2 ) i_mode = PIXEL_16x8;
- else if( width == 2 && height == 4 ) i_mode = PIXEL_8x16;
- else if( width == 2 && height == 2 ) i_mode = PIXEL_8x8;
- else if( width == 2 && height == 1 ) i_mode = PIXEL_8x4;
- else if( width == 1 && height == 2 ) i_mode = PIXEL_4x8;
- else if( width == 1 && height == 1 ) i_mode = PIXEL_4x4;
-
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref0][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx0, mvy0, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
- &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
- mvx0, mvy0, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
- &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
- mvx0, mvy0, 2*width, 2*height );
-
-
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref1][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- tmp, 16, mvx1, mvy1, 4*width, 4*height );
- h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
-
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
- tmp, 16, mvx1, mvy1, 2*width, 2*height );
- h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
-
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
- tmp, 16, mvx1, mvy1, 2*width, 2*height );
- h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
+ int i_mode = x264_size2pixel[height][width];
+
+ x264_mb_mc_0xywh( h, x, y, width, height );
+
+ h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
+ tmp, 16, mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
+
+ if( h->param.analyse.b_weighted_bipred )
+ {
+ const int i_ref0 = h->mb.cache.ref[0][i8];
+ const int weight = h->mb.bipred_weight[i_ref0][i_ref1];
+
+ h->pixf.avg_weight[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16, weight );
+
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ tmp, 16, mvx1, mvy1, 2*width, 2*height );
+ h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16, weight );
+
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ tmp, 16, mvx1, mvy1, 2*width, 2*height );
+ h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16, weight );
+ }
+ else
+ {
+ h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
+
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ tmp, 16, mvx1, mvy1, 2*width, 2*height );
+ h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
+
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ tmp, 16, mvx1, mvy1, 2*width, 2*height );
+ h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
+ }
}
+/* Motion-compensate one direct-predicted 8x8 partition whose top-left 4x4
+ * coordinate is (x,y). A negative cached ref means that list is unused for
+ * this block. With direct_8x8_inference the whole 8x8 is compensated in
+ * one call; otherwise each of its four 4x4 sub-blocks is done separately. */
+static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
+{
+    const int i8 = x264_scan8[0] + x + 8*y;
+
+    /* FIXME: optimize based on current block size, not global settings? */
+    if( h->sps->b_direct8x8_inference )
+    {
+        /* NOTE: braceless nested if/else — each else binds to the nearest
+         * if, which here matches the indentation (bi / L0-only / L1-only) */
+        if( h->mb.cache.ref[0][i8] >= 0 )
+            if( h->mb.cache.ref[1][i8] >= 0 )
+                x264_mb_mc_01xywh( h, x, y, 2, 2 );
+            else
+                x264_mb_mc_0xywh( h, x, y, 2, 2 );
+        else
+            x264_mb_mc_1xywh( h, x, y, 2, 2 );
+    }
+    else
+    {
+        if( h->mb.cache.ref[0][i8] >= 0 )
+        {
+            if( h->mb.cache.ref[1][i8] >= 0 )
+            {
+                /* bi-predicted, per 4x4 sub-block */
+                x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
+                x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
+                x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
+                x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
+            }
+            else
+            {
+                /* list 0 only, per 4x4 sub-block */
+                x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
+                x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
+                x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
+                x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
+            }
+        }
+        else
+        {
+            /* list 1 only, per 4x4 sub-block */
+            x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
+            x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
+            x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
+            x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
+        }
+    }
+}
void x264_mb_mc( x264_t *h )
{
x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
}
}
- else if( h->mb.i_type == P_8x8 )
+ else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
{
int i;
for( i = 0; i < 4; i++ )
x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
break;
+ case D_L1_8x8:
+ x264_mb_mc_1xywh( h, x, y, 2, 2 );
+ break;
+ case D_L1_8x4:
+ x264_mb_mc_1xywh( h, x, y+0, 2, 1 );
+ x264_mb_mc_1xywh( h, x, y+1, 2, 1 );
+ break;
+ case D_L1_4x8:
+ x264_mb_mc_1xywh( h, x+0, y, 1, 2 );
+ x264_mb_mc_1xywh( h, x+1, y, 1, 2 );
+ break;
+ case D_L1_4x4:
+ x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
+ x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
+ x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
+ x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
+ break;
+ case D_BI_8x8:
+ x264_mb_mc_01xywh( h, x, y, 2, 2 );
+ break;
+ case D_BI_8x4:
+ x264_mb_mc_01xywh( h, x, y+0, 2, 1 );
+ x264_mb_mc_01xywh( h, x, y+1, 2, 1 );
+ break;
+ case D_BI_4x8:
+ x264_mb_mc_01xywh( h, x+0, y, 1, 2 );
+ x264_mb_mc_01xywh( h, x+1, y, 1, 2 );
+ break;
+ case D_BI_4x4:
+ x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
+ x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
+ x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
+ x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
+ break;
+ case D_DIRECT_8x8:
+ x264_mb_mc_direct8x8( h, x, y );
+ break;
}
}
}
- else if( h->mb.i_type == B_8x8 || h->mb.i_type == B_DIRECT )
+ else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
{
- x264_log( h, X264_LOG_ERROR, "mc_luma with unsupported mb\n" );
- return;
+ x264_mb_mc_direct8x8( h, 0, 0 );
+ x264_mb_mc_direct8x8( h, 2, 0 );
+ x264_mb_mc_direct8x8( h, 0, 2 );
+ x264_mb_mc_direct8x8( h, 2, 2 );
}
else /* B_*x* */
{
void x264_macroblock_cache_init( x264_t *h )
{
int i, j;
- int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
+ int i_mb_count = h->mb.i_mb_count;
h->mb.i_mb_stride = h->sps->i_mb_width;
+ h->mb.i_b8_stride = h->sps->i_mb_width * 2;
+ h->mb.i_b4_stride = h->sps->i_mb_width * 4;
h->mb.type= x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.qp = x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.cbp = x264_malloc( i_mb_count * sizeof( int16_t) );
+ h->mb.skipbp = x264_malloc( i_mb_count * sizeof( int8_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
h->mb.intra4x4_pred_mode = x264_malloc( i_mb_count * 7 * sizeof( int8_t ) );
/* all coeffs */
h->mb.non_zero_count = x264_malloc( i_mb_count * 24 * sizeof( uint8_t ) );
- h->mb.mv[0] = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
- h->mb.mv[1] = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
- h->mb.ref[0] = x264_malloc( 4 * i_mb_count * sizeof( int16_t ) );
- h->mb.ref[1] = x264_malloc( 4 * i_mb_count * sizeof( int16_t ) );
-
if( h->param.b_cabac )
{
h->mb.chroma_pred_mode = x264_malloc( i_mb_count * sizeof( int8_t) );
}
for( i=0; i<2; i++ )
- for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */
+ for( j=0; j < ( i ? 1 : h->param.i_frame_reference ); j++ )
h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) );
/* init with not avaiable (for top right idx=7,15) */
{
int i, j;
for( i=0; i<2; i++ )
- for( j=0; j<16; j++ )
+ for( j=0; j < ( i ? 1 : h->param.i_frame_reference ); j++ )
x264_free( h->mb.mvr[i][j] );
if( h->param.b_cabac )
{
x264_free( h->mb.mvd[0] );
x264_free( h->mb.mvd[1] );
}
- x264_free( h->mb.mv[0] );
- x264_free( h->mb.mv[1] );
- x264_free( h->mb.ref[0] );
- x264_free( h->mb.ref[1] );
x264_free( h->mb.intra4x4_pred_mode );
x264_free( h->mb.non_zero_count );
+ x264_free( h->mb.skipbp );
x264_free( h->mb.cbp );
x264_free( h->mb.qp );
x264_free( h->mb.type );
}
+/* Per-slice MB-context setup: point the frame-wide mv/ref arrays at the
+ * current reconstructed frame (fdec), and record fdec's reference counts
+ * and reference POCs (L1 POCs only for B slices) for later lookups. */
+void x264_macroblock_slice_init( x264_t *h )
+{
+    int i;
+
+    h->mb.mv[0] = h->fdec->mv[0];
+    h->mb.mv[1] = h->fdec->mv[1];
+    h->mb.ref[0] = h->fdec->ref[0];
+    h->mb.ref[1] = h->fdec->ref[1];
+
+    h->fdec->i_ref[0] = h->i_ref0;
+    h->fdec->i_ref[1] = h->i_ref1;
+    for( i = 0; i < h->i_ref0; i++ )
+        h->fdec->ref_poc[0][i] = h->fref0[i]->i_poc;
+    if( h->sh.i_type == SLICE_TYPE_B )
+    {
+        for( i = 0; i < h->i_ref1; i++ )
+            h->fdec->ref_poc[1][i] = h->fref1[i]->i_poc;
+    }
+}
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
{
- const int i_mb_4x4 = 16 * h->mb.i_mb_stride *i_mb_y + 4 * i_mb_x;
- const int i_mb_8x8 = 4 * h->mb.i_mb_stride *i_mb_y + 2 * i_mb_x;
+ const int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x);
+ const int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x);
int i_top_xy = -1;
int i_left_xy = -1;
h->mb.i_mb_x = i_mb_x;
h->mb.i_mb_y = i_mb_y;
h->mb.i_mb_xy = i_mb_y * h->mb.i_mb_stride + i_mb_x;
+ h->mb.i_b8_xy = i_mb_8x8;
+ h->mb.i_b4_xy = i_mb_4x4;
h->mb.i_neighbour = 0;
/* load picture pointers */
for( j = 0; j < h->i_ref0; j++ )
{
- h->mb.pic.p_fref[0][j][i] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[0][j][i+1] = &h->fref0[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
}
for( j = 0; j < h->i_ref1; j++ )
{
- h->mb.pic.p_fref[1][j][i] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[1][j][i==0 ? 0:i+3] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[1][j][i+1] = &h->fref1[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
}
}
/* load ref/mv/mvd */
if( h->sh.i_type != SLICE_TYPE_I )
{
- int s8x8 = 2 * h->mb.i_mb_stride;
- int s4x4 = 4 * h->mb.i_mb_stride;
+ const int s8x8 = h->mb.i_b8_stride;
+ const int s4x4 = h->mb.i_b4_stride;
int i_top_left_xy = -1;
int i_top_right_xy = -1;
i_top_right_xy = i_top_xy + 1;
}
- for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_P ? 1 : 2 ); i_list++ )
+ for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
/*
h->mb.cache.ref[i_list][x264_scan8[5 ]+1] =
}
}
}
+
+ /* load skip */
+ if( h->param.b_cabac )
+ {
+ if( h->sh.i_type == SLICE_TYPE_B )
+ {
+ memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
+ if( i_left_xy >= 0 )
+ {
+ h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[i_left_xy] & 0x2;
+ h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[i_left_xy] & 0x8;
+ }
+ if( i_top_xy >= 0 )
+ {
+ h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[i_top_xy] & 0x4;
+ h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[i_top_xy] & 0x8;
+ }
+ }
+ else if( h->mb.i_mb_xy == 0 && h->sh.i_type == SLICE_TYPE_P )
+ {
+ memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
+ }
+ }
}
}
{
const int i_mb_xy = h->mb.i_mb_xy;
const int i_mb_type = h->mb.i_type;
- const int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
- const int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
+ const int s8x8 = h->mb.i_b8_stride;
+ const int s4x4 = h->mb.i_b4_stride;
+ const int i_mb_4x4 = h->mb.i_b4_xy;
+ const int i_mb_8x8 = h->mb.i_b8_xy;
int i;
if( !IS_INTRA( i_mb_type ) )
{
int i_list;
- for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_P ? 1 : 2 ); i_list++ )
+ for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
- const int s8x8 = 2 * h->mb.i_mb_stride;
- const int s4x4 = 4 * h->mb.i_mb_stride;
int y,x;
h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] = h->mb.cache.ref[i_list][x264_scan8[0]];
else
{
int i_list;
- for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_P ? 1 : 2 ); i_list++ )
+ for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
- const int s8x8 = 2 * h->mb.i_mb_stride;
- const int s4x4 = 4 * h->mb.i_mb_stride;
int y,x;
h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] =
else
h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC;
- if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) )
+ if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
{
int i_list;
for( i_list = 0; i_list < 2; i_list++ )
}
}
}
+ if( h->sh.i_type == SLICE_TYPE_B )
+ {
+ if( i_mb_type == B_SKIP || i_mb_type == B_DIRECT )
+ h->mb.skipbp[i_mb_xy] = 0xf;
+ else if( i_mb_type == B_8x8 )
+ {
+ int skipbp = 0;
+ for( i = 0; i < 4; i++ )
+ skipbp |= ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) << i;
+ h->mb.skipbp[i_mb_xy] = skipbp;
+ }
+ else
+ h->mb.skipbp[i_mb_xy] = 0;
+ }
}
}
+/* Precompute, for every (L0 ref, L1 ref) pair, the temporal-direct
+ * distance scale factor and the bipred weight, both derived from clipped
+ * POC distances (tb = cur-ref0, td = ref1-ref0, as in the H.264 spec). */
+void x264_macroblock_bipred_init( x264_t *h )
+{
+    int i_ref0, i_ref1;
+    for( i_ref0 = 0; i_ref0 < h->i_ref0; i_ref0++ )
+    {
+        int poc0 = h->fref0[i_ref0]->i_poc;
+        for( i_ref1 = 0; i_ref1 < h->i_ref1; i_ref1++ )
+        {
+            int dist_scale_factor;
+            int poc1 = h->fref1[i_ref1]->i_poc;
+            int td = x264_clip3( poc1 - poc0, -128, 127 );
+            if( td == 0 /* || pic0 is a long-term ref */ )
+                dist_scale_factor = 256;   /* equal distance: unity scale (256 = 1.0 in Q8) */
+            else
+            {
+                int tb = x264_clip3( h->fdec->i_poc - poc0, -128, 127 );
+                int tx = (16384 + (abs(td) >> 1)) / td;
+                dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
+            }
+            h->mb.dist_scale_factor[i_ref0][i_ref1] = dist_scale_factor;
+
+            /* implicit weighted bipred: w = 64 - dsf/4, used only when the
+             * scaled distance is in a sane range; otherwise fall back to
+             * the default weight 32 (presumably equal weighting — verify
+             * against the avg_weight implementation) */
+            dist_scale_factor >>= 2;
+            if( h->param.analyse.b_weighted_bipred
+                && dist_scale_factor >= -64
+                && dist_scale_factor <= 128 )
+                h->mb.bipred_weight[i_ref0][i_ref1] = 64 - dist_scale_factor;
+            else
+                h->mb.bipred_weight[i_ref0][i_ref1] = 32;
+        }
+    }
+}