+static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
+{
+ int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
+ int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
+ int i;
+
+ x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
+
+ for( i = 0; i < 4; i++ )
+ {
+ const int x8 = 2*(i%2);
+ const int y8 = 2*(i/2);
+ /* TODO: MapColToList0 */
+ const int i_ref = h->mb.list1ref0.ref[ i_mb_8x8 + x8/2 + y8 * h->mb.i_mb_stride ];
+
+ if( i_ref == -1 )
+ {
+ x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, 0 );
+ x264_macroblock_cache_mv( h, x8, y8, 2, 2, 0, 0, 0 );
+ x264_macroblock_cache_mv( h, x8, y8, 2, 2, 1, 0, 0 );
+ }
+ else
+ {
+ int tb = x264_clip3( h->fdec->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
+ int td = x264_clip3( h->fref1[0]->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
+ int tx = (16384 + (abs(td) >> 1)) / td;
+ int dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
+ int x4, y4;
+
+ x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref );
+
+ for( y4 = y8; y4 < y8+2; y4++ )
+ for( x4 = x8; x4 < x8+2; x4++ )
+ {
+ const int16_t *mv_col = h->mb.list1ref0.mv[ i_mb_4x4 + x4 + y4 * 4 * h->mb.i_mb_stride ];
+ if( td == 0 /* || pic0 is a long-term ref */ )
+ {
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_col[0], mv_col[1] );
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+ }
+ else
+ {
+ int mv_l0[2];
+ mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+ mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
/* Spatial direct prediction (H.264 8.4.1.2.2): pick one reference per list as
 * the minimum non-negative ref among neighbours A/B/C, predict one 16x16 MV
 * per list, then zero the MVs of 4x4 blocks whose co-located list1 block is
 * (almost) stationary (the col_zero_flag condition). Returns 1 (always
 * available). */
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
    int ref[2];          /* selected reference index per list; -1 = unused */
    int mv[2][2];        /* predicted 16x16 MV per list */
    int i_list;
    int i8, i4;
    const int s8x8 = 2 * h->mb.i_mb_stride;  /* stride of the 8x8 ref map */
    const int s4x4 = 4 * h->mb.i_mb_stride;  /* stride of the 4x4 MV map */
    /* co-located list1 refs/MVs for this MB in the previously coded picture */
    const int8_t *l1ref = &h->mb.list1ref0.ref[ 2*h->mb.i_mb_x + 2*s8x8*h->mb.i_mb_y ];
    const int16_t (*l1mv)[2] = (const int16_t (*)[2])
        &h->mb.list1ref0.mv[ 4*h->mb.i_mb_x + 4*s4x4*h->mb.i_mb_y ];

    for( i_list=0; i_list<2; i_list++ )
    {
        /* neighbour refs: A = left, B = top, C = top-right (cache sentinel
         * -2 = outside picture/slice, -1 = intra) */
        int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
        int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
        int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
        /* C unavailable: substitute D (top-left), per the standard */
        if( i_refc == -2 )
            i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];

        /* ref = minimum of the non-negative neighbour refs */
        ref[i_list] = i_refa;
        if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
            ref[i_list] = i_refb;
        if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
            ref[i_list] = i_refc;
        /* normalize both "unavailable" sentinels to -1 */
        if( ref[i_list] < 0 )
            ref[i_list] = -1;
    }

    if( ref[0] < 0 && ref[1] < 0 )
    {
        /* no usable neighbour in either list: bi-predict from ref 0 with
         * zero MVs, as the spec prescribes */
        ref[0] =
        ref[1] = 0;
        mv[0][0] =
        mv[0][1] =
        mv[1][0] =
        mv[1][1] = 0;
    }
    else
    {
        /* median-predict a 16x16 MV for each list that has a reference */
        for( i_list=0; i_list<2; i_list++ )
        {
            if( ref[i_list] >= 0 )
                x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
            else
                mv[i_list][0] = mv[i_list][1] = 0;
        }
    }

    /* FIXME: clip mv ? */

    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );

    /* col_zero_flag: where the co-located block uses ref 0 with a near-zero
     * MV, force this block's MV to zero in each list that chose ref 0 */
    for( i8=0; i8<4; i8++ )
    {
        const int x8 = i8%2;
        const int y8 = i8/2;
        if( l1ref[ x8 + y8*s8x8 ] == 0 )
        {
            for( i4=0; i4<4; i4++ )
            {
                const int x4 = i4%2 + 2*x8;
                const int y4 = i4/2 + 2*y8;
                const int16_t *mvcol = l1mv[x4 + y4*s4x4];
                /* "near-zero" = both components in [-1, 1] (quarter-pel) */
                if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                {
                    if( ref[0] == 0 )
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
                    if( ref[1] == 0 )
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
                }
            }
        }
    }

    return 1;
}
+
+int x264_mb_predict_mv_direct16x16( x264_t *h )
+{
+ int b_available;
+ if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
+ return 0;
+ else if( h->sh.b_direct_spatial_mv_pred )
+ b_available = x264_mb_predict_mv_direct16x16_spatial( h );
+ else
+ b_available = x264_mb_predict_mv_direct16x16_temporal( h );
+
+ /* cache ref & mv */
+ if( b_available )
+ {
+ int i, l;
+ for( l = 0; l < 2; l++ )
+ for( i = 0; i < 4; i++ )
+ h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]];
+ memcpy(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
+ }
+
+ return b_available;
+}
+
+void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
+{
+ const int x = 2*(idx%2);
+ const int y = 2*(idx/2);
+ int l;
+ x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
+ x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
+ for( l = 0; l < 2; l++ )
+ {
+ *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]] =
+ *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]];
+ *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]+8] =
+ *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]+8];
+ }
+}
+
+/* This just improves encoder performance, it's not part of the spec */