git.sesse.net Git - x264/blob - common/mvpred.c

   1 /*****************************************************************************
   2  * mvpred.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003-2008 x264 project
   5  *
   6  * Authors: Loren Merritt <lorenm@u.washington.edu>
   7  *          Fiona Glaser <fiona@x264.com>
   8  *          Laurent Aimar <fenrir@via.ecp.fr>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include "common.h"
  26
  27 void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
  28 {
  29     const int i8 = x264_scan8[idx];
  30     const int i_ref= h->mb.cache.ref[i_list][i8];
  31     int     i_refa = h->mb.cache.ref[i_list][i8 - 1];
  32     int16_t *mv_a  = h->mb.cache.mv[i_list][i8 - 1];
  33     int     i_refb = h->mb.cache.ref[i_list][i8 - 8];
  34     int16_t *mv_b  = h->mb.cache.mv[i_list][i8 - 8];
  35     int     i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
  36     int16_t *mv_c  = h->mb.cache.mv[i_list][i8 - 8 + i_width];
  37
  38     if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
  39     {
  40         i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
  41         mv_c   = h->mb.cache.mv[i_list][i8 - 8 - 1];
  42     }
  43
  44     if( h->mb.i_partition == D_16x8 )
  45     {
  46         if( idx == 0 )
  47         {
  48             if( i_refb == i_ref )
  49             {
  50                 CP32( mvp, mv_b );
  51                 return;
  52             }
  53         }
  54         else
  55         {
  56             if( i_refa == i_ref )
  57             {
  58                 CP32( mvp, mv_a );
  59                 return;
  60             }
  61         }
  62     }
  63     else if( h->mb.i_partition == D_8x16 )
  64     {
  65         if( idx == 0 )
  66         {
  67             if( i_refa == i_ref )
  68             {
  69                 CP32( mvp, mv_a );
  70                 return;
  71             }
  72         }
  73         else
  74         {
  75             if( i_refc == i_ref )
  76             {
  77                 CP32( mvp, mv_c );
  78                 return;
  79             }
  80         }
  81     }
  82
  83     int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
  84
  85     if( i_count > 1 )
  86     {
  87 median:
  88         x264_median_mv( mvp, mv_a, mv_b, mv_c );
  89     }
  90     else if( i_count == 1 )
  91     {
  92         if( i_refa == i_ref )
  93             CP32( mvp, mv_a );
  94         else if( i_refb == i_ref )
  95             CP32( mvp, mv_b );
  96         else
  97             CP32( mvp, mv_c );
  98     }
  99     else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
 100         CP32( mvp, mv_a );
 101     else
 102         goto median;
 103 }
 104
 105 void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
 106 {
 107     int     i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
 108     int16_t *mv_a  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
 109     int     i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
 110     int16_t *mv_b  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
 111     int     i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
 112     int16_t *mv_c  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
 113     if( i_refc == -2 )
 114     {
 115         i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
 116         mv_c   = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
 117     }
 118
 119     int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
 120
 121     if( i_count > 1 )
 122     {
 123 median:
 124         x264_median_mv( mvp, mv_a, mv_b, mv_c );
 125     }
 126     else if( i_count == 1 )
 127     {
 128         if( i_refa == i_ref )
 129             CP32( mvp, mv_a );
 130         else if( i_refb == i_ref )
 131             CP32( mvp, mv_b );
 132         else
 133             CP32( mvp, mv_c );
 134     }
 135     else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
 136         CP32( mvp, mv_a );
 137     else
 138         goto median;
 139 }
 140
 141
 142 void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
 143 {
 144     int     i_refa = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
 145     int     i_refb = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
 146     int16_t *mv_a  = h->mb.cache.mv[0][X264_SCAN8_0 - 1];
 147     int16_t *mv_b  = h->mb.cache.mv[0][X264_SCAN8_0 - 8];
 148
 149     if( i_refa == -2 || i_refb == -2 ||
 150         !( i_refa | M32( mv_a ) ) ||
 151         !( i_refb | M32( mv_b ) ) )
 152     {
 153         M32( mv ) = 0;
 154     }
 155     else
 156         x264_mb_predict_mv_16x16( h, 0, 0, mv );
 157 }
 158
 159 static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
 160 {
 161     int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
 162     int i_mb_8x8 =  4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
 163     const int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
 164     const int partition_col = h->fref1[0]->mb_partition[h->mb.i_mb_xy];
 165
 166     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
 167
 168     h->mb.i_partition = partition_col;
 169
 170     if( IS_INTRA( type_col ) )
 171     {
 172         x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
 173         x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, 0 );
 174         x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, 0 );
 175         return 1;
 176     }
 177
 178     /* Don't do any checks other than the ones we have to, based
 179      * on the size of the colocated partitions.
 180      * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
 181     int max_i8 = (D_16x16 - partition_col) + 1;
 182     int step = (partition_col == D_16x8) + 1;
 183     int width = 4 >> ((D_16x16 - partition_col)&1);
 184     int height = 4 >> ((D_16x16 - partition_col)>>1);
 185
 186     for( int i8 = 0; i8 < max_i8; i8 += step )
 187     {
 188         int x8 = i8&1;
 189         int y8 = i8>>1;
 190         int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
 191         int i_ref1_ref = h->fref1[0]->ref[0][i_part_8x8];
 192         int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
 193
 194         if( i_ref >= 0 )
 195         {
 196             int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
 197             int16_t *mv_col = h->fref1[0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
 198             int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
 199             int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
 200             if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
 201                 return 0;
 202             x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
 203             x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
 204             x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
 205         }
 206         else
 207         {
 208             /* the collocated ref isn't in the current list0 */
 209             /* FIXME: we might still be able to use direct_8x8 on some partitions */
 210             /* FIXME: with B-pyramid + extensive ref list reordering
 211              *   (not currently used), we would also have to check
 212              *   l1mv1 like in spatial mode */
 213             return 0;
 214         }
 215     }
 216
 217     return 1;
 218 }
 219
 220 static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
 221 {
 222     int8_t ref[2];
 223     ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
 224     const int8_t *l1ref0 = &h->fref1[0]->ref[0][h->mb.i_b8_xy];
 225     const int8_t *l1ref1 = &h->fref1[0]->ref[1][h->mb.i_b8_xy];
 226     const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref1[0]->mv[0][h->mb.i_b4_xy],
 227                                     (const int16_t (*)[2]) &h->fref1[0]->mv[1][h->mb.i_b4_xy] };
 228     const int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
 229     const int partition_col = h->fref1[0]->mb_partition[h->mb.i_mb_xy];
 230
 231     h->mb.i_partition = partition_col;
 232
 233     for( int i_list = 0; i_list < 2; i_list++ )
 234     {
 235         int     i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
 236         int16_t *mv_a  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
 237         int     i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
 238         int16_t *mv_b  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
 239         int     i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
 240         int16_t *mv_c  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
 241         if( i_refc == -2 )
 242         {
 243             i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
 244             mv_c   = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
 245         }
 246
 247         int i_ref = X264_MIN3( (unsigned)i_refa, (unsigned)i_refb, (unsigned)i_refc );
 248         if( i_ref < 0 )
 249         {
 250             i_ref = -1;
 251             M32( mv[i_list] ) = 0;
 252         }
 253         else
 254         {
 255             /* Same as x264_mb_predict_mv_16x16, but simplified to eliminate cases
 256              * not relevant to spatial direct. */
 257             int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
 258
 259             if( i_count > 1 )
 260                 x264_median_mv( mv[i_list], mv_a, mv_b, mv_c );
 261             else
 262             {
 263                 if( i_refa == i_ref )
 264                     CP32( mv[i_list], mv_a );
 265                 else if( i_refb == i_ref )
 266                     CP32( mv[i_list], mv_b );
 267                 else
 268                     CP32( mv[i_list], mv_c );
 269             }
 270         }
 271
 272         x264_macroblock_cache_ref( h, 0, 0, 4, 4, i_list, i_ref );
 273         x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, i_list, mv[i_list] );
 274         ref[i_list] = i_ref;
 275     }
 276
 277     if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
 278     {
 279         x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
 280         x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
 281         return 1;
 282     }
 283
 284     if( h->param.i_threads > 1
 285         && ( mv[0][1] > h->mb.mv_max_spel[1]
 286           || mv[1][1] > h->mb.mv_max_spel[1] ) )
 287     {
 288 #if 0
 289         fprintf(stderr, "direct_spatial: (%d,%d) (%d,%d) > %d \n",
 290                 mv[0][0], mv[0][1], mv[1][0], mv[1][1],
 291                 h->mb.mv_max_spel[1]);
 292 #endif
 293         return 0;
 294     }
 295
 296     if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
 297         return 1;
 298
 299     /* Don't do any checks other than the ones we have to, based
 300      * on the size of the colocated partitions.
 301      * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
 302     int max_i8 = (D_16x16 - partition_col) + 1;
 303     int step = (partition_col == D_16x8) + 1;
 304     int width = 4 >> ((D_16x16 - partition_col)&1);
 305     int height = 4 >> ((D_16x16 - partition_col)>>1);
 306
 307     /* col_zero_flag */
 308     for( int i8 = 0; i8 < max_i8; i8 += step )
 309     {
 310         const int x8 = i8&1;
 311         const int y8 = i8>>1;
 312         const int o8 = x8 + y8 * h->mb.i_b8_stride;
 313         const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
 314         int idx;
 315         if( l1ref0[o8] == 0 )
 316             idx = 0;
 317         else if( l1ref0[o8] < 0 && l1ref1[o8] == 0 )
 318             idx = 1;
 319         else
 320             continue;
 321
 322         if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
 323         {
 324             if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
 325             if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
 326         }
 327     }
 328
 329     return 1;
 330 }
 331
 332 int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
 333 {
 334     int b_available;
 335     if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
 336         return 0;
 337     else if( h->sh.b_direct_spatial_mv_pred )
 338         b_available = x264_mb_predict_mv_direct16x16_spatial( h );
 339     else
 340         b_available = x264_mb_predict_mv_direct16x16_temporal( h );
 341
 342     if( b_changed != NULL && b_available )
 343     {
 344         int changed;
 345
 346         changed  = M32( h->mb.cache.direct_mv[0][0] ) ^ M32( h->mb.cache.mv[0][x264_scan8[0]] );
 347         changed |= M32( h->mb.cache.direct_mv[1][0] ) ^ M32( h->mb.cache.mv[1][x264_scan8[0]] );
 348         changed |= h->mb.cache.direct_ref[0][0] ^ h->mb.cache.ref[0][x264_scan8[0]];
 349         changed |= h->mb.cache.direct_ref[1][0] ^ h->mb.cache.ref[1][x264_scan8[0]];
 350         if( !changed && h->mb.i_partition != D_16x16 )
 351         {
 352             changed |= M32( h->mb.cache.direct_mv[0][3] ) ^ M32( h->mb.cache.mv[0][x264_scan8[12]] );
 353             changed |= M32( h->mb.cache.direct_mv[1][3] ) ^ M32( h->mb.cache.mv[1][x264_scan8[12]] );
 354             changed |= h->mb.cache.direct_ref[0][3] ^ h->mb.cache.ref[0][x264_scan8[12]];
 355             changed |= h->mb.cache.direct_ref[1][3] ^ h->mb.cache.ref[1][x264_scan8[12]];
 356         }
 357         if( !changed && h->mb.i_partition == D_8x8 )
 358         {
 359             changed |= M32( h->mb.cache.direct_mv[0][1] ) ^ M32( h->mb.cache.mv[0][x264_scan8[4]] );
 360             changed |= M32( h->mb.cache.direct_mv[1][1] ) ^ M32( h->mb.cache.mv[1][x264_scan8[4]] );
 361             changed |= M32( h->mb.cache.direct_mv[0][2] ) ^ M32( h->mb.cache.mv[0][x264_scan8[8]] );
 362             changed |= M32( h->mb.cache.direct_mv[1][2] ) ^ M32( h->mb.cache.mv[1][x264_scan8[8]] );
 363             changed |= h->mb.cache.direct_ref[0][1] ^ h->mb.cache.ref[0][x264_scan8[4]];
 364             changed |= h->mb.cache.direct_ref[1][1] ^ h->mb.cache.ref[1][x264_scan8[4]];
 365             changed |= h->mb.cache.direct_ref[0][2] ^ h->mb.cache.ref[0][x264_scan8[8]];
 366             changed |= h->mb.cache.direct_ref[1][2] ^ h->mb.cache.ref[1][x264_scan8[8]];
 367         }
 368         *b_changed = changed;
 369         if( !changed )
 370             return b_available;
 371     }
 372
 373     /* cache ref & mv */
 374     if( b_available )
 375         for( int l = 0; l < 2; l++ )
 376         {
 377             CP32( h->mb.cache.direct_mv[l][0], h->mb.cache.mv[l][x264_scan8[ 0]] );
 378             CP32( h->mb.cache.direct_mv[l][1], h->mb.cache.mv[l][x264_scan8[ 4]] );
 379             CP32( h->mb.cache.direct_mv[l][2], h->mb.cache.mv[l][x264_scan8[ 8]] );
 380             CP32( h->mb.cache.direct_mv[l][3], h->mb.cache.mv[l][x264_scan8[12]] );
 381             h->mb.cache.direct_ref[l][0] = h->mb.cache.ref[l][x264_scan8[ 0]];
 382             h->mb.cache.direct_ref[l][1] = h->mb.cache.ref[l][x264_scan8[ 4]];
 383             h->mb.cache.direct_ref[l][2] = h->mb.cache.ref[l][x264_scan8[ 8]];
 384             h->mb.cache.direct_ref[l][3] = h->mb.cache.ref[l][x264_scan8[12]];
 385             h->mb.cache.direct_partition = h->mb.i_partition;
 386         }
 387
 388     return b_available;
 389 }
 390
 391 /* This just improves encoder performance, it's not part of the spec */
 392 void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[9][2], int *i_mvc )
 393 {
 394     int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
 395     int i = 0;
 396
 397 #define SET_MVP(mvp) \
 398     { \
 399         CP32( mvc[i], mvp ); \
 400         i++; \
 401     }
 402
 403     /* b_direct */
 404     if( h->sh.i_type == SLICE_TYPE_B
 405         && h->mb.cache.ref[i_list][x264_scan8[12]] == i_ref )
 406     {
 407         SET_MVP( h->mb.cache.mv[i_list][x264_scan8[12]] );
 408     }
 409
 410     if( i_ref == 0 && h->frames.b_have_lowres )
 411     {
 412         int16_t (*lowres_mv)[2] = i_list ? h->fenc->lowres_mvs[1][h->fref1[0]->i_frame-h->fenc->i_frame-1]
 413                                          : h->fenc->lowres_mvs[0][h->fenc->i_frame-h->fref0[0]->i_frame-1];
 414         if( lowres_mv[0][0] != 0x7fff )
 415         {
 416             M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
 417             i++;
 418         }
 419     }
 420
 421     /* spatial predictors */
 422     if( h->mb.i_neighbour_frame & MB_LEFT )
 423     {
 424         SET_MVP( mvr[h->mb.i_mb_left_xy] );
 425     }
 426     if( h->mb.i_neighbour_frame & MB_TOP )
 427     {
 428         SET_MVP( mvr[h->mb.i_mb_top_xy] );
 429
 430         if( h->mb.i_neighbour_frame & MB_TOPLEFT )
 431             SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
 432         if( h->mb.i_neighbour_frame & MB_TOPRIGHT )
 433             SET_MVP( mvr[h->mb.i_mb_topright_xy] );
 434     }
 435 #undef SET_MVP
 436
 437     /* temporal predictors */
 438     if( h->fref0[0]->i_ref[0] > 0 )
 439     {
 440         x264_frame_t *l0 = h->fref0[0];
 441         x264_frame_t **fref = i_list ? h->fref1 : h->fref0;
 442         int field = h->mb.i_mb_y&1;
 443         int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
 444         int refpoc = fref[i_ref>>h->sh.b_mbaff]->i_poc;
 445         if( h->sh.b_mbaff && field^(i_ref&1) )
 446             refpoc += h->sh.i_delta_poc_bottom;
 447
 448 #define SET_TMVP( dx, dy ) \
 449         { \
 450             int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
 451             int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
 452             mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
 453             mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
 454             i++; \
 455         }
 456
 457         SET_TMVP(0,0);
 458         if( h->mb.i_mb_x < h->sps->i_mb_width-1 )
 459             SET_TMVP(1,0);
 460         if( h->mb.i_mb_y < h->sps->i_mb_height-1 )
 461             SET_TMVP(0,1);
 462 #undef SET_TMVP
 463     }
 464
 465     *i_mvc = i;
 466 }