1 /*****************************************************************************
2 * mvpred.c: motion vector prediction
3 *****************************************************************************
4 * Copyright (C) 2003-2011 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Fiona Glaser <fiona@x264.com>
8 * Laurent Aimar <fenrir@via.ecp.fr>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
24 * This program is also available under a commercial proprietary license.
25 * For more information, contact us at licensing@x264.com.
26 *****************************************************************************/
/* x264_mb_predict_mv: median motion-vector prediction for one inter partition.
 * Predicts the list-`i_list` MV of the partition at scan8 index `idx`
 * (width `i_width` in 4x4 units) from the cached neighbours A (left),
 * B (top) and C (top-right); the result is written to mvp[2].
 * NOTE(review): this excerpt has lines elided (embedded line numbers jump,
 * e.g. 42->44, 64->68); braces and several statements are missing, so the
 * comments below describe only the visible code. */
30 void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
32 const int i8 = x264_scan8[idx];
/* Reference index of the partition being predicted. */
33 const int i_ref= h->mb.cache.ref[i_list][i8];
/* Neighbour A: block to the left (scan8 offset -1). */
34 int i_refa = h->mb.cache.ref[i_list][i8 - 1];
35 int16_t *mv_a = h->mb.cache.mv[i_list][i8 - 1];
/* Neighbour B: block above (scan8 offset -8). */
36 int i_refb = h->mb.cache.ref[i_list][i8 - 8];
37 int16_t *mv_b = h->mb.cache.mv[i_list][i8 - 8];
/* Neighbour C: block above and to the right of the partition. */
38 int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
39 int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
41 // Partitions not yet reached in scan order are unavailable.
/* ref == -2 marks an unavailable neighbour; when C is unavailable,
 * fall back to D (above-left, scan8 offset -8-1). */
42 if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
44 i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
45 mv_c = h->mb.cache.mv[i_list][i8 - 8 - 1];
/* Interlaced (MBAFF-style) special case — the start of this condition is
 * elided in this excerpt: when the left neighbour exists and its
 * field/frame coding differs from the current MB, the specially cached
 * topright_mv/topright_ref values are used instead of the regular cache. */
48 && h->mb.cache.ref[i_list][x264_scan8[0]-1] != -2
49 && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_left_xy[0]] )
53 mv_c = h->mb.cache.topright_mv[i_list][0];
54 i_refc = h->mb.cache.topright_ref[i_list][0];
58 mv_c = h->mb.cache.topright_mv[i_list][1];
59 i_refc = h->mb.cache.topright_ref[i_list][1];
63 mv_c = h->mb.cache.topright_mv[i_list][2];
64 i_refc = h->mb.cache.topright_ref[i_list][2];
/* Directional shortcuts for 16x8 / 8x16 partitions (bodies elided in this
 * excerpt: original lines 69-106 are missing). */
68 if( h->mb.i_partition == D_16x8 )
87 else if( h->mb.i_partition == D_8x16 )
/* Count how many neighbours use the same reference as the target. */
107 int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
/* Median of the three candidate MVs (guard condition elided;
 * presumably taken when i_count is not exactly 1 — confirm upstream). */
112 x264_median_mv( mvp, mv_a, mv_b, mv_c );
114 else if( i_count == 1 )
/* Exactly one neighbour shares the reference: its MV is copied
 * (the copy statements themselves are elided here). */
116 if( i_refa == i_ref )
118 else if( i_refb == i_ref )
/* Only A is available (B and C both unavailable): use A. */
123 else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
/* x264_mb_predict_mv_16x16: median MV prediction for a whole 16x16 MB.
 * Same A/B/C neighbour scheme as x264_mb_predict_mv but with fixed scan8
 * offsets relative to X264_SCAN8_0; result written to mvp[2].
 * NOTE(review): lines are elided in this excerpt (e.g. 136->139); the
 * guard before line 139 — presumably "C unavailable, fall back to D" as in
 * x264_mb_predict_mv — is missing, so comments describe visible code only. */
129 void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
/* Neighbour A: left of the MB. */
131 int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
132 int16_t *mv_a = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
/* Neighbour B: above the MB. */
133 int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
134 int16_t *mv_b = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
/* Neighbour C: above-right (top row + 4, i.e. one MB width in 4x4 units). */
135 int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
136 int16_t *mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
/* Fallback to D (above-left); the enclosing condition is elided here. */
139 i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
140 mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
/* Count neighbours sharing the target reference index. */
143 int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
/* Median of the three candidates (guard condition elided). */
148 x264_median_mv( mvp, mv_a, mv_b, mv_c );
150 else if( i_count == 1 )
/* Single matching neighbour: copy its MV (copy statements elided). */
152 if( i_refa == i_ref )
154 else if( i_refb == i_ref )
/* Only A available: use A directly. */
159 else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
/* x264_mb_predict_mv_pskip: MV prediction for P_SKIP macroblocks.
 * If either the left (A) or top (B) neighbour is unavailable (ref == -2),
 * or is ref 0 with a zero MV, the skip MV is forced (the assignment in the
 * taken branch is elided in this excerpt — presumably zero, per the H.264
 * P_SKIP rule; confirm against the full file).  Otherwise the ordinary
 * 16x16 list-0/ref-0 median prediction is used. */
166 void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
168 int i_refa = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
169 int i_refb = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
170 int16_t *mv_a = h->mb.cache.mv[0][X264_SCAN8_0 - 1];
171 int16_t *mv_b = h->mb.cache.mv[0][X264_SCAN8_0 - 8];
/* !(ref | M32(mv)) is true exactly when ref == 0 and both MV components
 * are zero (M32 reads the packed 2x int16 MV as one 32-bit word). */
173 if( i_refa == -2 || i_refb == -2 ||
174 !( i_refa | M32( mv_a ) ) ||
175 !( i_refb | M32( mv_b ) ) )
/* Normal case: standard 16x16 median prediction, list 0, ref 0. */
180 x264_mb_predict_mv_16x16( h, 0, 0, mv );
/* x264_mb_predict_mv_direct16x16_temporal: temporal direct-mode MV
 * derivation for a B macroblock.  Scales the co-located list-0 MVs of the
 * first list-1 reference frame by the POC-based dist_scale_factor and
 * fills the MV/ref cache for both lists.  Returns an int (success flag —
 * the return statements are elided in this excerpt).
 * NOTE(review): several lines are elided here (embedded numbers jump,
 * e.g. 198->202, 210->214); comments describe only the visible code. */
183 static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
/* Offsets of this MB in the co-located frame's 4x4 and 8x8 block grids. */
185 int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
186 int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
/* Type and partitioning of the co-located MB in fref[1][0]. */
187 const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
188 const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
/* List-1 reference is always index 0 in temporal direct. */
190 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
192 h->mb.i_partition = partition_col;
/* Intra co-located MB: both lists get ref 0 and zero MVs. */
194 if( IS_INTRA( type_col ) )
196 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
197 x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
198 x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
202 /* Don't do any checks other than the ones we have to, based
203 * on the size of the colocated partitions.
204 * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
205 int max_i8 = (D_16x16 - partition_col) + 1;
206 int step = (partition_col == D_16x8) + 1;
207 int width = 4 >> ((D_16x16 - partition_col)&1);
208 int height = 4 >> ((D_16x16 - partition_col)>>1);
/* One iteration per co-located 8x8 partition (x8/y8 derivation elided). */
210 for( int i8 = 0; i8 < max_i8; i8 += step )
214 int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
215 int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
/* Map the co-located list-0 ref into the current list 0; the shift/mask
 * dance keeps the field parity bit in place under MBAFF. */
216 int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
220 int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
221 int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
/* Temporal scaling with rounding: mv * scale / 256, rounded to nearest. */
222 int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
223 int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
/* Sliced-threads guard: reject MVs pointing past the per-thread vertical
 * limit (the taken branch — presumably a failure return — is elided). */
224 if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
226 x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
227 x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
/* L1 MV is the scaled MV minus the co-located MV (standard temporal direct). */
228 x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
232 /* the collocated ref isn't in the current list0 */
233 /* FIXME: we might still be able to use direct_8x8 on some partitions */
234 /* FIXME: with B-pyramid + extensive ref list reordering
235 * (not currently used), we would also have to check
236 * l1mv1 like in spatial mode */
/* x264_mb_predict_mv_direct16x16_spatial: spatial direct-mode MV
 * derivation for a B macroblock.  Derives one ref + MV per list from the
 * A/B/C spatial neighbours, then zeroes MVs on 8x8 partitions whose
 * co-located block is (near-)static, per the spatial direct rules.
 * Returns an int (success flag — return statements elided in this excerpt).
 * NOTE(review): lines are elided throughout (e.g. 264->267, 290->292);
 * in particular the declaration of `ref` used below is not visible. */
244 static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
247 ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
/* Co-located list-0/list-1 refs and MVs from the first list-1 frame. */
248 const int8_t *l1ref0 = &h->fref[1][0]->ref[0][h->mb.i_b8_xy];
249 const int8_t *l1ref1 = &h->fref[1][0]->ref[1][h->mb.i_b8_xy];
250 const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref[1][0]->mv[0][h->mb.i_b4_xy],
251 (const int16_t (*)[2]) &h->fref[1][0]->mv[1][h->mb.i_b4_xy] };
252 const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
253 const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
255 h->mb.i_partition = partition_col;
/* Derive (ref, mv) independently for list 0 and list 1. */
257 for( int i_list = 0; i_list < 2; i_list++ )
259 int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
260 int16_t *mv_a = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
261 int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
262 int16_t *mv_b = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
263 int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
264 int16_t *mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
/* Fallback to D (above-left); enclosing condition elided here. */
267 i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
268 mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
/* Minimum non-negative neighbour ref: the unsigned casts make the
 * unavailable marker (-2) compare as a huge value, so it never wins. */
271 int i_ref = X264_MIN3( (unsigned)i_refa, (unsigned)i_refb, (unsigned)i_refc );
/* No valid neighbour ref for this list: MV is zero (guard elided). */
275 M32( mv[i_list] ) = 0;
279 /* Same as x264_mb_predict_mv_16x16, but simplified to eliminate cases
280 * not relevant to spatial direct. */
281 int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
284 x264_median_mv( mv[i_list], mv_a, mv_b, mv_c );
/* Single matching neighbour: copy its MV. */
287 if( i_refa == i_ref )
288 CP32( mv[i_list], mv_a );
289 else if( i_refb == i_ref )
290 CP32( mv[i_list], mv_b );
292 CP32( mv[i_list], mv_c );
/* Broadcast the derived ref/MV over the whole 16x16 MB for this list. */
296 x264_macroblock_cache_ref( h, 0, 0, 4, 4, i_list, i_ref );
297 x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, i_list, mv[i_list] );
/* Both refs negative (0x80 sign-bit test on each packed byte):
 * no usable reference in either list — fall back to ref 0 both lists.
 * NOTE(review): `ref` itself is declared in an elided line. */
301 if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
303 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
304 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
/* Sliced-threads guard: derived MVs must not cross the vertical limit. */
308 if( h->param.i_threads > 1
309 && ( mv[0][1] > h->mb.mv_max_spel[1]
310 || mv[1][1] > h->mb.mv_max_spel[1] ) )
313 fprintf(stderr, "direct_spatial: (%d,%d) (%d,%d) > %d \n",
314 mv[0][0], mv[0][1], mv[1][0], mv[1][1],
315 h->mb.mv_max_spel[1]);
/* Early out: nothing to zero if both MVs are zero already, the co-located
 * MB is intra, or neither list uses ref 0. */
320 if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
323 /* Don't do any checks other than the ones we have to, based
324 * on the size of the colocated partitions.
325 * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
326 int max_i8 = (D_16x16 - partition_col) + 1;
327 int step = (partition_col == D_16x8) + 1;
328 int width = 4 >> ((D_16x16 - partition_col)&1);
329 int height = 4 >> ((D_16x16 - partition_col)>>1);
/* Per co-located 8x8 partition: pick which l1mv list to inspect, and if
 * the co-located MV is within +-1 (quarter-pel), force this partition's
 * MV to zero for each list that uses ref 0. */
332 for( int i8 = 0; i8 < max_i8; i8 += step )
335 const int y8 = i8>>1;
336 const int o8 = x8 + y8 * h->mb.i_b8_stride;
337 const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
/* idx selection: co-located list 0 if its ref is 0, else list 1 if that
 * ref is 0 (the idx assignments themselves are elided in this excerpt). */
339 if( l1ref0[o8] == 0 )
341 else if( l1ref0[o8] < 0 && l1ref1[o8] == 0 )
346 if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
348 if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
349 if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
/* x264_mb_predict_mv_direct16x16: dispatch to spatial or temporal direct
 * prediction per the slice header, then (optionally) report through
 * *b_changed whether the resulting direct MVs/refs differ from the ones
 * cached on the previous call, and refresh that cache.
 * Returns whether direct prediction is available (b_available — its
 * declaration and final return are elided in this excerpt).
 * NOTE(review): lines are elided (e.g. 374->376); comments describe only
 * the visible code. */
356 int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
359 if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
361 else if( h->sh.b_direct_spatial_mv_pred )
362 b_available = x264_mb_predict_mv_direct16x16_spatial( h );
364 b_available = x264_mb_predict_mv_direct16x16_temporal( h );
/* Change detection against the direct_mv/direct_ref snapshot; XORs
 * accumulate any bit difference into `changed`. */
366 if( b_changed != NULL && b_available )
/* Corner blocks are checked first — cheapest early rejection. */
370 changed = M32( h->mb.cache.direct_mv[0][0] ) ^ M32( h->mb.cache.mv[0][x264_scan8[0]] );
371 changed |= M32( h->mb.cache.direct_mv[1][0] ) ^ M32( h->mb.cache.mv[1][x264_scan8[0]] );
372 changed |= h->mb.cache.direct_ref[0][0] ^ h->mb.cache.ref[0][x264_scan8[0]];
373 changed |= h->mb.cache.direct_ref[1][0] ^ h->mb.cache.ref[1][x264_scan8[0]];
/* Non-16x16 partitions: also compare the bottom-right 8x8 (scan8[12]). */
374 if( !changed && h->mb.i_partition != D_16x16 )
376 changed |= M32( h->mb.cache.direct_mv[0][3] ) ^ M32( h->mb.cache.mv[0][x264_scan8[12]] );
377 changed |= M32( h->mb.cache.direct_mv[1][3] ) ^ M32( h->mb.cache.mv[1][x264_scan8[12]] );
378 changed |= h->mb.cache.direct_ref[0][3] ^ h->mb.cache.ref[0][x264_scan8[12]];
379 changed |= h->mb.cache.direct_ref[1][3] ^ h->mb.cache.ref[1][x264_scan8[12]];
/* Full 8x8 partitioning: compare the remaining two 8x8 blocks too. */
381 if( !changed && h->mb.i_partition == D_8x8 )
383 changed |= M32( h->mb.cache.direct_mv[0][1] ) ^ M32( h->mb.cache.mv[0][x264_scan8[4]] );
384 changed |= M32( h->mb.cache.direct_mv[1][1] ) ^ M32( h->mb.cache.mv[1][x264_scan8[4]] );
385 changed |= M32( h->mb.cache.direct_mv[0][2] ) ^ M32( h->mb.cache.mv[0][x264_scan8[8]] );
386 changed |= M32( h->mb.cache.direct_mv[1][2] ) ^ M32( h->mb.cache.mv[1][x264_scan8[8]] );
387 changed |= h->mb.cache.direct_ref[0][1] ^ h->mb.cache.ref[0][x264_scan8[4]];
388 changed |= h->mb.cache.direct_ref[1][1] ^ h->mb.cache.ref[1][x264_scan8[4]];
389 changed |= h->mb.cache.direct_ref[0][2] ^ h->mb.cache.ref[0][x264_scan8[8]];
390 changed |= h->mb.cache.direct_ref[1][2] ^ h->mb.cache.ref[1][x264_scan8[8]];
392 *b_changed = changed;
/* Snapshot the freshly derived direct MVs/refs (one entry per 8x8 block)
 * for the next call's change detection. */
399 for( int l = 0; l < 2; l++ )
401 CP32( h->mb.cache.direct_mv[l][0], h->mb.cache.mv[l][x264_scan8[ 0]] );
402 CP32( h->mb.cache.direct_mv[l][1], h->mb.cache.mv[l][x264_scan8[ 4]] );
403 CP32( h->mb.cache.direct_mv[l][2], h->mb.cache.mv[l][x264_scan8[ 8]] );
404 CP32( h->mb.cache.direct_mv[l][3], h->mb.cache.mv[l][x264_scan8[12]] );
405 h->mb.cache.direct_ref[l][0] = h->mb.cache.ref[l][x264_scan8[ 0]];
406 h->mb.cache.direct_ref[l][1] = h->mb.cache.ref[l][x264_scan8[ 4]];
407 h->mb.cache.direct_ref[l][2] = h->mb.cache.ref[l][x264_scan8[ 8]];
408 h->mb.cache.direct_ref[l][3] = h->mb.cache.ref[l][x264_scan8[12]];
409 h->mb.cache.direct_partition = h->mb.i_partition;
415 /* This just improves encoder performance, it's not part of the spec */
/* x264_mb_predict_mv_ref16x16: collect candidate MV predictors for motion
 * search on a 16x16 block using reference i_ref of list i_list.  Fills
 * mvc[] (up to 9 entries) and the count *i_mvc.  Candidates: the 8x16/16x8
 * sub-MV, lowres lookahead MV, spatial neighbour MVs from h->mb.mvr, and a
 * POC-scaled temporal predictor.
 * NOTE(review): this excerpt is truncated — the function continues past
 * the last visible line, and interior lines are elided (e.g. 423->428). */
416 void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[9][2], int *i_mvc )
/* Per-(list,ref) MV record array, indexed by MB address. */
418 int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
/* Appends one candidate to mvc[] (counter increment elided here). */
421 #define SET_MVP(mvp) \
423 CP32( mvc[i], mvp ); \
/* B-slice: reuse the bottom-right sub-partition MV if it used i_ref. */
428 if( h->sh.i_type == SLICE_TYPE_B
429 && h->mb.cache.ref[i_list][x264_scan8[12]] == i_ref )
431 SET_MVP( h->mb.cache.mv[i_list][x264_scan8[12]] );
/* Lookahead (lowres) MV, only meaningful for ref 0. */
434 if( i_ref == 0 && h->frames.b_have_lowres )
436 int idx = i_list ? h->fref[1][0]->i_frame-h->fenc->i_frame-1
437 : h->fenc->i_frame-h->fref[0][0]->i_frame-1;
438 if( idx <= h->param.i_bframe )
440 int16_t (*lowres_mv)[2] = h->fenc->lowres_mvs[i_list][idx];
/* 0x7fff in the first entry marks "lowres MVs not computed". */
441 if( lowres_mv[0][0] != 0x7fff )
/* Double the half-res MV; the mask clears the bit carried between the
 * two packed int16 components by the 32-bit *2. */
443 M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
449 /* spatial predictors */
450 SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
451 SET_MVP( mvr[h->mb.i_mb_top_xy] );
452 SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
453 SET_MVP( mvr[h->mb.i_mb_topright_xy] );
456 /* temporal predictors */
457 if( h->fref[0][0]->i_ref[0] > 0 )
459 x264_frame_t *l0 = h->fref[0][0];
460 int field = h->mb.i_mb_y&1;
/* POC distance between current frame and the target reference; the
 * delta_poc terms select the correct field parity under interlacing. */
461 int curpoc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
462 int refpoc = h->fref[i_list][i_ref>>h->sh.b_mbaff]->i_poc;
463 refpoc += l0->i_delta_poc[field^(i_ref&1)];
/* Scales the 16x16 MV of a neighbouring MB in the previous frame by the
 * POC ratio (fixed-point, /256 with rounding). */
465 #define SET_TMVP( dx, dy ) \
467 int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
468 int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
469 mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
470 mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
/* Right and bottom neighbours only exist away from the frame edge;
 * the SET_TMVP invocations themselves are beyond this excerpt. */
475 if( h->mb.i_mb_x < h->mb.i_mb_width-1 )
477 if( h->mb.i_mb_y < h->mb.i_mb_height-1 )