1 /*****************************************************************************
2 * analyse.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 x264 project
5 * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8 * Loren Merritt <lorenm@u.washington.edu>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
30 #include "common/common.h"
31 #include "macroblock.h"
33 #include "ratecontrol.h"
/* NOTE(review): tail of the per-prediction-list (L0/L1) analysis-results
 * struct. The embedded original line numbers skip (45, 50, 54, 58, 69), so
 * earlier members (e.g. the 16x16/16x8/8x16/8x8 x264_me_t fields and their
 * costs) appear elided from this chunk — confirm against the full file. */
45 int mvc[16][5][2]; /* [ref][0] is 16x16 mv,
46 [ref][1..4] are 8x8 mv from partition [0..3] */
/* 4x4 sub-partition search: cost and one x264_me_t per 4x4 block of each 8x8 */
50 int i_cost4x4[4]; /* cost per 8x8 partition */
51 x264_me_t me4x4[4][4];
/* 8x4 sub-partition search: two x264_me_t per 8x8 block */
54 int i_cost8x4[4]; /* cost per 8x8 partition */
55 x264_me_t me8x4[4][2];
/* 4x8 sub-partition search */
58 int i_cost4x8[4]; /* cost per 8x8 partition */
59 x264_me_t me4x8[4][4];
69 } x264_mb_analysis_list_t;
/* NOTE(review): per-macroblock analysis context. Embedded line numbers skip
 * (73→82→92…), so several members (qp/lambda fields, intra 16x16/SAD fields,
 * predict_mode scratch, etc.) appear elided from this chunk. */
73 /* conduct the analysis using this lambda and QP */
82 /* Take some shortcuts in intra search if intra is deemed unlikely */
/* best intra 8x8 prediction mode per 8x8 block (indexed [x][y]) */
92 int i_predict8x8[2][2];
/* best intra 4x4 prediction mode per 4x4 block (indexed [x][y]) */
95 int i_predict4x4[4][4];
/* best intra chroma 8x8 prediction mode */
99 int i_predict8x8chroma;
101 /* II: Inter part P/B frame */
102 x264_mb_analysis_list_t l0;
103 x264_mb_analysis_list_t l1;
105 int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
106 int i_cost16x16direct;
108 int i_cost8x8direct[4];
112 int i_mb_partition16x8[2]; /* mb_partition_e */
113 int i_mb_partition8x16[2];
114 int i_mb_type16x8; /* mb_class_e */
/* whether direct prediction (B_DIRECT) can be used for this MB */
117 int b_direct_available;
119 } x264_mb_analysis_t;
/* Per-QP Lagrange multiplier for mode/mv bit costs, one entry per QP 0..51.
 * NOTE(review): the closing "};" line appears elided from this chunk. */
121 /* lambda = pow(2,qp/6-2) */
122 static const int i_qp0_cost_table[52] = {
123 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
124 1, 1, 1, 1, /* 8-11 */
125 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
126 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
127 6, 7, 8, 9,10,11,13,14, /* 28-35 */
128 16,18,20,23,25,29,32,36, /* 36-43 */
129 40,45,51,57,64,72,81,91 /* 44-51 */
/* Per-QP squared lambda (scaled by 0.9) used for rate-distortion costs.
 * NOTE(review): rows for QP 48-51 and the closing "};" appear elided here. */
132 /* pow(lambda,2) * .9 */
133 static const int i_qp0_cost2_table[52] = {
134 1, 1, 1, 1, 1, 1, /* 0-5 */
135 1, 1, 1, 1, 1, 1, /* 6-11 */
136 1, 1, 1, 2, 2, 3, /* 12-17 */
137 4, 5, 6, 7, 9, 11, /* 18-23 */
138 14, 18, 23, 29, 36, 46, /* 24-29 */
139 58, 73, 91, 115, 145, 183, /* 30-35 */
140 230, 290, 366, 461, 581, 731, /* 36-41 */
141 922,1161,1463,1843,2322,2926, /* 42-47 */
/* Approximate CAVLC bit costs (in bits) for signalling each macroblock /
 * sub-macroblock type; multiplied by lambda when added to a mode's cost.
 * NOTE(review): the closing "};" of each table and the initializer of
 * i_sub_mb_p_cost_table appear elided from this chunk. */
145 /* TODO: calculate CABAC costs */
146 static const int i_mb_b_cost_table[19] = {
147 9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
/* B-slice 16x8/8x16 partition type prefix costs */
149 static const int i_mb_b16x8_cost_table[17] = {
150 0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
/* B-slice sub-partition (sub_mb_type) costs */
152 static const int i_sub_mb_b_cost_table[13] = {
153 7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
/* P-slice sub-partition (sub_mb_type) costs */
155 static const int i_sub_mb_p_cost_table[4] = {
159 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
161 /* initialize an array of lambda*nbits for all possible mvs */
/* Lazily builds (and caches, per QP) a lookup table of lambda * bits needed
 * to code each possible mv component difference, then points a->p_cost_mv at
 * the zero-centered table for the current QP.
 * NOTE(review): the static cache is never freed and is not thread-safe as
 * written; function braces and the declaration of `i` appear elided here. */
162 static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
164 static int16_t *p_cost_mv[52];
166 if( !p_cost_mv[a->i_qp] )
168 /* could be faster, but isn't called many times */
169 /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
171 p_cost_mv[a->i_qp] = x264_malloc( (4*4*2048 + 1) * sizeof(int16_t) );
172 p_cost_mv[a->i_qp] += 2*4*2048;
173 for( i = 0; i <= 2*4*2048; i++ )
175 p_cost_mv[a->i_qp][-i] =
176 p_cost_mv[a->i_qp][i] = a->i_lambda * bs_size_se( i );
180 a->p_cost_mv = p_cost_mv[a->i_qp];
/* Resets the analysis context for the current macroblock: sets the QP-derived
 * lambdas, propagates per-frame analysis options into h->mb, clamps the
 * allowed motion-vector range to the frame/level limits, and initializes all
 * mode costs to COST_MAX. Also decides whether fast-intra shortcuts apply.
 * NOTE(review): embedded line numbers skip throughout, so braces and several
 * initializations are elided from this chunk. */
183 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
185 memset( a, 0, sizeof( x264_mb_analysis_t ) );
187 /* conduct the analysis using this lambda and QP */
188 a->i_qp = h->mb.i_qp = i_qp;
189 a->i_lambda = i_qp0_cost_table[i_qp];
190 a->i_lambda2 = i_qp0_cost2_table[i_qp];
/* RD-based mode decision only at high subpel refinement (and, for B-frames,
 * only if b_bframe_rdo was requested) */
191 a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 &&
192 ( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo );
194 h->mb.i_me_method = h->param.analyse.i_me_method;
195 h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
196 h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
197 && h->mb.i_subpel_refine >= 5;
198 h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->b_mbrd;
199 h->mb.b_transform_8x8 = 0;
200 h->mb.b_noise_reduction = 0;
206 a->i_sad_i8x8chroma = COST_MAX;
209 a->i_best_satd = COST_MAX;
211 /* II: Inter part P/B frame */
212 if( h->sh.i_type != SLICE_TYPE_I )
215 int i_fmv_range = h->param.analyse.i_mv_range - 16;
217 /* Calculate max allowed MV range */
218 #define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range )
/* horizontal limits: 24 pixels beyond the MB edge in qpel units, fullpel
 * search clamped to the configured range, subpel allowed 16 fullpel further */
219 h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
220 h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
221 h->mb.mv_min_fpel[0] = CLIP_FMV( -16*h->mb.i_mb_x - 8 );
222 h->mb.mv_max_fpel[0] = CLIP_FMV( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 8 );
223 h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 16 );
224 h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 16 );
/* vertical limits only need recomputing at the start of each MB row */
225 if( h->mb.i_mb_x == 0)
227 h->mb.mv_min[1] = 4*( -16*h->mb.i_mb_y - 24 );
228 h->mb.mv_max[1] = 4*( 16*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 24 );
229 h->mb.mv_min_fpel[1] = CLIP_FMV( -16*h->mb.i_mb_y - 8 );
230 h->mb.mv_max_fpel[1] = CLIP_FMV( 16*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 8 );
231 h->mb.mv_min_spel[1] = 4*( h->mb.mv_min_fpel[1] - 16 );
232 h->mb.mv_max_spel[1] = 4*( h->mb.mv_max_fpel[1] + 16 );
/* all inter partition costs start at COST_MAX so any real search result wins */
237 a->l0.i_cost8x8 = COST_MAX;
239 for( i = 0; i < 4; i++ )
243 a->l0.i_cost4x8[i] = COST_MAX;
247 a->l0.i_cost8x16 = COST_MAX;
248 if( h->sh.i_type == SLICE_TYPE_B )
251 a->l1.i_cost8x8 = COST_MAX;
253 for( i = 0; i < 4; i++ )
258 a->i_cost8x8direct[i] = COST_MAX;
265 a->i_cost16x16direct =
268 a->i_cost8x16bi = COST_MAX;
271 /* Fast intra decision */
/* only take shortcuts once enough MBs have been coded to gather statistics */
272 if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
/* intra is considered likely when any neighbor (or the co-located MB in the
 * previous P reference) is intra, or the frame is already intra-heavy */
275 || IS_INTRA( h->mb.i_mb_type_left )
276 || IS_INTRA( h->mb.i_mb_type_top )
277 || IS_INTRA( h->mb.i_mb_type_topleft )
278 || IS_INTRA( h->mb.i_mb_type_topright )
279 || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
280 || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
281 { /* intra is likely */ }
/* Fills *mode with the intra 16x16 prediction modes usable given which
 * neighboring MBs are available, and sets *pi_count to how many were written.
 * NOTE(review): braces and the "*pi_count = N;" assignments per branch appear
 * elided from this chunk. */
296 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
298 if( i_neighbour & MB_TOPLEFT )
300 /* top and left available */
301 *mode++ = I_PRED_16x16_V;
302 *mode++ = I_PRED_16x16_H;
303 *mode++ = I_PRED_16x16_DC;
304 *mode++ = I_PRED_16x16_P;
/* only left neighbor: DC predicts from left samples only */
307 else if( i_neighbour & MB_LEFT )
310 *mode++ = I_PRED_16x16_DC_LEFT;
311 *mode++ = I_PRED_16x16_H;
/* only top neighbor: DC predicts from top samples only */
314 else if( i_neighbour & MB_TOP )
317 *mode++ = I_PRED_16x16_DC_TOP;
318 *mode++ = I_PRED_16x16_V;
/* no neighbors: flat DC-128 prediction is the only option */
324 *mode = I_PRED_16x16_DC_128;
/* Same as predict_16x16_mode_available but for the intra chroma 8x8
 * prediction modes. NOTE(review): braces and the "*pi_count = N;"
 * assignments per branch appear elided from this chunk. */
330 static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
332 if( i_neighbour & MB_TOPLEFT )
334 /* top and left available */
335 *mode++ = I_PRED_CHROMA_V;
336 *mode++ = I_PRED_CHROMA_H;
337 *mode++ = I_PRED_CHROMA_DC;
338 *mode++ = I_PRED_CHROMA_P;
341 else if( i_neighbour & MB_LEFT )
344 *mode++ = I_PRED_CHROMA_DC_LEFT;
345 *mode++ = I_PRED_CHROMA_H;
348 else if( i_neighbour & MB_TOP )
351 *mode++ = I_PRED_CHROMA_DC_TOP;
352 *mode++ = I_PRED_CHROMA_V;
/* no neighbors available */
358 *mode = I_PRED_CHROMA_DC_128;
/* Fills *mode with the intra 4x4 (also used for 8x8) prediction modes usable
 * given the per-block neighbor availability, and sets *pi_count accordingly.
 * NOTE(review): braces, the branch conditions on b_l/b_t, and the
 * "*pi_count = N;" assignments appear elided from this chunk. */
364 static void predict_4x4_mode_available( unsigned int i_neighbour,
365 int *mode, int *pi_count )
367 int b_l = i_neighbour & MB_LEFT;
368 int b_t = i_neighbour & MB_TOP;
/* left and top both available */
373 *mode++ = I_PRED_4x4_DC;
374 *mode++ = I_PRED_4x4_H;
375 *mode++ = I_PRED_4x4_V;
376 *mode++ = I_PRED_4x4_DDL;
/* diagonal-down-right family additionally needs the top-left sample */
377 if( i_neighbour & MB_TOPLEFT )
379 *mode++ = I_PRED_4x4_DDR;
380 *mode++ = I_PRED_4x4_VR;
381 *mode++ = I_PRED_4x4_HD;
384 *mode++ = I_PRED_4x4_VL;
385 *mode++ = I_PRED_4x4_HU;
/* only left available */
389 *mode++ = I_PRED_4x4_DC_LEFT;
390 *mode++ = I_PRED_4x4_H;
391 *mode++ = I_PRED_4x4_HU;
/* only top available */
396 *mode++ = I_PRED_4x4_DC_TOP;
397 *mode++ = I_PRED_4x4_V;
398 *mode++ = I_PRED_4x4_DDL;
399 *mode++ = I_PRED_4x4_VL;
/* no neighbors available */
404 *mode++ = I_PRED_4x4_DC_128;
/* Exhaustively tries every available intra chroma 8x8 prediction mode on both
 * chroma planes, scoring each by mbcmp (SAD/SATD) plus the lambda-weighted
 * bits to code the mode, and records the best in a->i_predict8x8chroma /
 * a->i_sad_i8x8chroma. Returns early if a result was already computed.
 * NOTE(review): braces and some local declarations appear elided here. */
409 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
416 uint8_t *p_dstc[2], *p_srcc[2];
/* already computed for this MB (COST_MAX is the "unset" sentinel) */
418 if( a->i_sad_i8x8chroma < COST_MAX )
421 /* 8x8 prediction selection for chroma */
422 p_dstc[0] = h->mb.pic.p_fdec[1];
423 p_dstc[1] = h->mb.pic.p_fdec[2];
424 p_srcc[0] = h->mb.pic.p_fenc[1];
425 p_srcc[1] = h->mb.pic.p_fenc[2];
427 predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
428 a->i_sad_i8x8chroma = COST_MAX;
429 for( i = 0; i < i_max; i++ )
434 i_mode = predict_mode[i];
436 /* we do the prediction */
437 h->predict_8x8c[i_mode]( p_dstc[0] );
438 h->predict_8x8c[i_mode]( p_dstc[1] );
440 /* we calculate the cost */
441 i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,
442 p_srcc[0], FENC_STRIDE ) +
443 h->pixf.mbcmp[PIXEL_8x8]( p_dstc[1], FDEC_STRIDE,
444 p_srcc[1], FENC_STRIDE ) +
445 a->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );
447 /* if i_score is lower it is better */
448 if( a->i_sad_i8x8chroma > i_sad )
450 a->i_predict8x8chroma = i_mode;
451 a->i_sad_i8x8chroma = i_sad;
455 h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
/* Full intra mode analysis for the current macroblock: tries I_16x16 always,
 * then I_4x4 and I_8x8 if enabled in the analyse flags. Each candidate is
 * scored by mbcmp plus lambda-weighted mode-signalling bits; under RD mode
 * (a->b_mbrd, implied by the x264_rd_cost_mb calls) promising candidates are
 * re-scored with true rate-distortion cost, while the rest are scaled by an
 * estimated satd-to-rd ratio (f8_satd_rd_ratio, fixed-point <<8).
 * i_cost_inter is the best inter cost so far; it sets the early-out threshold
 * and the fast-intra bailout. NOTE(review): braces, else-branches and several
 * local declarations are elided from this chunk — control flow below is
 * partially reconstructed from the surviving lines. */
458 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_inter )
460 const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
461 uint8_t *p_src = h->mb.pic.p_fenc[0];
462 uint8_t *p_dst = h->mb.pic.p_fdec[0];
463 int f8_satd_rd_ratio = 0;
/* intra must beat the best inter cost by a slice-type-dependent margin */
470 if( h->sh.i_type == SLICE_TYPE_B )
471 i_satd_thresh = a->i_best_satd * 9/8;
473 i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10;
475 /*---------------- Try all modes and calculate their score ---------------*/
477 /* 16x16 prediction selection */
478 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
479 for( i = 0; i < i_max; i++ )
484 i_mode = predict_mode[i];
485 h->predict_16x16[i_mode]( p_dst );
487 i_sad = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
488 a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
489 if( a->i_sad_i16x16 > i_sad )
491 a->i_predict16x16 = i_mode;
492 a->i_sad_i16x16 = i_sad;
/* fixed-point (<<8) ratio used to rescale satd scores into the rd domain */
498 f8_satd_rd_ratio = ((unsigned)i_cost_inter << 8) / a->i_best_satd + 1;
499 x264_mb_analyse_intra_chroma( h, a );
500 if( h->mb.b_chroma_me )
501 a->i_sad_i16x16 += a->i_sad_i8x8chroma;
502 if( a->i_sad_i16x16 < i_satd_thresh )
504 h->mb.i_type = I_16x16;
505 h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
506 a->i_sad_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
509 a->i_sad_i16x16 = a->i_sad_i16x16 * f8_satd_rd_ratio >> 8;
513 if( h->sh.i_type == SLICE_TYPE_B )
514 /* cavlc mb type prefix */
515 a->i_sad_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16];
/* fast-intra bailout: if 16x16 intra is already twice the inter cost,
 * don't bother with 4x4/8x8 */
516 if( a->b_fast_intra && a->i_sad_i16x16 > 2*i_cost_inter )
520 /* 4x4 prediction selection */
521 if( flags & X264_ANALYSE_I4x4 )
524 for( idx = 0; idx < 16; idx++ )
/* predicted (most probable) mode costs 1 bit to signal, others 4 */
532 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
533 x = block_idx_x[idx];
534 y = block_idx_y[idx];
536 p_src_by = p_src + 4 * x + 4 * y * FENC_STRIDE;
537 p_dst_by = p_dst + 4 * x + 4 * y * FDEC_STRIDE;
540 predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );
542 if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
543 /* emulate missing topright samples */
544 *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
546 for( i = 0; i < i_max; i++ )
551 i_mode = predict_mode[i];
552 h->predict_4x4[i_mode]( p_dst_by );
554 i_sad = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,
555 p_src_by, FENC_STRIDE )
556 + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
560 a->i_predict4x4[x][y] = i_mode;
564 a->i_sad_i4x4 += i_best;
566 /* we need to encode this block now (for next ones) */
567 h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by );
568 x264_mb_encode_i4x4( h, idx, a->i_qp );
570 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
573 a->i_sad_i4x4 += a->i_lambda * 24; /* from JVT (SATD0) */
576 if( h->mb.b_chroma_me )
577 a->i_sad_i4x4 += a->i_sad_i8x8chroma;
578 if( a->i_sad_i4x4 < i_satd_thresh )
580 h->mb.i_type = I_4x4;
581 a->i_sad_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 );
584 a->i_sad_i4x4 = a->i_sad_i4x4 * f8_satd_rd_ratio >> 8;
588 if( h->sh.i_type == SLICE_TYPE_B )
589 a->i_sad_i4x4 += a->i_lambda * i_mb_b_cost_table[I_4x4];
593 /* 8x8 prediction selection */
594 if( flags & X264_ANALYSE_I8x8 )
597 for( idx = 0; idx < 4; idx++ )
605 i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx );
609 p_src_by = p_src + 8 * x + 8 * y * FENC_STRIDE;
610 p_dst_by = p_dst + 8 * x + 8 * y * FDEC_STRIDE;
/* 8x8 blocks reuse the 4x4 mode-availability logic */
613 predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
614 for( i = 0; i < i_max; i++ )
619 i_mode = predict_mode[i];
620 h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] );
622 /* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
623 i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dst_by, FDEC_STRIDE,
624 p_src_by, FENC_STRIDE )
625 + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
629 a->i_predict8x8[x][y] = i_mode;
633 a->i_sad_i8x8 += i_best;
635 /* we need to encode this block now (for next ones) */
636 h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, h->mb.i_neighbour8[idx] );
637 x264_mb_encode_i8x8( h, idx, a->i_qp );
639 x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
644 if( h->mb.b_chroma_me )
645 a->i_sad_i8x8 += a->i_sad_i8x8chroma;
646 if( a->i_sad_i8x8 < i_satd_thresh )
648 h->mb.i_type = I_8x8;
649 a->i_sad_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
652 a->i_sad_i8x8 = a->i_sad_i8x8 * f8_satd_rd_ratio >> 8;
656 // FIXME some bias like in i4x4?
657 if( h->sh.i_type == SLICE_TYPE_B )
658 a->i_sad_i8x8 += a->i_lambda * i_mb_b_cost_table[I_8x8];
/* Second-pass intra refinement under true rate-distortion cost: for the
 * already-chosen intra type (I_16x16 / I_4x4 / I_8x8) it re-tries each
 * candidate prediction mode with x264_rd_cost_* and keeps the cheapest.
 * For 4x4/8x8 it snapshots the reconstructed pixels and non-zero-count cache
 * of the best mode so they can be restored after a worse mode overwrites
 * them. NOTE(review): braces, else-branches, local declarations (pels,
 * pels_h/pels_v, i_nnz, j) and the "restore best" conditionals appear elided
 * from this chunk. */
663 static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
665 uint8_t *p_src = h->mb.pic.p_fenc[0];
666 uint8_t *p_dst = h->mb.pic.p_fdec[0];
669 int i_max, i_sad, i_best, i_mode;
673 if( h->mb.i_type == I_16x16 )
675 int old_pred_mode = a->i_predict16x16;
676 i_best = a->i_sad_i16x16;
677 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
678 for( i = 0; i < i_max; i++ )
/* the current best mode was already RD-costed in analyse_intra; skip it */
680 if( predict_mode[i] == old_pred_mode )
682 h->mb.i_intra16x16_pred_mode = predict_mode[i];
683 i_sad = x264_rd_cost_mb( h, a->i_lambda2 );
686 a->i_predict16x16 = predict_mode[i];
691 else if( h->mb.i_type == I_4x4 )
693 for( idx = 0; idx < 16; idx++ )
701 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
702 x = block_idx_x[idx];
703 y = block_idx_y[idx];
705 p_src_by = p_src + 4*x + 4*y*FENC_STRIDE;
706 p_dst_by = p_dst + 4*x + 4*y*FDEC_STRIDE;
707 predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );
709 if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
710 /* emulate missing topright samples */
711 *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
713 for( i = 0; i < i_max; i++ )
715 i_mode = predict_mode[i];
716 h->predict_4x4[i_mode]( p_dst_by );
718 i_sad = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
722 a->i_predict4x4[x][y] = i_mode;
/* snapshot the best mode's reconstruction + nnz so they can be restored */
724 pels[0] = *(uint32_t*)(p_dst_by+0*FDEC_STRIDE);
725 pels[1] = *(uint32_t*)(p_dst_by+1*FDEC_STRIDE);
726 pels[2] = *(uint32_t*)(p_dst_by+2*FDEC_STRIDE);
727 pels[3] = *(uint32_t*)(p_dst_by+3*FDEC_STRIDE);
728 i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]];
/* restore the best mode's reconstruction before moving to the next block */
732 *(uint32_t*)(p_dst_by+0*FDEC_STRIDE) = pels[0];
733 *(uint32_t*)(p_dst_by+1*FDEC_STRIDE) = pels[1];
734 *(uint32_t*)(p_dst_by+2*FDEC_STRIDE) = pels[2];
735 *(uint32_t*)(p_dst_by+3*FDEC_STRIDE) = pels[3];
736 h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz;
738 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
741 else if( h->mb.i_type == I_8x8 )
743 for( idx = 0; idx < 4; idx++ )
753 i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx );
757 p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
758 p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
759 predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
760 for( i = 0; i < i_max; i++ )
762 i_mode = predict_mode[i];
763 h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] );
765 i_sad = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );
769 a->i_predict8x8[x][y] = i_mode;
/* for 8x8 only the bottom row and right column feed later predictions,
 * so only those pixels are snapshotted/restored */
772 pels_h = *(uint64_t*)(p_dst_by+7*FDEC_STRIDE);
775 pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
777 i_nnz[j] = h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]];
781 *(uint64_t*)(p_dst_by+7*FDEC_STRIDE) = pels_h;
784 p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
786 h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]] = i_nnz[j];
788 x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
/* LOAD_FENC: point an x264_me_t at the encode-side (source) luma+chroma
 * pixels for the sub-block at (xoff,yoff); chroma offsets are halved (4:2:0).
 * Multi-statement macro without do/while(0) — callers must not use it as a
 * bare if-body. (Comments cannot be placed inside the continuation lines.) */
793 #define LOAD_FENC( m, src, xoff, yoff) \
794 (m)->i_stride[0] = h->mb.pic.i_stride[0]; \
795 (m)->i_stride[1] = h->mb.pic.i_stride[1]; \
796 (m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*FENC_STRIDE]; \
797 (m)->p_fenc[1] = &(src)[1][((xoff)>>1)+((yoff)>>1)*FENC_STRIDE]; \
798 (m)->p_fenc[2] = &(src)[2][((xoff)>>1)+((yoff)>>1)*FENC_STRIDE];
/* LOAD_HPELS: point an x264_me_t at the reference planes for (xoff,yoff) —
 * indices 0-3 are the luma full/half-pel planes, 4-5 the chroma planes —
 * plus the integral image used by ESA motion search. */
800 #define LOAD_HPELS(m, src, list, ref, xoff, yoff) \
801 (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
802 (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
803 (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
804 (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
805 (m)->p_fref[4] = &(src)[4][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
806 (m)->p_fref[5] = &(src)[5][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
807 (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]];
/* REF_COST: lambda-weighted bits to code reference index `ref` with a
 * truncated Exp-Golomb code bounded by the active ref count of `list`. */
809 #define REF_COST(list, ref) \
810 (a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l##list##_active - 1, ref ))
/* P-slice 16x16 motion search over every L0 reference frame. Adds the
 * ref-index signalling cost to each candidate, keeps the best in
 * a->l0.me16x16, and caches the per-ref mv for neighbor prediction. May
 * terminate the whole MB analysis early as P_SKIP when ref 0's mv is close
 * enough to the skip predictor and the probe succeeds. NOTE(review): braces,
 * some declarations (i_ref, m, mvskip) and the best-cost bookkeeping lines
 * are elided from this chunk. */
812 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
816 int mvc[7][2], i_mvc;
/* halfpel threshold lets me_search skip subpel refinement on clearly-worse
 * refs; only useful with more than one reference */
817 int i_halfpel_thresh = INT_MAX;
818 int *p_halfpel_thresh = h->i_ref0>1 ? &i_halfpel_thresh : NULL;
820 /* 16x16 Search on all ref frame */
821 m.i_pixel = PIXEL_16x16;
822 m.p_cost_mv = a->p_cost_mv;
823 LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
825 a->l0.me16x16.cost = INT_MAX;
826 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
828 const int i_ref_cost = REF_COST( 0, i_ref );
829 i_halfpel_thresh -= i_ref_cost;
830 m.i_ref_cost = i_ref_cost;
833 /* search with ref */
834 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 0 );
835 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
836 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
837 x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
/* early P_SKIP detection: cheap residual on ref 0 with an mv near the skip
 * predictor */
840 * SSD threshold would probably be better than SATD */
841 if( i_ref == 0 && a->b_try_pskip && m.cost-m.cost_mv < 300*a->i_lambda )
844 x264_mb_predict_mv_pskip( h, mvskip );
845 if( abs(m.mv[0]-mvskip[0]) + abs(m.mv[1]-mvskip[1]) <= 1
846 && x264_macroblock_probe_pskip( h ) )
848 h->mb.i_type = P_SKIP;
849 x264_analyse_update_cache( h, a );
854 m.cost += i_ref_cost;
855 i_halfpel_thresh += i_ref_cost;
857 if( m.cost < a->l0.me16x16.cost )
860 /* save mv for predicting neighbors */
861 a->l0.mvc[i_ref][0][0] =
862 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
863 a->l0.mvc[i_ref][0][1] =
864 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
867 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
/* under RD mode decision, replace the satd-based cost with the true RD cost */
872 a->i_best_satd = a->l0.me16x16.cost;
873 h->mb.i_partition = D_16x16;
874 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
875 a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
/* P-slice 8x8 motion search where each 8x8 partition may choose its own
 * reference frame. Prunes the reference list using the 16x16 result and the
 * neighbors' refs, then searches every surviving ref for each of the four
 * 8x8 blocks, keeping per-block winners in a->l0.me8x8[]. NOTE(review):
 * braces, several declarations (i, i_ref, m, x8/y8) and the winner-copy
 * lines are elided from this chunk. */
879 static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
883 uint8_t **p_fenc = h->mb.pic.p_fenc;
884 int i_halfpel_thresh = INT_MAX;
885 int *p_halfpel_thresh = /*h->i_ref0>1 ? &i_halfpel_thresh : */NULL;
887 int i_maxref = h->i_ref0-1;
/* XXX needed for x264_mb_predict_mv */
889 h->mb.i_partition = D_8x8;
891 /* early termination: if 16x16 chose ref 0, then evaluate no refs older
892 * than those used by the neighbors */
893 if( i_maxref > 0 && a->l0.me16x16.i_ref == 0 &&
894 h->mb.i_mb_type_top && h->mb.i_mb_type_left )
/* NOTE(review): X264_MAX here grows i_maxref to at least each neighbor ref;
 * the preceding line that resets i_maxref to 0 appears elided. */
897 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 - 8 - 1 ] );
898 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 - 8 + 0 ] );
899 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 - 8 + 2 ] );
900 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 - 8 + 4 ] );
901 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 + 0 - 1 ] );
902 i_maxref = X264_MAX( i_maxref, h->mb.cache.ref[0][ X264_SCAN8_0 + 2*8 - 1 ] );
/* seed each ref's mv-candidate list with this MB's stored 16x16 mv */
905 for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
907 a->l0.mvc[i_ref][0][0] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0];
908 a->l0.mvc[i_ref][0][1] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1];
911 for( i = 0; i < 4; i++ )
913 x264_me_t *l0m = &a->l0.me8x8[i];
917 m.i_pixel = PIXEL_8x8;
918 m.p_cost_mv = a->p_cost_mv;
920 LOAD_FENC( &m, p_fenc, 8*x8, 8*y8 );
922 for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
924 const int i_ref_cost = REF_COST( 0, i_ref );
925 i_halfpel_thresh -= i_ref_cost;
926 m.i_ref_cost = i_ref_cost;
929 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*x8, 8*y8 );
930 x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
931 x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );
932 x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_halfpel_thresh );
934 m.cost += i_ref_cost;
935 i_halfpel_thresh += i_ref_cost;
/* remember this block's mv as a candidate for the next 8x8 block */
936 *(uint64_t*)a->l0.mvc[i_ref][i+1] = *(uint64_t*)m.mv;
938 if( m.cost < l0m->cost )
/* commit the winning mv/ref to the cache for later mv prediction */
941 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv[0], l0m->mv[1] );
942 x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, l0m->i_ref );
945 l0m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
948 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
949 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
/* under RD mode decision, replace the satd-based total with the true RD cost */
952 if( a->i_best_satd > a->l0.i_cost8x8 )
953 a->i_best_satd = a->l0.i_cost8x8;
954 h->mb.i_type = P_8x8;
955 h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
956 h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
957 a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
/* P-slice 8x8 motion search with a single shared reference (the one chosen
 * by the 16x16 search). Each 8x8 block is searched with the 16x16 mv and the
 * previous blocks' mvs as candidates; results land in a->l0.me8x8[].
 * NOTE(review): braces and the declarations of i, x8/y8, i_mvc are elided
 * from this chunk. */
961 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
963 const int i_ref = a->l0.me16x16.i_ref;
964 const int i_ref_cost = REF_COST( 0, i_ref );
965 uint8_t **p_fref = h->mb.pic.p_fref[0][i_ref];
966 uint8_t **p_fenc = h->mb.pic.p_fenc;
968 int (*mvc)[2] = a->l0.mvc[i_ref];
971 /* XXX Needed for x264_mb_predict_mv */
972 h->mb.i_partition = D_8x8;
/* first mv candidate is the 16x16 result */
975 *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.me16x16.mv;
977 for( i = 0; i < 4; i++ )
979 x264_me_t *m = &a->l0.me8x8[i];
983 m->i_pixel = PIXEL_8x8;
984 m->p_cost_mv = a->p_cost_mv;
985 m->i_ref_cost = i_ref_cost;
988 LOAD_FENC( m, p_fenc, 8*x8, 8*y8 );
989 LOAD_HPELS( m, p_fref, 0, i_ref, 8*x8, 8*y8 );
990 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
991 x264_me_search( h, m, mvc, i_mvc );
993 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
/* append this block's mv to the candidate list for subsequent blocks */
995 *(uint64_t*)mvc[i_mvc] = *(uint64_t*)m->mv;
999 m->cost += i_ref_cost;
1000 m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
1003 /* theoretically this should include 4*ref_cost,
1004 * but 3 seems a better approximation of cabac. */
1005 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
1006 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost -
1007 REF_COST( 0, a->l0.me16x16.i_ref );
/* under RD mode decision, replace the satd-based total with the true RD cost */
1010 if( a->i_best_satd > a->l0.i_cost8x8 )
1011 a->i_best_satd = a->l0.i_cost8x8;
1012 h->mb.i_type = P_8x8;
1013 h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
1014 h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
1015 a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
/* P-slice 16x8 motion search: two horizontal halves, each tried with the
 * reference(s) chosen by the corresponding 8x8 blocks, using the cached
 * per-ref mvs as search candidates. Results land in a->l0.me16x8[].
 * NOTE(review): braces and the declarations of i, j, m, mvc are elided
 * from this chunk. */
1019 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
1022 uint8_t **p_fenc = h->mb.pic.p_fenc;
1026 /* XXX Needed for x264_mb_predict_mv */
1027 h->mb.i_partition = D_16x8;
1029 for( i = 0; i < 2; i++ )
1031 x264_me_t *l0m = &a->l0.me16x8[i];
/* candidate refs: the two 8x8 blocks in this half; search once if equal */
1032 const int ref8[2] = { a->l0.me8x8[2*i].i_ref, a->l0.me8x8[2*i+1].i_ref };
1033 const int i_ref8s = ( ref8[0] == ref8[1] ) ? 1 : 2;
1035 m.i_pixel = PIXEL_16x8;
1036 m.p_cost_mv = a->p_cost_mv;
1038 LOAD_FENC( &m, p_fenc, 0, 8*i );
1039 l0m->cost = INT_MAX;
1040 for( j = 0; j < i_ref8s; j++ )
1042 const int i_ref = ref8[j];
1043 const int i_ref_cost = REF_COST( 0, i_ref );
1044 m.i_ref_cost = i_ref_cost;
1047 /* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */
1048 *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
1049 *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][2*i+1];
1050 *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][2*i+2];
1052 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );
1053 x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );
1054 x264_mb_predict_mv( h, 0, 8*i, 4, m.mvp );
1055 x264_me_search( h, &m, mvc, 3 );
1057 m.cost += i_ref_cost;
1059 if( m.cost < l0m->cost )
/* commit the winner for this half so the next half's mv prediction is right */
1062 x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, l0m->mv[0], l0m->mv[1] );
1063 x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, l0m->i_ref );
1066 a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
/* under RD mode decision, replace the satd-based total with the true RD cost */
1069 if( a->i_best_satd > a->l0.i_cost16x8 )
1070 a->i_best_satd = a->l0.i_cost16x8;
1071 h->mb.i_type = P_L0;
1072 a->l0.i_cost16x8 = x264_rd_cost_mb( h, a->i_lambda2 );
/* P-slice 8x16 motion search: the vertical-halves mirror of
 * x264_mb_analyse_inter_p16x8 (same ref pruning and candidate seeding);
 * results land in a->l0.me8x16[]. NOTE(review): braces and the declarations
 * of i, j, m, mvc are elided from this chunk. */
1076 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
1079 uint8_t **p_fenc = h->mb.pic.p_fenc;
1083 /* XXX Needed for x264_mb_predict_mv */
1084 h->mb.i_partition = D_8x16;
1086 for( i = 0; i < 2; i++ )
1088 x264_me_t *l0m = &a->l0.me8x16[i];
/* candidate refs come from the two vertically-stacked 8x8 blocks */
1089 const int ref8[2] = { a->l0.me8x8[i].i_ref, a->l0.me8x8[i+2].i_ref };
1090 const int i_ref8s = ( ref8[0] == ref8[1] ) ? 1 : 2;
1092 m.i_pixel = PIXEL_8x16;
1093 m.p_cost_mv = a->p_cost_mv;
1095 LOAD_FENC( &m, p_fenc, 8*i, 0 );
1096 l0m->cost = INT_MAX;
1097 for( j = 0; j < i_ref8s; j++ )
1099 const int i_ref = ref8[j];
1100 const int i_ref_cost = REF_COST( 0, i_ref );
1101 m.i_ref_cost = i_ref_cost;
1104 *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
1105 *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][i+1];
1106 *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][i+3];
1108 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );
1109 x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );
1110 x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );
1111 x264_me_search( h, &m, mvc, 3 );
1113 m.cost += i_ref_cost;
1115 if( m.cost < l0m->cost )
1118 x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, l0m->mv[0], l0m->mv[1] );
1119 x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, l0m->i_ref );
1122 a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
/* under RD mode decision, replace the satd-based total with the true RD cost */
1125 if( a->i_best_satd > a->l0.i_cost8x16 )
1126 a->i_best_satd = a->l0.i_cost8x16;
1127 h->mb.i_type = P_L0;
1128 a->l0.i_cost8x16 = x264_rd_cost_mb( h, a->i_lambda2 );
/* Computes the chroma cost of a sub-8x8 partitioning (4x4, 8x4 or 4x8) of
 * block i8x8: motion-compensates both chroma planes with each sub-block's mv
 * into 8x8 scratch buffers, then compares them to the encode-side chroma with
 * mbcmp. Returns the summed U+V cost. NOTE(review): closing braces of the
 * if/else chain are elided from this chunk. */
1132 static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
1134 DECLARE_ALIGNED( uint8_t, pix1[8*8], 8 );
1135 DECLARE_ALIGNED( uint8_t, pix2[8*8], 8 );
1136 const int i_stride = h->mb.pic.i_stride[1];
/* chroma offsets of this 8x8 luma block: 4x4 chroma area in 4:2:0 */
1137 const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
1138 const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
/* CHROMA4x4MC: run chroma MC for one sub-block on both planes (refs 4 and 5
 * are the chroma reference planes) */
1140 #define CHROMA4x4MC( width, height, me, x, y ) \
1141 h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
1142 h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
1144 if( pixel == PIXEL_4x4 )
1146 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][0], 0,0 );
1147 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][1], 0,2 );
1148 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][2], 2,0 );
1149 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][3], 2,2 );
1151 else if( pixel == PIXEL_8x4 )
1153 CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][0], 0,0 );
1154 CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][1], 0,2 );
/* else: PIXEL_4x8 */
1158 CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][0], 0,0 );
1159 CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
1162 return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 8 )
1163 + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 8 );
/* P-slice 4x4 sub-partition search inside 8x8 block i8x8, reusing that
 * block's reference and its 8x8 mv as the search seed. Accumulates the four
 * sub-block costs (plus ref and sub_mb_type signalling, plus optional chroma
 * cost) into a->l0.i_cost4x4[i8x8]. NOTE(review): braces and the declaration
 * of i4x4 are elided from this chunk. */
1166 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
1168 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
1169 uint8_t **p_fenc = h->mb.pic.p_fenc;
1170 const int i_ref = a->l0.me8x8[i8x8].i_ref;
1173 /* XXX Needed for x264_mb_predict_mv */
1174 h->mb.i_partition = D_8x8;
1176 for( i4x4 = 0; i4x4 < 4; i4x4++ )
1178 const int idx = 4*i8x8 + i4x4;
1179 const int x4 = block_idx_x[idx];
1180 const int y4 = block_idx_y[idx];
/* only the first sub-block uses the parent 8x8 mv as an extra candidate */
1181 const int i_mvc = (i4x4 == 0);
1183 x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
1185 m->i_pixel = PIXEL_4x4;
1186 m->p_cost_mv = a->p_cost_mv;
1188 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
1189 LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
1191 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
1192 x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
1194 x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
1196 a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
1197 a->l0.me4x4[i8x8][1].cost +
1198 a->l0.me4x4[i8x8][2].cost +
1199 a->l0.me4x4[i8x8][3].cost +
1200 REF_COST( 0, i_ref ) +
1201 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
1202 if( h->mb.b_chroma_me )
1203 a->l0.i_cost4x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 );
/* P-slice 8x4 sub-partition search inside 8x8 block i8x8, seeded from the
 * 4x4 results; accumulates the two sub-block costs (plus ref/sub_mb_type
 * signalling and optional chroma) into a->l0.i_cost8x4[i8x8]. NOTE(review):
 * braces and the declaration of i8x4 are elided from this chunk. */
1206 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
1208 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
1209 uint8_t **p_fenc = h->mb.pic.p_fenc;
1210 const int i_ref = a->l0.me8x8[i8x8].i_ref;
1213 /* XXX Needed for x264_mb_predict_mv */
1214 h->mb.i_partition = D_8x8;
1216 for( i8x4 = 0; i8x4 < 2; i8x4++ )
1218 const int idx = 4*i8x8 + 2*i8x4;
1219 const int x4 = block_idx_x[idx];
1220 const int y4 = block_idx_y[idx];
1221 const int i_mvc = (i8x4 == 0);
1223 x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
1225 m->i_pixel = PIXEL_8x4;
1226 m->p_cost_mv = a->p_cost_mv;
1228 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
1229 LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
1231 x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
1232 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
1234 x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
1236 a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
1237 REF_COST( 0, i_ref ) +
1238 a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
1239 if( h->mb.b_chroma_me )
1240 a->l0.i_cost8x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 );
/* P-slice 4x8 sub-partition search inside 8x8 block i8x8 — the vertical
 * mirror of x264_mb_analyse_inter_p8x4; accumulates the two sub-block costs
 * into a->l0.i_cost4x8[i8x8]. NOTE(review): braces and the declaration of
 * i4x8 are elided from this chunk. */
1243 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
1245 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
1246 uint8_t **p_fenc = h->mb.pic.p_fenc;
1247 const int i_ref = a->l0.me8x8[i8x8].i_ref;
1250 /* XXX Needed for x264_mb_predict_mv */
1251 h->mb.i_partition = D_8x8;
1253 for( i4x8 = 0; i4x8 < 2; i4x8++ )
1255 const int idx = 4*i8x8 + i4x8;
1256 const int x4 = block_idx_x[idx];
1257 const int y4 = block_idx_y[idx];
1258 const int i_mvc = (i4x8 == 0);
1260 x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
1262 m->i_pixel = PIXEL_4x8;
1263 m->p_cost_mv = a->p_cost_mv;
1265 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
1266 LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
1268 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
1269 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
1271 x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
1273 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
1274 REF_COST( 0, i_ref ) +
1275 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
1276 if( h->mb.b_chroma_me )
1277 a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
/* Score B_DIRECT mode: compare the already-motion-compensated fdec (produced
 * by a prior direct-MV prediction + MC pass, per the comment below) against
 * fenc, per 8x8 block. Fills a->i_cost8x8direct[] and a->i_cost16x16direct,
 * then (when RD is enabled, presumably — the b_mbrd guard line is not visible
 * here) replaces the 16x16 cost with a full RD cost. */
1282 /* Assumes that fdec still contains the results of
1283 * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
1280 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
1285 uint8_t **p_fenc = h->mb.pic.p_fenc;
1286 uint8_t **p_fdec = h->mb.pic.p_fdec;
1289 a->i_cost16x16direct = 0;
1290 for( i = 0; i < 4; i++ )
1292 const int x = (i&1)*8;
1293 const int y = (i>>1)*8;
1294 a->i_cost16x16direct +=
1295 a->i_cost8x8direct[i] =
1296 h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[0][x+y*FENC_STRIDE], FENC_STRIDE, &p_fdec[0][x+y*FDEC_STRIDE], FDEC_STRIDE );
1299 a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
1301 a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
1305 if( a->i_cost16x16direct < a->i_best_satd )
1306 a->i_best_satd = a->i_cost16x16direct;
1308 h->mb.i_type = B_DIRECT;
1309 a->i_cost16x16direct = x264_rd_cost_mb( h, a->i_lambda2 );
/* Average two prediction blocks into pix1: weighted bi-prediction when
 * enabled (weight table indexed by the current L0/L1 refs), plain average
 * otherwise. Expands in a context where `h` and `a` are in scope. */
1313 #define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
1315 if( h->param.analyse.b_weighted_bipred ) \
1316 h->mc.avg_weight[size]( pix1, stride1, src2, stride2, \
1317 h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \
1319 h->mc.avg[size]( pix1, stride1, src2, stride2 ); \
/* B-frame 16x16 analysis: full-reference search on L0 and L1, then a BI
 * (bidirectional average) cost. Produces a->l0.me16x16, a->l1.me16x16 and
 * a->i_cost16x16bi; when RD is active (guard lines elided here), each of the
 * three candidate costs under the SATD threshold is replaced by its RD cost. */
1322 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
1324 uint8_t pix1[16*16], pix2[16*16];
1331 int mvc[8][2], i_mvc;
/* halfpel early-termination threshold is only useful with >1 reference */
1332 int i_halfpel_thresh = INT_MAX;
1333 int *p_halfpel_thresh = h->i_ref0>1 ? &i_halfpel_thresh : NULL;
1335 /* 16x16 Search on all ref frame */
1336 m.i_pixel = PIXEL_16x16;
1337 m.p_cost_mv = a->p_cost_mv;
1338 LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
/* ---- list 0: try every L0 reference, keep the cheapest ---- */
1341 a->l0.me16x16.cost = INT_MAX;
1342 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
1344 /* search with ref */
1345 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 0 );
1346 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
1347 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
1348 x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
1351 m.cost += REF_COST( 0, i_ref );
1353 if( m.cost < a->l0.me16x16.cost )
1355 a->l0.i_ref = i_ref;
1359 /* save mv for predicting neighbors */
1360 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
1361 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
1363 /* subtract ref cost, so we don't have to add it for the other MB types */
1364 a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );
/* ---- list 1: same procedure on the backward reference list ---- */
1367 i_halfpel_thresh = INT_MAX;
1368 p_halfpel_thresh = h->i_ref1>1 ? &i_halfpel_thresh : NULL;
1369 a->l1.me16x16.cost = INT_MAX;
1370 for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
1372 /* search with ref */
1373 LOAD_HPELS( &m, h->mb.pic.p_fref[1][i_ref], 1, i_ref, 0, 0 );
1374 x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
1375 x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
1376 x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
1379 m.cost += REF_COST( 1, i_ref );
1381 if( m.cost < a->l1.me16x16.cost )
1383 a->l1.i_ref = i_ref;
1387 /* save mv for predicting neighbors */
1388 h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
1389 h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
1391 /* subtract ref cost, so we don't have to add it for the other MB types */
1392 a->l1.me16x16.cost -= REF_COST( 1, a->l1.i_ref );
1394 /* Set global ref, needed for other modes? */
1395 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
1396 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
1398 /* get cost of BI mode */
/* pick whichever list has a halfpel MV for the cheap get_ref path; the
 * weight is flipped when the roles of the two predictions are swapped */
1399 weight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];
1400 if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )
1402 /* l0 reference is halfpel, so get_ref on it will make it faster */
1403 src2 = h->mc.get_ref( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
1405 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
1407 h->mc.mc_luma( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
1409 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
1411 weight = 64 - weight;
1415 /* if l0 was qpel, we'll use get_ref on l1 instead */
1416 h->mc.mc_luma( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
1418 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
1420 src2 = h->mc.get_ref( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
1422 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
1426 if( h->param.analyse.b_weighted_bipred )
1427 h->mc.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2, weight );
1429 h->mc.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );
1431 a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix1, 16 )
1432 + REF_COST( 0, a->l0.i_ref )
1433 + REF_COST( 1, a->l1.i_ref )
1434 + a->l0.me16x16.cost_mv
1435 + a->l1.me16x16.cost_mv;
/* add mode bits to each candidate so they are comparable */
1438 a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
1439 a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
1440 a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
1446 if( a->l0.me16x16.cost < a->i_best_satd )
1447 a->i_best_satd = a->l0.me16x16.cost;
1448 if( a->l1.me16x16.cost < a->i_best_satd )
1449 a->i_best_satd = a->l1.me16x16.cost;
1450 if( a->i_cost16x16bi < a->i_best_satd )
1451 a->i_best_satd = a->i_cost16x16bi;
/* RD-refine only candidates within 1.5x of the best SATD; the rest are
 * ruled out with COST_MAX */
1453 i_satd_thresh = a->i_best_satd * 3/2;
1455 h->mb.i_partition = D_16x16;
1457 if( a->l0.me16x16.cost < i_satd_thresh )
1459 h->mb.i_type = B_L0_L0;
1460 x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
1461 a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
1464 a->l0.me16x16.cost = COST_MAX;
1467 if( a->l1.me16x16.cost < i_satd_thresh )
1469 h->mb.i_type = B_L1_L1;
1470 x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
1471 a->l1.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
1474 a->l1.me16x16.cost = COST_MAX;
1477 if( a->i_cost16x16bi < i_satd_thresh )
1479 h->mb.i_type = B_BI_BI;
1480 a->i_cost16x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
1483 a->i_cost16x16bi = COST_MAX;
/* Write the chosen sub-partition MVs of P-frame 8x8 block i into the
 * macroblock cache, dispatching on the sub-partition shape (8x8, 8x4,
 * 4x8 or 4x4). (x,y) is the block's position in 4x4-block units. */
1487 static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
1489 const int x = 2*(i%2);
1490 const int y = 2*(i/2);
1492 switch( h->mb.i_sub_partition[i] )
1495 x264_macroblock_cache_mv( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );
1498 x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv[0], a->l0.me8x4[i][0].mv[1] );
1499 x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv[0], a->l0.me8x4[i][1].mv[1] );
1502 x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv[0], a->l0.me4x8[i][0].mv[1] );
1503 x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv[0], a->l0.me4x8[i][1].mv[1] );
1506 x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv[0], a->l0.me4x4[i][0].mv[1] );
1507 x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv[0], a->l0.me4x4[i][1].mv[1] );
1508 x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv[0], a->l0.me4x4[i][2].mv[1] );
1509 x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv[0], a->l0.me4x4[i][3].mv[1] );
1512 x264_log( h, X264_LOG_ERROR, "internal error\n" );
/* Cache ref+MV for a B partition in both lists. For each list that the
 * partition type actually uses, store the list's ref and searched MV;
 * otherwise mark ref = -1 and zero the MV (and, on elided lines guarded by
 * the b_mvd flag of the callers, the MVD as well). */
1517 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
1518 if( x264_mb_partition_listX_table[0][part] ) \
1520 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
1521 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
1525 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
1526 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \
1528 x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
1530 if( x264_mb_partition_listX_table[1][part] ) \
1532 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
1533 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
1537 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
1538 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \
1540 x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
/* Cache refs/MVs for B-frame 8x8 block i: direct sub-blocks reload the
 * direct MVs (and clear MVD + set skip flags), everything else goes through
 * CACHE_MV_BI with the block's sub-partition type. */
1543 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1547 if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
1549 x264_mb_load_mv_direct8x8( h, i );
1552 x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );
1553 x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );
1554 x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
1559 CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
/* Cache refs/MVs for B-frame 16x8 half i (row i, full width). */
1562 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1564 CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
/* Cache refs/MVs for B-frame 8x16 half i (column i, full height). */
1566 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1568 CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
/* B-frame 8x8 analysis: for each 8x8 block, search L0 and L1 (one search
 * each, seeded by the 16x16 MV), build a BI candidate by averaging the two
 * predictions, and pick the cheapest of L0/L1/BI/direct as the block's
 * sub-partition. Accumulates the total into a->i_cost8x8bi, and — with RD
 * (guard lines elided) — replaces it by the full RD cost if within 1.5x of
 * the best SATD. */
1572 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
1574 uint8_t **p_fref[2] =
1575 { h->mb.pic.p_fref[0][a->l0.i_ref],
1576 h->mb.pic.p_fref[1][a->l1.i_ref] };
1577 uint8_t pix[2][8*8];
1580 /* XXX Needed for x264_mb_predict_mv */
1581 h->mb.i_partition = D_8x8;
1585 for( i = 0; i < 4; i++ )
1590 int i_part_cost_bi = 0;
1592 for( l = 0; l < 2; l++ )
1594 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1595 x264_me_t *m = &lX->me8x8[i];
1597 m->i_pixel = PIXEL_8x8;
1598 m->p_cost_mv = a->p_cost_mv;
1600 LOAD_FENC( m, h->mb.pic.p_fenc, 8*x8, 8*y8 );
1601 LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*x8, 8*y8 );
1603 x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
1604 x264_me_search( h, m, &lX->me16x16.mv, 1 );
/* cache immediately so the next block's MV prediction sees this MV */
1606 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
/* keep this list's prediction for the BI average */
1609 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1610 m->mv[0], m->mv[1], 8, 8 );
1611 i_part_cost_bi += m->cost_mv;
1612 /* FIXME: ref cost */
1615 WEIGHTED_AVG( PIXEL_8x8, pix[0], 8, pix[1], 8 );
1616 i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x8]( a->l0.me8x8[i].p_fenc[0], FENC_STRIDE, pix[0], 8 )
1617 + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
1618 a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
1619 a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
/* choose the cheapest of L0 / L1 / BI / direct for this 8x8 block */
1621 i_part_cost = a->l0.me8x8[i].cost;
1622 h->mb.i_sub_partition[i] = D_L0_8x8;
1623 if( a->l1.me8x8[i].cost < i_part_cost )
1625 i_part_cost = a->l1.me8x8[i].cost;
1626 h->mb.i_sub_partition[i] = D_L1_8x8;
1628 if( i_part_cost_bi < i_part_cost )
1630 i_part_cost = i_part_cost_bi;
1631 h->mb.i_sub_partition[i] = D_BI_8x8;
1633 if( a->i_cost8x8direct[i] < i_part_cost )
1635 i_part_cost = a->i_cost8x8direct[i];
1636 h->mb.i_sub_partition[i] = D_DIRECT_8x8;
1638 a->i_cost8x8bi += i_part_cost;
1640 /* XXX Needed for x264_mb_predict_mv */
1641 x264_mb_cache_mv_b8x8( h, a, i, 0 );
1645 a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
1649 if( a->i_cost8x8bi < a->i_best_satd )
1650 a->i_best_satd = a->i_cost8x8bi;
1652 if( a->i_cost8x8bi < a->i_best_satd * 3/2 )
1654 h->mb.i_type = B_8x8;
1655 h->mb.i_partition = D_8x8;
1656 a->i_cost8x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
1659 a->i_cost8x8bi = COST_MAX;
/* B-frame 16x8 analysis: for each half, search L0 and L1 seeded with the
 * corresponding 8x8 MVs, build a BI candidate, and pick L0/L1/BI per half.
 * Derives the composite MB type (a->i_mb_type16x8) from the two halves'
 * choices and accumulates a->i_cost16x8bi; with RD (guard elided) replaces
 * it by the full RD cost if within 1.5x of the best SATD. */
1663 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
1665 uint8_t **p_fref[2] =
1666 { h->mb.pic.p_fref[0][a->l0.i_ref],
1667 h->mb.pic.p_fref[1][a->l1.i_ref] };
1668 DECLARE_ALIGNED( uint8_t, pix[2][16*8], 16 );
1672 h->mb.i_partition = D_16x8;
1673 a->i_cost16x8bi = 0;
1675 for( i = 0; i < 2; i++ )
1678 int i_part_cost_bi = 0;
1680 /* TODO: check only the list(s) that were used in b8x8? */
1681 for( l = 0; l < 2; l++ )
1683 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1684 x264_me_t *m = &lX->me16x8[i];
1686 m->i_pixel = PIXEL_16x8;
1687 m->p_cost_mv = a->p_cost_mv;
1689 LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
1690 LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
/* seed the search with this half's two 8x8 MVs from the same list */
1692 mvc[0][0] = lX->me8x8[2*i].mv[0];
1693 mvc[0][1] = lX->me8x8[2*i].mv[1];
1694 mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1695 mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1697 x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1698 x264_me_search( h, m, mvc, 2 );
1701 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 16,
1702 m->mv[0], m->mv[1], 16, 8 );
1703 /* FIXME: ref cost */
1704 i_part_cost_bi += m->cost_mv;
1707 WEIGHTED_AVG( PIXEL_16x8, pix[0], 16, pix[1], 16 );
1708 i_part_cost_bi += h->pixf.mbcmp[PIXEL_16x8]( a->l0.me16x8[i].p_fenc[0], FENC_STRIDE, pix[0], 16 );
1710 i_part_cost = a->l0.me16x8[i].cost;
1711 a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1712 if( a->l1.me16x8[i].cost < i_part_cost )
1714 i_part_cost = a->l1.me16x8[i].cost;
1715 a->i_mb_partition16x8[i] = D_L1_8x8;
/* BI must beat the single-list cost by at least lambda to be chosen */
1717 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1719 i_part_cost = i_part_cost_bi;
1720 a->i_mb_partition16x8[i] = D_BI_8x8;
1722 a->i_cost16x8bi += i_part_cost;
1724 x264_mb_cache_mv_b16x8( h, a, i, 0 );
/* map the two per-half list choices onto the 16x8 B-MB type enum */
1728 a->i_mb_type16x8 = B_L0_L0
1729 + (a->i_mb_partition16x8[0]>>2) * 3
1730 + (a->i_mb_partition16x8[1]>>2);
1731 a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
1735 if( a->i_cost16x8bi < a->i_best_satd )
1736 a->i_best_satd = a->i_cost16x8bi;
1738 if( a->i_cost16x8bi < a->i_best_satd * 3/2 )
1740 h->mb.i_type = a->i_mb_type16x8;
1741 h->mb.i_partition = D_16x8;
1742 a->i_cost16x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
1745 a->i_cost16x8bi = COST_MAX;
/* B-frame 8x16 analysis: mirror of x264_mb_analyse_inter_b16x8 for the two
 * vertical halves. Seeds each search with the vertically adjacent 8x8 MVs,
 * picks L0/L1/BI per half, derives a->i_mb_type8x16 and accumulates
 * a->i_cost8x16bi; with RD (guard elided) swaps in the RD cost when within
 * 1.5x of the best SATD. */
1748 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1750 uint8_t **p_fref[2] =
1751 { h->mb.pic.p_fref[0][a->l0.i_ref],
1752 h->mb.pic.p_fref[1][a->l1.i_ref] };
1753 uint8_t pix[2][8*16];
1757 h->mb.i_partition = D_8x16;
1758 a->i_cost8x16bi = 0;
1760 for( i = 0; i < 2; i++ )
1763 int i_part_cost_bi = 0;
1765 for( l = 0; l < 2; l++ )
1767 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1768 x264_me_t *m = &lX->me8x16[i];
1770 m->i_pixel = PIXEL_8x16;
1771 m->p_cost_mv = a->p_cost_mv;
1773 LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
1774 LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
/* seed with this column's two 8x8 MVs (blocks i and i+2) */
1776 mvc[0][0] = lX->me8x8[i].mv[0];
1777 mvc[0][1] = lX->me8x8[i].mv[1];
1778 mvc[1][0] = lX->me8x8[i+2].mv[0];
1779 mvc[1][1] = lX->me8x8[i+2].mv[1];
1781 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1782 x264_me_search( h, m, mvc, 2 );
1785 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1786 m->mv[0], m->mv[1], 8, 16 );
1787 /* FIXME: ref cost */
1788 i_part_cost_bi += m->cost_mv;
1791 WEIGHTED_AVG( PIXEL_8x16, pix[0], 8, pix[1], 8 );
1792 i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x16]( a->l0.me8x16[i].p_fenc[0], FENC_STRIDE, pix[0], 8 );
1794 i_part_cost = a->l0.me8x16[i].cost;
1795 a->i_mb_partition8x16[i] = D_L0_8x8;
1796 if( a->l1.me8x16[i].cost < i_part_cost )
1798 i_part_cost = a->l1.me8x16[i].cost;
1799 a->i_mb_partition8x16[i] = D_L1_8x8;
/* BI must beat the single-list cost by at least lambda to be chosen */
1801 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1803 i_part_cost = i_part_cost_bi;
1804 a->i_mb_partition8x16[i] = D_BI_8x8;
1806 a->i_cost8x16bi += i_part_cost;
1808 x264_mb_cache_mv_b8x16( h, a, i, 0 );
/* map the two per-half list choices onto the 8x16 B-MB type enum */
1812 a->i_mb_type8x16 = B_L0_L0
1813 + (a->i_mb_partition8x16[0]>>2) * 3
1814 + (a->i_mb_partition8x16[1]>>2);
1815 a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1819 if( a->i_cost8x16bi < a->i_best_satd )
1820 a->i_best_satd = a->i_cost8x16bi;
1822 if( a->i_cost8x16bi < a->i_best_satd * 3/2 )
1824 h->mb.i_type = a->i_mb_type8x16;
1825 h->mb.i_partition = D_8x16;
1826 a->i_cost8x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
1829 a->i_cost8x16bi = COST_MAX;
/* Jointly refine the L0/L1 MV pair of every partition that ended up in BI
 * mode, using the current bipred weight. Dispatches on the final partition
 * shape of the macroblock. */
1833 static void refine_bidir( x264_t *h, x264_mb_analysis_t *a )
1835 const int i_biweight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];
1838 switch( h->mb.i_partition )
1841 if( h->mb.i_type == B_BI_BI )
1842 x264_me_refine_bidir( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );
1845 for( i=0; i<2; i++ )
1846 if( a->i_mb_partition16x8[i] == D_BI_8x8 )
1847 x264_me_refine_bidir( h, &a->l0.me16x8[i], &a->l1.me16x8[i], i_biweight );
1850 for( i=0; i<2; i++ )
1851 if( a->i_mb_partition8x16[i] == D_BI_8x8 )
1852 x264_me_refine_bidir( h, &a->l0.me8x16[i], &a->l1.me8x16[i], i_biweight );
1855 for( i=0; i<4; i++ )
1856 if( h->mb.i_sub_partition[i] == D_BI_8x8 )
1857 x264_me_refine_bidir( h, &a->l0.me8x8[i], &a->l1.me8x8[i], i_biweight );
/* Choose 4x4 vs 8x8 transform for an inter MB by comparing SATD (4x4 proxy)
 * against SA8D (8x8 proxy) of the motion-compensated prediction residual.
 * Only runs when 8x8 transform is enabled and legal for this MB. */
1862 static inline void x264_mb_analyse_transform( x264_t *h )
1864 h->mb.cache.b_transform_8x8_allowed =
1865 h->param.analyse.b_transform_8x8
1866 && !IS_INTRA( h->mb.i_type ) && x264_mb_transform_8x8_allowed( h );
1868 if( h->mb.cache.b_transform_8x8_allowed )
1870 int i_cost4, i_cost8;
1871 /* FIXME only luma mc is needed */
1874 i_cost8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
1875 h->mb.pic.p_fdec[0], FDEC_STRIDE );
1876 i_cost4 = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
1877 h->mb.pic.p_fdec[0], FDEC_STRIDE );
1879 h->mb.b_transform_8x8 = i_cost8 < i_cost4;
/* RD-based transform-size decision: flip the current transform choice,
 * re-encode, and keep the flip only if the RD cost improves; otherwise
 * flip back. Also rescales a->i_best_satd in proportion to the cost change
 * so later SATD thresholds stay meaningful. *i_cost is updated in place
 * (assignment lines elided in this view — confirm against full source). */
1883 static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_cost )
1885 h->mb.cache.b_transform_8x8_allowed =
1886 h->param.analyse.b_transform_8x8 && x264_mb_transform_8x8_allowed( h );
1888 if( h->mb.cache.b_transform_8x8_allowed )
1891 x264_analyse_update_cache( h, a );
1892 h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
1893 /* FIXME only luma is needed, but the score for comparison already includes chroma */
1894 i_cost8 = x264_rd_cost_mb( h, a->i_lambda2 );
1896 if( *i_cost >= i_cost8 )
1899 a->i_best_satd = (int64_t)a->i_best_satd * i_cost8 / *i_cost;
1900 /* prevent a rare division by zero in x264_mb_analyse_intra */
1901 if( a->i_best_satd == 0 )
1907 h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
1912 /*****************************************************************************
1913 * x264_macroblock_analyse:
1914 *****************************************************************************/
/* Top-level per-macroblock mode decision. Runs the slice-type-appropriate
 * analysis (intra for I, inter+intra for P, direct/inter+intra for B),
 * selects h->mb.i_type / h->mb.i_partition / sub-partitions, and leaves the
 * winning refs and MVs in the MB cache via x264_analyse_update_cache.
 * Note: this view elides some physical lines (declarations, braces, a few
 * statements), so control flow below is partially implicit. */
1915 void x264_macroblock_analyse( x264_t *h )
1917 x264_mb_analysis_t analysis;
1918 int i_cost = COST_MAX;
1922 x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
1924 /*--------------------------- Do the analysis ---------------------------*/
/* ----- I slice: pick the best of I_16x16 / I_8x8 / I_4x4 ----- */
1925 if( h->sh.i_type == SLICE_TYPE_I )
1927 x264_mb_analyse_intra( h, &analysis, COST_MAX );
1929 i_cost = analysis.i_sad_i16x16;
1930 h->mb.i_type = I_16x16;
1931 if( analysis.i_sad_i4x4 < i_cost )
1933 i_cost = analysis.i_sad_i4x4;
1934 h->mb.i_type = I_4x4;
1936 if( analysis.i_sad_i8x8 < i_cost )
1937 h->mb.i_type = I_8x8;
1939 if( h->mb.i_subpel_refine >= 7 )
1940 x264_intra_rd_refine( h, &analysis );
/* ----- P slice: skip probe, inter partitions, then intra fallback ----- */
1942 else if( h->sh.i_type == SLICE_TYPE_P )
1945 int i_intra_cost, i_intra_type;
1947 /* Fast P_SKIP detection */
1948 analysis.b_try_pskip = 0;
1949 if( h->param.analyse.b_fast_pskip )
1951 if( h->param.analyse.i_subpel_refine >= 3 )
1952 analysis.b_try_pskip = 1;
/* only probe skip early when a neighbor already skipped */
1953 else if( h->mb.i_mb_type_left == P_SKIP ||
1954 h->mb.i_mb_type_top == P_SKIP ||
1955 h->mb.i_mb_type_topleft == P_SKIP ||
1956 h->mb.i_mb_type_topright == P_SKIP )
1957 b_skip = x264_macroblock_probe_pskip( h );
1962 h->mb.i_type = P_SKIP;
1963 h->mb.i_partition = D_16x16;
1967 const unsigned int flags = h->param.analyse.inter;
1972 x264_mb_analyse_load_costs( h, &analysis );
1974 x264_mb_analyse_inter_p16x16( h, &analysis );
/* 16x16 analysis may itself detect P_SKIP */
1976 if( h->mb.i_type == P_SKIP )
1979 if( flags & X264_ANALYSE_PSUB16x16 )
1981 if( h->param.analyse.b_mixed_references )
1982 x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
1984 x264_mb_analyse_inter_p8x8( h, &analysis );
1987 /* Select best inter mode */
1989 i_partition = D_16x16;
1990 i_cost = analysis.l0.me16x16.cost;
1992 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1993 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1996 i_partition = D_8x8;
1997 h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
1998 h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
2000 i_cost = analysis.l0.i_cost8x8;
/* refine each 8x8 block into 4x4 / 8x4 / 4x8 when allowed */
2003 if( flags & X264_ANALYSE_PSUB8x8 )
2005 int i_cost_bak = i_cost;
2007 for( i = 0; i < 4; i++ )
2009 x264_mb_analyse_inter_p4x4( h, &analysis, i );
2010 if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
2012 int i_cost8x8 = analysis.l0.i_cost4x4[i];
2013 h->mb.i_sub_partition[i] = D_L0_4x4;
2015 x264_mb_analyse_inter_p8x4( h, &analysis, i );
2016 if( analysis.l0.i_cost8x4[i] < i_cost8x8 )
2018 h->mb.i_sub_partition[i] = D_L0_8x4;
2019 i_cost8x8 = analysis.l0.i_cost8x4[i];
2022 x264_mb_analyse_inter_p4x8( h, &analysis, i );
2023 if( analysis.l0.i_cost4x8[i] < i_cost8x8 )
2025 h->mb.i_sub_partition[i] = D_L0_4x8;
2026 i_cost8x8 = analysis.l0.i_cost4x8[i];
2029 i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
2032 x264_mb_cache_mv_p8x8( h, &analysis, i );
2034 /* TODO: RD per subpartition */
2035 if( b_sub8x8 && analysis.b_mbrd )
2037 i_cost = x264_rd_cost_mb( h, analysis.i_lambda2 );
/* RD regressed: revert all blocks to plain 8x8 */
2038 if( i_cost > i_cost_bak )
2040 i_cost = i_cost_bak;
2041 h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
2042 h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
2048 /* Now do 16x8/8x16 */
2049 i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
2050 if( analysis.b_mbrd )
2051 i_thresh16x8 = i_thresh16x8 * analysis.i_lambda2 / analysis.i_lambda;
2052 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
2053 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
2055 x264_mb_analyse_inter_p16x8( h, &analysis );
2056 if( analysis.l0.i_cost16x8 < i_cost )
2059 i_partition = D_16x8;
2060 i_cost = analysis.l0.i_cost16x8;
2063 x264_mb_analyse_inter_p8x16( h, &analysis );
2064 if( analysis.l0.i_cost8x16 < i_cost )
2067 i_partition = D_8x16;
2068 i_cost = analysis.l0.i_cost8x16;
2072 h->mb.i_partition = i_partition;
/* refine qpel on the winning partition (or do an RD transform pass) */
2075 //FIXME mb_type costs?
2076 if( analysis.b_mbrd )
2078 h->mb.i_type = i_type;
2079 x264_mb_analyse_transform_rd( h, &analysis, &i_cost );
2081 else if( i_partition == D_16x16 )
2083 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
2084 i_cost = analysis.l0.me16x16.cost;
2086 else if( i_partition == D_16x8 )
2088 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
2089 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
2090 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
2092 else if( i_partition == D_8x16 )
2094 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
2095 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
2096 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
2098 else if( i_partition == D_8x8 )
2102 for( i8x8 = 0; i8x8 < 4; i8x8++ )
2104 switch( h->mb.i_sub_partition[i8x8] )
2107 x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
2108 i_cost += analysis.l0.me8x8[i8x8].cost;
2111 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
2112 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
2113 i_cost += analysis.l0.me8x4[i8x8][0].cost +
2114 analysis.l0.me8x4[i8x8][1].cost;
2117 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
2118 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
2119 i_cost += analysis.l0.me4x8[i8x8][0].cost +
2120 analysis.l0.me4x8[i8x8][1].cost;
2124 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
2125 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
2126 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
2127 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
2128 i_cost += analysis.l0.me4x4[i8x8][0].cost +
2129 analysis.l0.me4x4[i8x8][1].cost +
2130 analysis.l0.me4x4[i8x8][2].cost +
2131 analysis.l0.me4x4[i8x8][3].cost;
2134 x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
/* intra fallback: let intra modes compete against the best inter cost */
2140 x264_mb_analyse_intra( h, &analysis, i_cost );
2141 if( h->mb.b_chroma_me && !analysis.b_mbrd &&
2142 ( analysis.i_sad_i16x16 < i_cost
2143 || analysis.i_sad_i8x8 < i_cost
2144 || analysis.i_sad_i4x4 < i_cost ))
2146 x264_mb_analyse_intra_chroma( h, &analysis );
2147 analysis.i_sad_i16x16 += analysis.i_sad_i8x8chroma;
2148 analysis.i_sad_i8x8 += analysis.i_sad_i8x8chroma;
2149 analysis.i_sad_i4x4 += analysis.i_sad_i8x8chroma;
2152 i_intra_type = I_16x16;
2153 i_intra_cost = analysis.i_sad_i16x16;
2155 if( analysis.i_sad_i8x8 < i_intra_cost )
2157 i_intra_type = I_8x8;
2158 i_intra_cost = analysis.i_sad_i8x8;
2160 if( analysis.i_sad_i4x4 < i_intra_cost )
2162 i_intra_type = I_4x4;
2163 i_intra_cost = analysis.i_sad_i4x4;
2166 if( i_intra_cost < i_cost )
2168 i_type = i_intra_type;
2169 i_cost = i_intra_cost;
2172 h->mb.i_type = i_type;
2173 h->stat.frame.i_intra_cost += i_intra_cost;
2174 h->stat.frame.i_inter_cost += i_cost;
/* highest subpel level: RD-refine the final choice */
2176 if( h->mb.i_subpel_refine >= 7 )
2178 if( IS_INTRA( h->mb.i_type ) )
2180 x264_intra_rd_refine( h, &analysis );
2182 else if( i_partition == D_16x16 )
2184 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
2185 x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0 );
2187 else if( i_partition == D_16x8 )
2189 x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );
2190 x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );
2191 x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0 );
2192 x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 2 );
2194 else if( i_partition == D_8x16 )
2196 x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );
2197 x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );
2198 x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0 );
2199 x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 1 );
2201 else if( i_partition == D_8x8 )
2204 x264_analyse_update_cache( h, &analysis );
2205 for( i8x8 = 0; i8x8 < 4; i8x8++ )
2206 if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
2207 x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8 );
/* ----- B slice: direct/skip handling, then inter partitions + intra ----- */
2212 else if( h->sh.i_type == SLICE_TYPE_B )
2214 int i_bskip_cost = COST_MAX;
2217 h->mb.i_type = B_SKIP;
2218 if( h->mb.b_direct_auto_write )
2220 /* direct=auto heuristic: prefer whichever mode allows more Skip macroblocks */
2221 for( i = 0; i < 2; i++ )
2224 h->sh.b_direct_spatial_mv_pred ^= 1;
2225 analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, i && analysis.b_direct_available ? &b_changed : NULL );
2226 if( analysis.b_direct_available )
2231 b_skip = x264_macroblock_probe_bskip( h );
2233 h->stat.frame.i_direct_score[ h->sh.b_direct_spatial_mv_pred ] += b_skip;
2240 analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, NULL );
2242 if( analysis.b_direct_available )
2244 if( !h->mb.b_direct_auto_write )
2246 if( h->mb.b_lossless )
2248 /* chance of skip is too small to bother */
2250 else if( analysis.b_mbrd )
2252 i_bskip_cost = ssd_mb( h );
2254 /* 6 = minimum cavlc cost of a non-skipped MB */
2255 if( i_bskip_cost <= 6 * analysis.i_lambda2 )
2257 h->mb.i_type = B_SKIP;
2258 x264_analyse_update_cache( h, &analysis );
2262 else if( !h->mb.b_direct_auto_write )
2264 /* Conditioning the probe on neighboring block types
2265 * doesn't seem to help speed or quality. */
2266 b_skip = x264_macroblock_probe_bskip( h );
2272 const unsigned int flags = h->param.analyse.inter;
2276 x264_mb_analyse_load_costs( h, &analysis );
2278 /* select best inter mode */
2279 /* direct must be first */
2280 if( analysis.b_direct_available )
2281 x264_mb_analyse_inter_direct( h, &analysis );
2283 x264_mb_analyse_inter_b16x16( h, &analysis );
2286 i_partition = D_16x16;
2287 i_cost = analysis.l0.me16x16.cost;
2288 if( analysis.l1.me16x16.cost < i_cost )
2291 i_cost = analysis.l1.me16x16.cost;
2293 if( analysis.i_cost16x16bi < i_cost )
2296 i_cost = analysis.i_cost16x16bi;
2298 if( analysis.i_cost16x16direct < i_cost )
2301 i_cost = analysis.i_cost16x16direct;
2304 if( i_bskip_cost <= i_cost )
2306 h->mb.i_type = B_SKIP;
2307 x264_analyse_update_cache( h, &analysis );
2311 if( flags & X264_ANALYSE_BSUB16x16 )
2313 x264_mb_analyse_inter_b8x8( h, &analysis );
2314 if( analysis.i_cost8x8bi < i_cost )
2317 i_partition = D_8x8;
2318 i_cost = analysis.i_cost8x8bi;
/* only try 16x8 when the 8x8 results suggest horizontal coherence */
2320 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
2321 h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
2323 x264_mb_analyse_inter_b16x8( h, &analysis );
2324 if( analysis.i_cost16x8bi < i_cost )
2326 i_partition = D_16x8;
2327 i_cost = analysis.i_cost16x8bi;
2328 i_type = analysis.i_mb_type16x8;
/* likewise, 8x16 only when vertical neighbors agree */
2331 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
2332 h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
2334 x264_mb_analyse_inter_b8x16( h, &analysis );
2335 if( analysis.i_cost8x16bi < i_cost )
2337 i_partition = D_8x16;
2338 i_cost = analysis.i_cost8x16bi;
2339 i_type = analysis.i_mb_type8x16;
2345 h->mb.i_partition = i_partition;
/* refine qpel on the winning B partition, per list usage */
2347 if( analysis.b_mbrd )
2349 h->mb.i_type = i_type;
2350 x264_mb_analyse_transform_rd( h, &analysis, &i_cost );
2353 else if( i_partition == D_16x16 )
2355 analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
2356 analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
2357 if( i_type == B_L0_L0 )
2359 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
2360 i_cost = analysis.l0.me16x16.cost
2361 + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
2363 else if( i_type == B_L1_L1 )
2365 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
2366 i_cost = analysis.l1.me16x16.cost
2367 + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
2369 else if( i_type == B_BI_BI )
2371 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
2372 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
2375 else if( i_partition == D_16x8 )
2377 for( i=0; i<2; i++ )
2379 if( analysis.i_mb_partition16x8[i] != D_L1_8x8 )
2380 x264_me_refine_qpel( h, &analysis.l0.me16x8[i] );
2381 if( analysis.i_mb_partition16x8[i] != D_L0_8x8 )
2382 x264_me_refine_qpel( h, &analysis.l1.me16x8[i] );
2385 else if( i_partition == D_8x16 )
2387 for( i=0; i<2; i++ )
2389 if( analysis.i_mb_partition8x16[i] != D_L1_8x8 )
2390 x264_me_refine_qpel( h, &analysis.l0.me8x16[i] );
2391 if( analysis.i_mb_partition8x16[i] != D_L0_8x8 )
2392 x264_me_refine_qpel( h, &analysis.l1.me8x16[i] );
2395 else if( i_partition == D_8x8 )
2397 for( i=0; i<4; i++ )
2400 int i_part_cost_old;
2402 int i_part_type = h->mb.i_sub_partition[i];
2403 int b_bidir = (i_part_type == D_BI_8x8);
2405 if( i_part_type == D_DIRECT_8x8 )
2407 if( x264_mb_partition_listX_table[0][i_part_type] )
2409 m = &analysis.l0.me8x8[i];
2410 i_part_cost_old = m->cost;
2411 i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
2412 m->cost -= i_type_cost;
2413 x264_me_refine_qpel( h, m );
2415 analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
2417 if( x264_mb_partition_listX_table[1][i_part_type] )
2419 m = &analysis.l1.me8x8[i];
2420 i_part_cost_old = m->cost;
2421 i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
2422 m->cost -= i_type_cost;
2423 x264_me_refine_qpel( h, m );
2425 analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
2427 /* TODO: update mvp? */
2431 /* best intra mode */
2432 x264_mb_analyse_intra( h, &analysis, i_cost );
2434 if( analysis.i_sad_i16x16 < i_cost )
2437 i_cost = analysis.i_sad_i16x16;
2439 if( analysis.i_sad_i8x8 < i_cost )
2442 i_cost = analysis.i_sad_i8x8;
2444 if( analysis.i_sad_i4x4 < i_cost )
2447 i_cost = analysis.i_sad_i4x4;
2450 h->mb.i_type = i_type;
2452 if( h->param.analyse.b_bidir_me )
2453 refine_bidir( h, &analysis );
/* commit the final decision to the MB cache and pick the transform size */
2457 x264_analyse_update_cache( h, &analysis );
2459 if( !analysis.b_mbrd )
2460 x264_mb_analyse_transform( h );
2462 h->mb.b_trellis = h->param.analyse.i_trellis;
2463 h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;
2466 /*-------------------- Update MB from the analysis ----------------------*/
2467 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
/* Commit the winning analysis decision into the per-macroblock cache:
 * intra prediction modes, reference indices and motion vectors, keyed on
 * the macroblock type (h->mb.i_type) chosen by the analysis.
 * NOTE(review): case labels and braces are elided in this extract of the
 * file; the comments below group the visible statements by the MB type
 * they evidently belong to — confirm against the full source. */
2471 switch( h->mb.i_type )
/* Intra 4x4: one predicted mode per 4x4 luma block, written into the
 * scan8-indexed intra4x4_pred_mode cache. */
2474 for( i = 0; i < 16; i++ )
2476 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
2477 a->i_predict4x4[block_idx_x[i]][block_idx_y[i]];
2480 x264_mb_analyse_intra_chroma( h, a );
/* Intra 8x8: one predicted mode per 8x8 luma block; (2*(i&1), 2*(i>>1))
 * converts the block index to 4x4-block coordinates for the cache helper. */
2483 for( i = 0; i < 4; i++ )
2484 x264_macroblock_cache_intra8x8_pred( h, 2*(i&1), 2*(i>>1),
2485 a->i_predict8x8[i&1][i>>1] );
2487 x264_mb_analyse_intra_chroma( h, a );
/* Intra 16x16: a single luma prediction mode for the whole MB. */
2490 h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
2491 x264_mb_analyse_intra_chroma( h, a );
/* P macroblock (list 0 only): store refs and MVs per partition shape. */
2495 switch( h->mb.i_partition )
/* D_16x16: one ref/MV covers the full 4x4-block (16x16-pel) area. */
2498 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
2499 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
/* D_16x8: top and bottom halves, each 4x2 blocks. */
2503 x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].i_ref );
2504 x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].i_ref );
2505 x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv[0], a->l0.me16x8[0].mv[1] );
2506 x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv[0], a->l0.me16x8[1].mv[1] );
/* D_8x16: left and right halves, each 2x4 blocks. */
2510 x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].i_ref );
2511 x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].i_ref );
2512 x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv[0], a->l0.me8x16[0].mv[1] );
2513 x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv[0], a->l0.me8x16[1].mv[1] );
/* Any other partition value here is a bug in the analysis. */
2517 x264_log( h, X264_LOG_ERROR, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
/* P_8x8: one ref per 8x8 block; sub-partition MVs are filled in by the
 * x264_mb_cache_mv_p8x8 helper. */
2523 x264_macroblock_cache_ref( h, 0, 0, 2, 2, 0, a->l0.me8x8[0].i_ref );
2524 x264_macroblock_cache_ref( h, 2, 0, 2, 2, 0, a->l0.me8x8[1].i_ref );
2525 x264_macroblock_cache_ref( h, 0, 2, 2, 2, 0, a->l0.me8x8[2].i_ref );
2526 x264_macroblock_cache_ref( h, 2, 2, 2, 2, 0, a->l0.me8x8[3].i_ref );
2527 for( i = 0; i < 4; i++ )
2528 x264_mb_cache_mv_p8x8( h, a, i );
/* P_SKIP: use the predicted skip MV with ref 0 and force a 16x16 partition. */
2534 x264_mb_predict_mv_pskip( h, mvp );
2536 h->mb.i_partition = D_16x16;
2537 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
2538 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
/* B direct: derive the MVs of each 8x8 block from the direct-mode helper. */
2544 x264_mb_load_mv_direct8x8( h, 0 );
2545 x264_mb_load_mv_direct8x8( h, 1 );
2546 x264_mb_load_mv_direct8x8( h, 2 );
2547 x264_mb_load_mv_direct8x8( h, 3 );
/* B_8x8: write each sub-block's decision through the b8x8 cache helper. */
2551 /* optimize: cache might not need to be rewritten */
2552 for( i = 0; i < 4; i++ )
2553 x264_mb_cache_mv_b8x8( h, a, i, 1 );
2556 default: /* the rest of the B types */
2557 switch( h->mb.i_partition )
2560 switch( h->mb.i_type )
/* B 16x16, list-0 only: store the L0 ref/MV and mark list 1 unused
 * (ref -1, zero MV/MVD). */
2563 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
2564 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
2566 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
2567 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
2568 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
/* B 16x16, list-1 only: mirror image of the case above. */
2571 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
2572 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
2573 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
2575 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
2576 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
/* B 16x16, bidirectional: store both lists' refs and MVs. */
2579 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
2580 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
2582 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
2583 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
/* B 16x8 / 8x16: both halves go through the dedicated cache helpers. */
2588 x264_mb_cache_mv_b16x8( h, a, 0, 1 );
2589 x264_mb_cache_mv_b16x8( h, a, 1, 1 );
2592 x264_mb_cache_mv_b8x16( h, a, 0, 1 );
2593 x264_mb_cache_mv_b8x16( h, a, 1, 1 );
/* Any other type reaching this switch is an internal inconsistency. */
2596 x264_log( h, X264_LOG_ERROR, "internal error (invalid MB type)\n" );
2602 #include "slicetype_decision.c"