1 /*****************************************************************************
2 * analyse.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
30 #include "common/common.h"
31 #include "common/macroblock.h"
32 #include "macroblock.h"
34 #include "ratecontrol.h"
/* Tail of the per-list motion analysis state (x264_mb_analysis_list_t) and the
 * per-macroblock analysis context (x264_mb_analysis_t).
 * NOTE(review): this chunk is an elided listing — the embedded original line
 * numbers jump, so the struct headers and several fields (e.g. me16x16,
 * me16x8, me8x16, the 8x8/16x8/8x16 cost fields) are not visible here. */
47 int i_cost4x4[4]; /* cost per 8x8 partition */
48 x264_me_t me4x4[4][4];
51 int i_cost8x4[4]; /* cost per 8x8 partition */
52 x264_me_t me8x4[4][2];
55 int i_cost4x8[4]; /* cost per 8x8 partition */
56 x264_me_t me4x8[4][4];
66 } x264_mb_analysis_list_t;
/* Per-MB analysis context: lambda/QP, intra mode stats, and inter costs. */
70 /* conduct the analysis using this lambda and QP */
77 /* Take some shortcuts in intra search if intra is deemed unlikely */
80 /* Luma part 16x16 and 4x4 modes stats */
85 int i_predict4x4[4][4]; /* best 4x4 intra mode, indexed [x][y] */
91 /* II: Inter part P/B frame */
92 x264_mb_analysis_list_t l0;
93 x264_mb_analysis_list_t l1;
95 int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
96 int i_cost16x16direct;
98 int i_cost8x8direct[4];
102 int i_mb_partition16x8[2]; /* mb_partition_e */
103 int i_mb_partition8x16[2];
104 int i_mb_type16x8; /* mb_class_e */
107 int b_direct_available;
109 } x264_mb_analysis_t;
/* lambda = pow(2, qp/6 - 2) rounded to int: rate cost multiplier per QP.
 * NOTE(review): closing braces of these tables are elided in this listing. */
111 static const int i_qp0_cost_table[52] = {
112 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
113 1, 1, 1, 1, /* 8-11 */
114 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
115 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
116 6, 7, 8, 9,10,11,13,14, /* 28-35 */
117 16,18,20,23,25,29,32,36, /* 36-43 */
118 40,45,51,57,64,72,81,91 /* 44-51 */
/* x/y position (in 4x4 blocks) of each of the 16 luma 4x4 blocks in zigzag
 * (double-Z) scan order */
121 static const uint8_t block_idx_x[16] = {
122 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
124 static const uint8_t block_idx_y[16] = {
125 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* CAVLC bit costs of mb_type / sub_mb_type codes, indexed by mb_class_e /
 * mb_partition_e.  NOTE(review): i_sub_mb_p_cost_table's values are elided. */
128 /* TODO: calculate CABAC costs */
129 static const int i_mb_b_cost_table[18] = {
130 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
132 static const int i_mb_b16x8_cost_table[16] = {
133 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
135 static const int i_sub_mb_b_cost_table[13] = {
136 7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
138 static const int i_sub_mb_p_cost_table[4] = {
142 /* initialize an array of lambda*nbits for all possible mvs */
/* Lazily builds (and caches, per QP) a table of lambda-weighted Exp-Golomb
 * bit costs for every possible mv delta, centered so it can be indexed with
 * negative offsets.  The cache is a function-local static and is never freed;
 * NOTE(review): not thread-safe as written — confirm callers serialize. */
143 static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
145 static int16_t *p_cost_mv[52];
147 if( !p_cost_mv[a->i_qp] )
149 /* could be faster, but isn't called many times */
150 /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
152 p_cost_mv[a->i_qp] = x264_malloc( (4*4*h->param.analyse.i_mv_range + 1) * sizeof(int16_t) );
/* shift the pointer to the middle of the buffer so [-i] and [i] both work */
153 p_cost_mv[a->i_qp] += 2*4*h->param.analyse.i_mv_range;
154 for( i = 0; i <= 2*4*h->param.analyse.i_mv_range; i++ )
156 p_cost_mv[a->i_qp][-i] =
157 p_cost_mv[a->i_qp][i] = a->i_lambda * bs_size_se( i );
161 a->p_cost_mv = p_cost_mv[a->i_qp];
/* Resets the analysis context for the current MB at the given QP: sets lambda,
 * subpel/chroma-ME options, initializes all mode costs to COST_MAX, clamps the
 * MV search range to the frame edges, and decides whether intra is likely
 * enough to search fully (fast-intra shortcut otherwise). */
164 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
166 memset( a, 0, sizeof( x264_mb_analysis_t ) );
168 /* conduct the analysis using this lambda and QP */
170 a->i_lambda = i_qp0_cost_table[i_qp];
/* chroma ME is only used for P slices with high subpel refinement */
172 h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
173 h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
174 && h->mb.i_subpel_refine >= 5;
180 a->i_sad_i8x8 = COST_MAX;
182 /* II: Inter part P/B frame */
183 if( h->sh.i_type != SLICE_TYPE_I )
186 int i_fmv_range = h->param.analyse.i_mv_range - 16;
188 /* Calculate max allowed MV range */
189 #define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range )
190 h->mb.mv_min_fpel[0] = CLIP_FMV( -16*h->mb.i_mb_x - 8 );
191 h->mb.mv_max_fpel[0] = CLIP_FMV( 16*( h->sps->i_mb_width - h->mb.i_mb_x ) - 8 );
/* qpel range is fullpel range widened by 16 pels, times 4 for qpel units */
192 h->mb.mv_min[0] = 4*( h->mb.mv_min_fpel[0] - 16 );
193 h->mb.mv_max[0] = 4*( h->mb.mv_max_fpel[0] + 16 );
/* vertical range only depends on mb_y, so recompute it once per row */
194 if( h->mb.i_mb_x == 0)
196 h->mb.mv_min_fpel[1] = CLIP_FMV( -16*h->mb.i_mb_y - 8 );
197 h->mb.mv_max_fpel[1] = CLIP_FMV( 16*( h->sps->i_mb_height - h->mb.i_mb_y ) - 8 );
198 h->mb.mv_min[1] = 4*( h->mb.mv_min_fpel[1] - 16 );
199 h->mb.mv_max[1] = 4*( h->mb.mv_max_fpel[1] + 16 );
/* mark every inter mode as not-yet-searched */
204 a->l0.i_cost8x8 = COST_MAX;
206 for( i = 0; i < 4; i++ )
210 a->l0.i_cost4x8[i] = COST_MAX;
214 a->l0.i_cost8x16 = COST_MAX;
215 if( h->sh.i_type == SLICE_TYPE_B )
218 a->l1.i_cost8x8 = COST_MAX;
220 for( i = 0; i < 4; i++ )
225 a->i_cost8x8direct[i] = COST_MAX;
232 a->i_cost16x16direct =
235 a->i_cost8x16bi = COST_MAX;
238 /* Fast intra decision */
/* skip the heuristic for the first few MBs: neighbours are not yet coded */
239 if( h->mb.i_mb_xy > 4 )
241 const unsigned int i_neighbour = h->mb.i_neighbour;
/* intra is deemed likely if any coded neighbour (left/top/topleft/topright)
 * is intra, the colocated MB in the previous frame was intra (P slices),
 * or a large fraction of the frame so far is intra */
242 if( ((i_neighbour&MB_LEFT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - 1] ))
243 || ((i_neighbour&MB_TOP) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] ))
244 || (((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT)) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] ))
245 || ((i_neighbour&MB_TOPRIGHT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] ))
246 || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
247 || (h->mb.i_mb_xy < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) )
248 { /* intra is likely */ }
/* Fills *mode with the list of 16x16 intra prediction modes that are legal
 * given which neighbouring MBs exist; *pi_count receives the list length.
 * NOTE(review): the *pi_count assignments are elided in this listing. */
263 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
265 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
267 /* top and left available */
268 *mode++ = I_PRED_16x16_V;
269 *mode++ = I_PRED_16x16_H;
270 *mode++ = I_PRED_16x16_DC;
271 *mode++ = I_PRED_16x16_P;
274 else if( ( i_neighbour & MB_LEFT ) )
/* left only: H prediction plus DC computed from the left edge */
277 *mode++ = I_PRED_16x16_DC_LEFT;
278 *mode++ = I_PRED_16x16_H;
281 else if( ( i_neighbour & MB_TOP ) )
/* top only: V prediction plus DC computed from the top edge */
284 *mode++ = I_PRED_16x16_DC_TOP;
285 *mode++ = I_PRED_16x16_V;
/* no neighbours: flat DC-128 prediction is the only option */
291 *mode = I_PRED_16x16_DC_128;
/* Same as predict_16x16_mode_available but for the chroma 8x8 intra modes.
 * NOTE(review): the *pi_count assignments are elided in this listing. */
297 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
299 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
301 /* top and left available */
302 *mode++ = I_PRED_CHROMA_V;
303 *mode++ = I_PRED_CHROMA_H;
304 *mode++ = I_PRED_CHROMA_DC;
305 *mode++ = I_PRED_CHROMA_P;
308 else if( ( i_neighbour & MB_LEFT ) )
/* left only */
311 *mode++ = I_PRED_CHROMA_DC_LEFT;
312 *mode++ = I_PRED_CHROMA_H;
315 else if( ( i_neighbour & MB_TOP ) )
/* top only */
318 *mode++ = I_PRED_CHROMA_DC_TOP;
319 *mode++ = I_PRED_CHROMA_V;
/* no neighbours */
325 *mode = I_PRED_CHROMA_DC_128;
/* Fills *mode with the legal 4x4 intra modes for block idx (0..15, scan
 * order), based on which neighbouring MBs each 4x4 block needs (needmb) and
 * which are present (i_neighbour).  b_a/b_b/b_c = left/top/topright pixels
 * usable.  NOTE(review): parts of the needmb table, the b_a&&b_b branch
 * header, and the *pi_count assignments are elided in this listing. */
331 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
/* which neighbouring MBs each 4x4 block position needs for its left/top
 * reference pixels */
334 static const unsigned int needmb[16] =
336 MB_LEFT|MB_TOP, MB_TOP,
338 MB_TOP, MB_TOP|MB_TOPRIGHT,
346 /* FIXME even when b_c == 0 there is some case where missing pixels
347 * are emulated and thus more mode are available TODO
348 * analysis and encode should be fixed too */
349 b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
350 b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
351 b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
/* left and top available: all modes except the topright-dependent ones */
355 *mode++ = I_PRED_4x4_DC;
356 *mode++ = I_PRED_4x4_H;
357 *mode++ = I_PRED_4x4_V;
358 *mode++ = I_PRED_4x4_DDR;
359 *mode++ = I_PRED_4x4_VR;
360 *mode++ = I_PRED_4x4_HD;
361 *mode++ = I_PRED_4x4_HU;
/* topright also available: add the down-left diagonal modes */
367 *mode++ = I_PRED_4x4_DDL;
368 *mode++ = I_PRED_4x4_VL;
372 else if( b_a && !b_b )
374 *mode++ = I_PRED_4x4_DC_LEFT;
375 *mode++ = I_PRED_4x4_H;
376 *mode++ = I_PRED_4x4_HU;
379 else if( !b_a && b_b )
381 *mode++ = I_PRED_4x4_DC_TOP;
382 *mode++ = I_PRED_4x4_V;
/* no usable neighbours */
387 *mode++ = I_PRED_4x4_DC_128;
/* Luma intra analysis: tries every available I16x16 mode (SATD + lambda-
 * weighted mode cost) and, if enabled and not pruned by fast-intra, every
 * I4x4 mode per block.  The winning 4x4 blocks are encoded immediately so
 * that later blocks predict from reconstructed pixels.  Results go into
 * res->i_sad_i16x16 / i_sad_i4x4 and the i_predict* fields.
 * NOTE(review): several declarations and the 4x4 inner-loop best-cost
 * bookkeeping are elided in this listing. */
392 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cost_inter )
394 const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
395 const int i_stride = h->mb.pic.i_stride[0];
396 uint8_t *p_src = h->mb.pic.p_fenc[0];
397 uint8_t *p_dst = h->mb.pic.p_fdec[0];
404 /*---------------- Try all mode and calculate their score ---------------*/
406 /* 16x16 prediction selection */
407 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
408 for( i = 0; i < i_max; i++ )
413 i_mode = predict_mode[i];
415 /* we do the prediction */
416 h->predict_16x16[i_mode]( p_dst, i_stride );
418 /* we calculate the diff and get the square sum of the diff */
419 i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
420 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
421 /* if i_score is lower it is better */
422 if( res->i_sad_i16x16 > i_sad )
424 res->i_predict16x16 = i_mode;
425 res->i_sad_i16x16 = i_sad;
428 /* cavlc mb type prefix */
429 if( h->sh.i_type == SLICE_TYPE_B )
430 res->i_sad_i16x16 += res->i_lambda * i_mb_b_cost_table[I_16x16];
/* fast-intra pruning: if even I16x16 is twice the inter cost, skip I4x4 */
432 if( res->b_fast_intra )
434 if( res->i_sad_i16x16 > 2*i_cost_inter )
438 /* 4x4 prediction selection */
439 if( flags & X264_ANALYSE_I4x4 )
442 for( idx = 0; idx < 16; idx++ )
/* predicted (most probable) mode costs 1 bit, others cost 4 */
450 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
451 x = block_idx_x[idx];
452 y = block_idx_y[idx];
454 p_src_by = p_src + 4 * x + 4 * y * i_stride;
455 p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
458 predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
459 for( i = 0; i < i_max; i++ )
464 i_mode = predict_mode[i];
466 /* we do the prediction */
467 h->predict_4x4[i_mode]( p_dst_by, i_stride );
469 /* we calculate diff and get the square sum of the diff */
470 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
471 p_src_by, i_stride );
473 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
475 /* if i_score is lower it is better */
478 res->i_predict4x4[x][y] = i_mode;
482 res->i_sad_i4x4 += i_best;
484 /* we need to encode this mb now (for next ones) */
485 h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
486 x264_mb_encode_i4x4( h, idx, res->i_qp );
488 /* we need to store the 'fixed' version */
489 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
490 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
492 res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
493 if( h->sh.i_type == SLICE_TYPE_B )
494 res->i_sad_i4x4 += res->i_lambda * i_mb_b_cost_table[I_4x4];
/* Chroma intra analysis: tries every available 8x8 chroma prediction mode on
 * both planes (U+V SATD + lambda-weighted mode cost) and stores the best in
 * res->i_predict8x8 / i_sad_i8x8.  Returns early if already computed
 * (i_sad_i8x8 < COST_MAX). */
498 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
505 uint8_t *p_dstc[2], *p_srcc[2];
/* already analysed for this MB — nothing to do */
508 if( res->i_sad_i8x8 < COST_MAX )
511 /* 8x8 prediction selection for chroma */
512 p_dstc[0] = h->mb.pic.p_fdec[1];
513 p_dstc[1] = h->mb.pic.p_fdec[2];
514 p_srcc[0] = h->mb.pic.p_fenc[1];
515 p_srcc[1] = h->mb.pic.p_fenc[2];
517 i_stride[0] = h->mb.pic.i_stride[1];
518 i_stride[1] = h->mb.pic.i_stride[2];
520 predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
521 res->i_sad_i8x8 = COST_MAX;
522 for( i = 0; i < i_max; i++ )
527 i_mode = predict_mode[i];
529 /* we do the prediction */
530 h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
531 h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
533 /* we calculate the cost */
534 i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
535 p_srcc[0], i_stride[0] ) +
536 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
537 p_srcc[1], i_stride[1] ) +
538 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
540 /* if i_score is lower it is better */
541 if( res->i_sad_i8x8 > i_sad )
543 res->i_predict8x8 = i_mode;
544 res->i_sad_i8x8 = i_sad;
/* Point an x264_me_t at the encode-source planes (luma + both chromas) for
 * the sub-block at (xoff,yoff) luma pixels; chroma offsets are halved. */
549 #define LOAD_FENC( m, src, xoff, yoff) \
550 (m)->i_stride[0] = h->mb.pic.i_stride[0]; \
551 (m)->i_stride[1] = h->mb.pic.i_stride[1]; \
552 (m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
553 (m)->p_fenc[1] = &(src)[1][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
554 (m)->p_fenc[2] = &(src)[2][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]];
/* Point an x264_me_t at the reference planes: indices 0-3 are the four
 * (half-pel filtered) luma planes, 4-5 the chroma planes. */
555 #define LOAD_HPELS(m, src, xoff, yoff) \
556 (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
557 (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
558 (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
559 (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
560 (m)->p_fref[4] = &(src)[4][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
561 (m)->p_fref[5] = &(src)[5][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]];
/* P-slice 16x16 motion search over every L0 reference frame: for each ref,
 * predicts the mv, runs the ME, adds the ref-index bit cost, and keeps the
 * cheapest (ref, mv) in a->l0.me16x16.  Also stores each ref's mv in
 * h->mb.mvr for neighbour prediction, then caches the winning ref.
 * NOTE(review): the best-candidate copy inside the loop is elided here. */
563 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
567 int mvc[4][2], i_mvc;
/* early-termination threshold is only useful with more than one reference */
568 int i_fullpel_thresh = INT_MAX;
569 int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
571 /* 16x16 Search on all ref frame */
572 m.i_pixel = PIXEL_16x16;
573 m.p_cost_mv = a->p_cost_mv;
574 LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
576 a->l0.me16x16.cost = INT_MAX;
577 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
/* make the threshold account for this ref's index cost */
579 const int i_ref_cost = a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
580 i_fullpel_thresh -= i_ref_cost;
582 /* search with ref */
583 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
584 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
585 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
586 x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
588 m.cost += i_ref_cost;
589 i_fullpel_thresh += i_ref_cost;
591 if( m.cost < a->l0.me16x16.cost )
597 /* save mv for predicting neighbors */
598 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
599 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
602 /* subtract ref cost, so we don't have to add it for the other P types */
603 a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
605 /* Set global ref, needed for all others modes */
606 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
/* P-slice 8x8 motion search: one ME per 8x8 partition using the 16x16 winner
 * (plus previously searched 8x8 mvs) as candidates; caches each mv and sums
 * the four partition costs into a->l0.i_cost8x8.
 * NOTE(review): x8/y8 derivations and i_mvc updates are elided here. */
609 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
611 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
612 uint8_t **p_fenc = h->mb.pic.p_fenc;
613 int mvc[5][2], i_mvc;
616 /* XXX Needed for x264_mb_predict_mv */
617 h->mb.i_partition = D_8x8;
/* seed the candidate list with the 16x16 result */
620 mvc[0][0] = a->l0.me16x16.mv[0];
621 mvc[0][1] = a->l0.me16x16.mv[1];
623 for( i = 0; i < 4; i++ )
625 x264_me_t *m = &a->l0.me8x8[i];
629 m->i_pixel = PIXEL_8x8;
630 m->p_cost_mv = a->p_cost_mv;
632 LOAD_FENC( m, p_fenc, 8*x8, 8*y8 );
633 LOAD_HPELS( m, p_fref, 8*x8, 8*y8 );
635 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
636 x264_me_search( h, m, mvc, i_mvc );
/* cache the mv so the next partition's mvp prediction sees it */
638 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
640 mvc[i_mvc][0] = m->mv[0];
641 mvc[i_mvc][1] = m->mv[1];
645 m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
648 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
649 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
/* P-slice 16x8 motion search: one ME per half, seeded with the two matching
 * 8x8 mvs; requires x264_mb_analyse_inter_p8x8 to have run first. */
652 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
654 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
655 uint8_t **p_fenc = h->mb.pic.p_fenc;
659 /* XXX Needed for x264_mb_predict_mv */
660 h->mb.i_partition = D_16x8;
662 for( i = 0; i < 2; i++ )
664 x264_me_t *m = &a->l0.me16x8[i];
666 m->i_pixel = PIXEL_16x8;
667 m->p_cost_mv = a->p_cost_mv;
669 LOAD_FENC( m, p_fenc, 0, 8*i );
670 LOAD_HPELS( m, p_fref, 0, 8*i );
/* candidates: the two 8x8 mvs covering this 16x8 half */
672 mvc[0][0] = a->l0.me8x8[2*i].mv[0];
673 mvc[0][1] = a->l0.me8x8[2*i].mv[1];
674 mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
675 mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
677 x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
678 x264_me_search( h, m, mvc, 2 );
680 x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
683 a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
/* P-slice 8x16 motion search: mirror of the 16x8 case, one ME per vertical
 * half, seeded with the two matching 8x8 mvs. */
686 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
688 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
689 uint8_t **p_fenc = h->mb.pic.p_fenc;
693 /* XXX Needed for x264_mb_predict_mv */
694 h->mb.i_partition = D_8x16;
696 for( i = 0; i < 2; i++ )
698 x264_me_t *m = &a->l0.me8x16[i];
700 m->i_pixel = PIXEL_8x16;
701 m->p_cost_mv = a->p_cost_mv;
703 LOAD_FENC( m, p_fenc, 8*i, 0 );
704 LOAD_HPELS( m, p_fref, 8*i, 0 );
/* candidates: the two 8x8 mvs (top and bottom) covering this column */
706 mvc[0][0] = a->l0.me8x8[i].mv[0];
707 mvc[0][1] = a->l0.me8x8[i].mv[1];
708 mvc[1][0] = a->l0.me8x8[i+2].mv[0];
709 mvc[1][1] = a->l0.me8x8[i+2].mv[1];
711 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
712 x264_me_search( h, m, mvc, 2 );
714 x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
717 a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
/* Computes the chroma SATD cost of a sub-8x8 partitioning (4x4/8x4/4x8) of
 * 8x8 block i8x8: motion-compensates each sub-block's chroma with the luma
 * mv (chroma sizes are half the luma sizes) into pix1/pix2 and compares
 * against the encode source.  Used only when b_chroma_me is on. */
720 static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
722 uint8_t pix1[8*8], pix2[8*8];
723 const int i_stride = h->mb.pic.i_stride[1];
/* chroma offset of this 8x8 luma block: 4 pels right per odd i8x8,
 * 4 rows down (2*(i8x8&2)) for the bottom half */
724 const int off = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
726 #define CHROMA4x4MC( width, height, me, x, y ) \
727 h->mc.mc_chroma( &p_fref[4][off+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
728 h->mc.mc_chroma( &p_fref[5][off+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
730 if( pixel == PIXEL_4x4 )
732 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][0], 0,0 );
733 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][1], 0,2 );
734 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][2], 2,0 );
735 CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][3], 2,2 );
737 else if( pixel == PIXEL_8x4 )
739 CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][0], 0,0 );
740 CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][1], 0,2 );
/* else PIXEL_4x8 */
744 CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][0], 0,0 );
745 CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
/* 4x4 SATD covers the whole 4x4 chroma area of one 8x8 luma block */
748 return h->pixf.satd[PIXEL_4x4]( &h->mb.pic.p_fenc[1][off], i_stride, pix1, 8 )
749 + h->pixf.satd[PIXEL_4x4]( &h->mb.pic.p_fenc[2][off], i_stride, pix2, 8 );
/* P-slice 4x4 motion search within 8x8 block i8x8: one ME per 4x4 block,
 * seeded with the parent 8x8 mv on the first block; sums the four costs
 * plus the sub_mb_type cost (and chroma cost if chroma ME is enabled). */
752 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
754 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
755 uint8_t **p_fenc = h->mb.pic.p_fenc;
759 /* XXX Needed for x264_mb_predict_mv */
760 h->mb.i_partition = D_8x8;
762 for( i4x4 = 0; i4x4 < 4; i4x4++ )
764 const int idx = 4*i8x8 + i4x4;
765 const int x4 = block_idx_x[idx];
766 const int y4 = block_idx_y[idx];
/* only the first sub-block uses the parent 8x8 mv as a candidate */
767 const int i_mvc = (i4x4 == 0);
769 x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
771 m->i_pixel = PIXEL_4x4;
772 m->p_cost_mv = a->p_cost_mv;
774 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
775 LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
777 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
778 x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
/* cache the mv so later blocks' mvp predictions see it */
780 x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
783 a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
784 a->l0.me4x4[i8x8][1].cost +
785 a->l0.me4x4[i8x8][2].cost +
786 a->l0.me4x4[i8x8][3].cost +
787 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
788 if( h->mb.b_chroma_me )
789 a->l0.i_cost4x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 );
/* P-slice 8x4 motion search within 8x8 block i8x8: two MEs (top/bottom),
 * each seeded with the first 4x4 mv; requires p4x4 to have run first. */
792 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
794 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
795 uint8_t **p_fenc = h->mb.pic.p_fenc;
799 /* XXX Needed for x264_mb_predict_mv */
800 h->mb.i_partition = D_8x8;
802 for( i8x4 = 0; i8x4 < 2; i8x4++ )
/* 2*i8x4 skips to the second row of 4x4 blocks for the bottom 8x4 */
804 const int idx = 4*i8x8 + 2*i8x4;
805 const int x4 = block_idx_x[idx];
806 const int y4 = block_idx_y[idx];
807 const int i_mvc = (i8x4 == 0);
809 x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
811 m->i_pixel = PIXEL_8x4;
812 m->p_cost_mv = a->p_cost_mv;
814 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
815 LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
817 x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
818 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
820 x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
823 a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
824 a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
825 if( h->mb.b_chroma_me )
826 a->l0.i_cost8x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 );
/* P-slice 4x8 motion search within 8x8 block i8x8: mirror of the 8x4 case,
 * two MEs (left/right), each seeded with the first 4x4 mv. */
829 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
831 uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
832 uint8_t **p_fenc = h->mb.pic.p_fenc;
836 /* XXX Needed for x264_mb_predict_mv */
837 h->mb.i_partition = D_8x8;
839 for( i4x8 = 0; i4x8 < 2; i4x8++ )
841 const int idx = 4*i8x8 + i4x8;
842 const int x4 = block_idx_x[idx];
843 const int y4 = block_idx_y[idx];
844 const int i_mvc = (i4x8 == 0);
846 x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
848 m->i_pixel = PIXEL_4x8;
849 m->p_cost_mv = a->p_cost_mv;
851 LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
852 LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
854 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
855 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
857 x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
860 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
861 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
862 if( h->mb.b_chroma_me )
863 a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
/* B-slice direct-mode cost: SATD between the encode source and the direct
 * prediction already sitting in fdec, accumulated per 8x8 block and in
 * total, plus the sub_mb_type / mb_type bit costs.
 * NOTE(review): x8/y8 derivations inside the loop are elided here. */
866 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
868 /* Assumes that fdec still contains the results of
869 * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
871 uint8_t **p_fenc = h->mb.pic.p_fenc;
872 uint8_t **p_fdec = h->mb.pic.p_fdec;
873 int i_stride= h->mb.pic.i_stride[0];
876 a->i_cost16x16direct = 0;
877 for( i = 0; i < 4; i++ )
881 const int off = 8 * x8 + 8 * i_stride * y8;
882 a->i_cost16x16direct +=
883 a->i_cost8x8direct[i] =
884 h->pixf.satd[PIXEL_8x8]( &p_fenc[0][off], i_stride, &p_fdec[0][off], i_stride );
/* per-8x8 cost carries its sub_mb_type bits (used by the b8x8 analysis) */
887 a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
890 a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
/* Average pix1 with src2 in place (into pix1): weighted bi-prediction when
 * enabled, plain (pix1+src2+1)>>1 average otherwise. */
893 #define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
895 if( h->param.analyse.b_weighted_bipred ) \
896 h->pixf.avg_weight[size]( pix1, stride1, src2, stride2, \
897 h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \
899 h->pixf.avg[size]( pix1, stride1, src2, stride2 ); \
/* B-slice 16x16 analysis: runs an L0 search over all l0 refs and an L1
 * search over all l1 refs (keeping the best of each in a->l0/l1.me16x16),
 * then forms the bi-prediction from the two winners and computes its SATD
 * cost.  get_ref is used on whichever list's mv is half-pel (cheaper than
 * a full qpel interpolation); the other list is interpolated into pix1.
 * NOTE(review): several declarations and the best-candidate copies inside
 * the ref loops are elided in this listing. */
902 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
904 uint8_t pix1[16*16], pix2[16*16];
907 int src2_ref, pix1_ref;
911 int mvc[5][2], i_mvc;
912 int i_fullpel_thresh = INT_MAX;
913 int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
915 /* 16x16 Search on all ref frame */
916 m.i_pixel = PIXEL_16x16;
917 m.p_cost_mv = a->p_cost_mv;
918 LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
/* ME for List0 */
921 a->l0.me16x16.cost = INT_MAX;
922 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
924 /* search with ref */
925 LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
926 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
927 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
928 x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
/* add the ref index bit cost before comparing candidates */
931 m.cost += a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
933 if( m.cost < a->l0.me16x16.cost )
939 /* save mv for predicting neighbors */
940 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
941 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
943 /* subtract ref cost, so we don't have to add it for the other MB types */
944 a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
/* ME for List1: same procedure with a fresh early-termination threshold */
947 i_fullpel_thresh = INT_MAX;
948 p_fullpel_thresh = h->i_ref1>1 ? &i_fullpel_thresh : NULL;
949 a->l1.me16x16.cost = INT_MAX;
950 for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
952 /* search with ref */
953 LOAD_HPELS( &m, h->mb.pic.p_fref[1][i_ref], 0, 0 );
954 x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
955 x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
956 x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
959 m.cost += a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
961 if( m.cost < a->l1.me16x16.cost )
967 /* save mv for predicting neighbors */
968 h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
969 h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
971 /* subtract ref cost, so we don't have to add it for the other MB types */
972 a->l1.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );
974 /* Set global ref, needed for other modes? */
975 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
976 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
978 /* get cost of BI mode */
979 if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )
981 /* l0 reference is halfpel, so get_ref on it will make it faster */
982 src2 = h->mc.get_ref( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
984 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
986 h->mc.mc_luma( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
988 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
990 src2_ref = a->l0.i_ref;
991 pix1_ref = a->l1.i_ref;
995 /* if l0 was qpel, we'll use get_ref on l1 instead */
996 h->mc.mc_luma( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
998 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
1000 src2 = h->mc.get_ref( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
1002 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
1004 src2_ref = a->l1.i_ref;
1005 pix1_ref = a->l0.i_ref;
/* weights are looked up with (pix1 ref, src2 ref) to match whichever list
 * ended up in which buffer */
1008 if( h->param.analyse.b_weighted_bipred )
1009 h->pixf.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2,
1010 h->mb.bipred_weight[pix1_ref][src2_ref] );
1012 h->pixf.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );
/* bi cost = SATD + both ref index costs + both mv costs */
1014 a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 )
1015 + a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref )
1016 + bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) )
1017 + a->l0.me16x16.cost_mv
1018 + a->l1.me16x16.cost_mv;
/* mb_type bit costs */
1021 a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
1022 a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
1023 a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
/* Cache refs/mvs for a B partition: for each list that the partition type
 * uses, store the chosen ref and mv; for each list it does not use, store
 * ref -1 and a zero mv (and zero the mvd when b_mvd is set).
 * NOTE(review): the else-branch headers and b_mvd guards are elided. */
1026 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
1027 if( x264_mb_partition_listX_table[0][part] ) \
1029 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
1030 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
1034 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
1035 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \
1037 x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
1039 if( x264_mb_partition_listX_table[1][part] ) \
1041 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
1042 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
1046 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
1047 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \
1049 x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
/* Cache the mvs/refs for B 8x8 partition i: direct partitions load the
 * direct mvs (and zero mvds / mark skip when b_mvd), others go through
 * CACHE_MV_BI with the chosen sub-partition type.
 * NOTE(review): x/y derivations and the b_mvd guard are elided here. */
1052 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1056 if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
1058 x264_mb_load_mv_direct8x8( h, i );
1061 x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );
1062 x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );
1063 x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
1068 CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
/* Same caching for the two halves of B 16x8 / 8x16 partitions. */
1071 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1073 CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
1075 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1077 CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
/* B-slice 8x8 analysis: for each 8x8 partition, searches L0 and L1 (seeded
 * with the respective 16x16 mvs), forms the bi-predicted cost, and picks
 * the cheapest of L0 / L1 / BI / direct as the sub-partition type; total
 * goes into a->i_cost8x8bi.  Assumes b16x16 and (for direct) the direct
 * analysis have already run.
 * NOTE(review): x8/y8 derivations and some declarations are elided. */
1081 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
1083 uint8_t **p_fref[2] =
1084 { h->mb.pic.p_fref[0][a->l0.i_ref],
1085 h->mb.pic.p_fref[1][a->l1.i_ref] };
1086 uint8_t pix[2][8*8];
1089 /* XXX Needed for x264_mb_predict_mv */
1090 h->mb.i_partition = D_8x8;
1094 for( i = 0; i < 4; i++ )
1099 int i_part_cost_bi = 0;
1101 for( l = 0; l < 2; l++ )
1103 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1104 x264_me_t *m = &lX->me8x8[i];
1106 m->i_pixel = PIXEL_8x8;
1107 m->p_cost_mv = a->p_cost_mv;
1109 LOAD_FENC( m, h->mb.pic.p_fenc, 8*x8, 8*y8 );
1110 LOAD_HPELS( m, p_fref[l], 8*x8, 8*y8 );
1112 x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
1113 x264_me_search( h, m, &lX->me16x16.mv, 1 );
1115 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
/* interpolate this list's prediction for the BI average */
1118 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1119 m->mv[0], m->mv[1], 8, 8 );
1120 i_part_cost_bi += m->cost_mv;
1121 /* FIXME: ref cost */
1124 WEIGHTED_AVG( PIXEL_8x8, pix[0], 8, pix[1], 8 );
1125 i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( a->l0.me8x8[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 8 )
1126 + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
1127 a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
1128 a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
/* pick the cheapest sub-partition: L0, L1, BI, or direct */
1130 i_part_cost = a->l0.me8x8[i].cost;
1131 h->mb.i_sub_partition[i] = D_L0_8x8;
1132 if( a->l1.me8x8[i].cost < i_part_cost )
1134 i_part_cost = a->l1.me8x8[i].cost;
1135 h->mb.i_sub_partition[i] = D_L1_8x8;
1137 if( i_part_cost_bi < i_part_cost )
1139 i_part_cost = i_part_cost_bi;
1140 h->mb.i_sub_partition[i] = D_BI_8x8;
1142 if( a->i_cost8x8direct[i] < i_part_cost )
1144 i_part_cost = a->i_cost8x8direct[i];
1145 h->mb.i_sub_partition[i] = D_DIRECT_8x8;
1147 a->i_cost8x8bi += i_part_cost;
1149 /* XXX Needed for x264_mb_predict_mv */
1150 x264_mb_cache_mv_b8x8( h, a, i, 0 );
1154 a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
/* B-slice 16x8 analysis: for each half, searches L0 and L1 seeded with the
 * covering 8x8 mvs, evaluates the BI average, and picks the cheapest per
 * half; derives the combined mb_type and total cost a->i_cost16x8bi.
 * Assumes the b8x8 analysis has already run.
 * NOTE(review): mvc and some declarations are elided in this listing. */
1157 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
1159 uint8_t **p_fref[2] =
1160 { h->mb.pic.p_fref[0][a->l0.i_ref],
1161 h->mb.pic.p_fref[1][a->l1.i_ref] };
1162 uint8_t pix[2][16*8];
1166 h->mb.i_partition = D_16x8;
1167 a->i_cost16x8bi = 0;
1169 for( i = 0; i < 2; i++ )
1172 int i_part_cost_bi = 0;
1174 /* TODO: check only the list(s) that were used in b8x8? */
1175 for( l = 0; l < 2; l++ )
1177 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1178 x264_me_t *m = &lX->me16x8[i];
1180 m->i_pixel = PIXEL_16x8;
1181 m->p_cost_mv = a->p_cost_mv;
1183 LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
1184 LOAD_HPELS( m, p_fref[l], 0, 8*i );
/* candidates: this list's two 8x8 mvs covering the half */
1186 mvc[0][0] = lX->me8x8[2*i].mv[0];
1187 mvc[0][1] = lX->me8x8[2*i].mv[1];
1188 mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1189 mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1191 x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1192 x264_me_search( h, m, mvc, 2 );
1195 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 16,
1196 m->mv[0], m->mv[1], 16, 8 );
1197 /* FIXME: ref cost */
1198 i_part_cost_bi += m->cost_mv;
1201 WEIGHTED_AVG( PIXEL_16x8, pix[0], 16, pix[1], 16 );
1202 i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( a->l0.me16x8[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 16 );
1204 i_part_cost = a->l0.me16x8[i].cost;
1205 a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1206 if( a->l1.me16x8[i].cost < i_part_cost )
1208 i_part_cost = a->l1.me16x8[i].cost;
1209 a->i_mb_partition16x8[i] = D_L1_8x8;
1211 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1213 i_part_cost = i_part_cost_bi;
1214 a->i_mb_partition16x8[i] = D_BI_8x8;
1216 a->i_cost16x8bi += i_part_cost;
1219 x264_mb_cache_mv_b16x8( h, a, i, 0 );
/* combine the two halves' list choices into one mb_type code */
1223 a->i_mb_type16x8 = B_L0_L0
1224 + (a->i_mb_partition16x8[0]>>2) * 3
1225 + (a->i_mb_partition16x8[1]>>2);
1226 a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
/* B-slice 8x16 analysis: mirror of b16x8 for vertical halves — searches L0
 * and L1 per column seeded with the covering 8x8 mvs, evaluates BI, picks
 * the cheapest per column, and derives a->i_mb_type8x16 / i_cost8x16bi.
 * Assumes the b8x8 analysis has already run.
 * NOTE(review): mvc and some declarations are elided in this listing. */
1228 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1230 uint8_t **p_fref[2] =
1231 { h->mb.pic.p_fref[0][a->l0.i_ref],
1232 h->mb.pic.p_fref[1][a->l1.i_ref] };
1233 uint8_t pix[2][8*16];
1237 h->mb.i_partition = D_8x16;
1238 a->i_cost8x16bi = 0;
1240 for( i = 0; i < 2; i++ )
1243 int i_part_cost_bi = 0;
1245 for( l = 0; l < 2; l++ )
1247 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1248 x264_me_t *m = &lX->me8x16[i];
1250 m->i_pixel = PIXEL_8x16;
1251 m->p_cost_mv = a->p_cost_mv;
1253 LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
1254 LOAD_HPELS( m, p_fref[l], 8*i, 0 );
/* candidates: this list's two 8x8 mvs (top and bottom) in the column */
1256 mvc[0][0] = lX->me8x8[i].mv[0];
1257 mvc[0][1] = lX->me8x8[i].mv[1];
1258 mvc[1][0] = lX->me8x8[i+2].mv[0];
1259 mvc[1][1] = lX->me8x8[i+2].mv[1];
1261 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1262 x264_me_search( h, m, mvc, 2 );
1265 h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1266 m->mv[0], m->mv[1], 8, 16 );
1267 /* FIXME: ref cost */
1268 i_part_cost_bi += m->cost_mv;
1271 WEIGHTED_AVG( PIXEL_8x16, pix[0], 8, pix[1], 8 );
1272 i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( a->l0.me8x16[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 8 );
1274 i_part_cost = a->l0.me8x16[i].cost;
1275 a->i_mb_partition8x16[i] = D_L0_8x8;
1276 if( a->l1.me8x16[i].cost < i_part_cost )
1278 i_part_cost = a->l1.me8x16[i].cost;
1279 a->i_mb_partition8x16[i] = D_L1_8x8;
1281 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1283 i_part_cost = i_part_cost_bi;
1284 a->i_mb_partition8x16[i] = D_BI_8x8;
1286 a->i_cost8x16bi += i_part_cost;
1289 x264_mb_cache_mv_b8x16( h, a, i, 0 );
/* combine the two columns' list choices into one mb_type code */
1293 a->i_mb_type8x16 = B_L0_L0
1294 + (a->i_mb_partition8x16[0]>>2) * 3
1295 + (a->i_mb_partition8x16[1]>>2);
1296 a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1299 /*****************************************************************************
1300 * x264_macroblock_analyse:
1301 *****************************************************************************/
/* Top-level per-macroblock mode decision.
 * Chooses the MB's QP, then runs intra and/or inter analysis depending
 * on the slice type (I / P / B), refines the winning motion vectors to
 * quarter-pel, and finally writes the chosen mode, refs and MVs into
 * the macroblock cache for the encoding stage. */
1302 void x264_macroblock_analyse( x264_t *h )
1304 x264_mb_analysis_t analysis;
/* ask ratecontrol for this MB's QP */
1307 h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
1309 /* prevent QP from varying too fast. FIXME what's a sane limit? */
1310 h->mb.qp[h->mb.i_mb_xy] = x264_clip3( h->mb.qp[h->mb.i_mb_xy],
1311 h->mb.i_last_qp - 12, h->mb.i_last_qp + 12 );
1314 x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1316 /*--------------------------- Do the analysis ---------------------------*/
/* I slice: intra only; pick I_4x4 vs I_16x16 by their SAD costs */
1317 if( h->sh.i_type == SLICE_TYPE_I )
1319 x264_mb_analyse_intra( h, &analysis, COST_MAX );
1321 if( analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
1322 h->mb.i_type = I_4x4;
1324 h->mb.i_type = I_16x16;
/* P slice: skip probe, inter partition search, then intra fallback */
1326 else if( h->sh.i_type == SLICE_TYPE_P )
1328 const unsigned int i_neighbour = h->mb.i_neighbour;
1332 int i_intra_cost, i_intra_type;
1334 /* Fast P_SKIP detection */
/* only run the (costly) skip probe when an available decoded neighbour
 * (left / top / top-left / top-right) was itself coded as P_SKIP */
1335 if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
1336 ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
1337 ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
1338 ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
1340 b_skip = x264_macroblock_probe_pskip( h );
/* probe succeeded: commit P_SKIP and bypass all further analysis */
1345 h->mb.i_type = P_SKIP;
1346 h->mb.i_partition = D_16x16;
/* flags select which inter partition sizes the user enabled */
1350 const unsigned int flags = h->param.analyse.inter;
1354 x264_mb_analyse_load_costs( h, &analysis );
1356 x264_mb_analyse_inter_p16x16( h, &analysis );
1357 if( flags & X264_ANALYSE_PSUB16x16 )
1358 x264_mb_analyse_inter_p8x8( h, &analysis );
1360 /* Select best inter mode */
/* start from 16x16 as the baseline candidate */
1362 i_partition = D_16x16;
1363 i_cost = analysis.l0.me16x16.cost;
1365 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1366 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1371 i_partition = D_8x8;
1372 h->mb.i_sub_partition[0] = D_L0_8x8;
1373 h->mb.i_sub_partition[1] = D_L0_8x8;
1374 h->mb.i_sub_partition[2] = D_L0_8x8;
1375 h->mb.i_sub_partition[3] = D_L0_8x8;
1377 i_cost = analysis.l0.i_cost8x8;
/* optionally refine each 8x8 block into 4x4 / 8x4 / 4x8 sub-partitions,
 * keeping the cheapest and folding the delta into the total cost */
1380 if( flags & X264_ANALYSE_PSUB8x8 )
1382 for( i = 0; i < 4; i++ )
1384 x264_mb_analyse_inter_p4x4( h, &analysis, i );
1385 if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1389 h->mb.i_sub_partition[i] = D_L0_4x4;
1390 i_cost8x8 = analysis.l0.i_cost4x4[i];
/* 8x4 and 4x8 are only tried when 4x4 already beat 8x8 -- they are
 * compared against the 4x4 cost, not the 8x8 cost */
1392 x264_mb_analyse_inter_p8x4( h, &analysis, i );
1393 if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
1395 h->mb.i_sub_partition[i] = D_L0_8x4;
1396 i_cost8x8 = analysis.l0.i_cost8x4[i];
1399 x264_mb_analyse_inter_p4x8( h, &analysis, i );
1400 if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
1402 h->mb.i_sub_partition[i] = D_L0_4x8;
1403 i_cost8x8 = analysis.l0.i_cost4x8[i];
/* replace this block's 8x8 cost by the winning sub-partition cost */
1406 i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1411 /* Now do sub 16x8/8x16 */
1412 x264_mb_analyse_inter_p16x8( h, &analysis );
1413 if( analysis.l0.i_cost16x8 < i_cost )
1416 i_partition = D_16x8;
1417 i_cost = analysis.l0.i_cost16x8;
1420 x264_mb_analyse_inter_p8x16( h, &analysis );
1421 if( analysis.l0.i_cost8x16 < i_cost )
1424 i_partition = D_8x16;
1425 i_cost = analysis.l0.i_cost8x16;
/* commit the winning inter mode, then refine its MVs to quarter-pel */
1429 h->mb.i_type = i_type;
1430 h->mb.i_partition = i_partition;
1433 if( h->mb.i_partition == D_16x16 )
1435 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1436 i_cost = analysis.l0.me16x16.cost;
1438 else if( h->mb.i_partition == D_16x8 )
1440 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1441 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1442 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1444 else if( h->mb.i_partition == D_8x16 )
1446 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1447 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1448 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1450 else if( h->mb.i_partition == D_8x8 )
/* refine every sub-block of each 8x8 according to its sub-partition */
1454 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1456 switch( h->mb.i_sub_partition[i8x8] )
1459 x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1460 i_cost += analysis.l0.me8x8[i8x8].cost;
1463 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1464 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1465 i_cost += analysis.l0.me8x4[i8x8][0].cost +
1466 analysis.l0.me8x4[i8x8][1].cost;
1469 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1470 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1471 i_cost += analysis.l0.me4x8[i8x8][0].cost +
1472 analysis.l0.me4x8[i8x8][1].cost;
1476 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1477 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1478 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1479 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1480 i_cost += analysis.l0.me4x4[i8x8][0].cost +
1481 analysis.l0.me4x4[i8x8][1].cost +
1482 analysis.l0.me4x4[i8x8][2].cost +
1483 analysis.l0.me4x4[i8x8][3].cost;
1486 x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
/* intra fallback: compare the best intra mode against the inter cost */
1492 x264_mb_analyse_intra( h, &analysis, i_cost );
/* if chroma ME is enabled and intra might win, include the chroma
 * intra cost so the comparison is fair */
1493 if( h->mb.b_chroma_me &&
1494 ( analysis.i_sad_i16x16 < i_cost
1495 || ( analysis.i_sad_i4x4 < i_cost )))
1497 x264_mb_analyse_intra_chroma( h, &analysis );
1498 analysis.i_sad_i16x16 += analysis.i_sad_i8x8;
1499 analysis.i_sad_i4x4 += analysis.i_sad_i8x8;
1502 i_intra_type = I_16x16;
1503 i_intra_cost = analysis.i_sad_i16x16;
1505 if( analysis.i_sad_i4x4 < i_intra_cost )
1507 i_intra_type = I_4x4;
1508 i_intra_cost = analysis.i_sad_i4x4;
1511 if( i_intra_cost < i_cost )
1513 h->mb.i_type = i_intra_type;
1514 i_cost = i_intra_cost;
/* frame-level statistics, used elsewhere (e.g. for scene-cut logic) */
1517 h->stat.frame.i_intra_cost += i_intra_cost;
1518 h->stat.frame.i_inter_cost += i_cost;
/* B slice: direct/skip probe, then L0/L1/BI partition search */
1521 else if( h->sh.i_type == SLICE_TYPE_B )
1525 analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1526 if( analysis.b_direct_available )
1528 h->mb.i_type = B_SKIP;
1531 /* Conditioning the probe on neighboring block types
1532 * doesn't seem to help speed or quality. */
1533 b_skip = x264_macroblock_probe_bskip( h );
1538 const unsigned int flags = h->param.analyse.inter;
1542 x264_mb_analyse_load_costs( h, &analysis );
1544 /* select best inter mode */
1545 /* direct must be first */
1546 if( analysis.b_direct_available )
1547 x264_mb_analyse_inter_direct( h, &analysis );
1549 x264_mb_analyse_inter_b16x16( h, &analysis );
/* baseline: L0 16x16; then try L1, BI and direct in turn */
1551 h->mb.i_type = B_L0_L0;
1552 i_partition = D_16x16;
1553 i_cost = analysis.l0.me16x16.cost;
1554 if( analysis.l1.me16x16.cost < i_cost )
1556 h->mb.i_type = B_L1_L1;
1557 i_cost = analysis.l1.me16x16.cost;
1559 if( analysis.i_cost16x16bi < i_cost )
1561 h->mb.i_type = B_BI_BI;
1562 i_cost = analysis.i_cost16x16bi;
1564 if( analysis.i_cost16x16direct < i_cost )
1566 h->mb.i_type = B_DIRECT;
1567 i_cost = analysis.i_cost16x16direct;
1570 if( flags & X264_ANALYSE_BSUB16x16 )
1572 x264_mb_analyse_inter_b8x8( h, &analysis );
1573 if( analysis.i_cost8x8bi < i_cost )
1575 h->mb.i_type = B_8x8;
1576 i_partition = D_8x8;
1577 i_cost = analysis.i_cost8x8bi;
/* only try 16x8 when the 8x8 decision suggests horizontal coherence
 * (matching sub-partitions within a row) -- heuristic gate */
1579 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1580 h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1582 x264_mb_analyse_inter_b16x8( h, &analysis );
1583 if( analysis.i_cost16x8bi < i_cost )
1585 i_partition = D_16x8;
1586 i_cost = analysis.i_cost16x8bi;
1587 h->mb.i_type = analysis.i_mb_type16x8;
/* likewise, only try 8x16 on vertical coherence */
1590 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1591 h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1593 x264_mb_analyse_inter_b8x16( h, &analysis );
1594 if( analysis.i_cost8x16bi < i_cost )
1596 i_partition = D_8x16;
1597 i_cost = analysis.i_cost8x16bi;
1598 h->mb.i_type = analysis.i_mb_type8x16;
1604 h->mb.i_partition = i_partition;
/* quarter-pel refinement of whichever partition/type won */
1607 if( i_partition == D_16x16 )
/* strip the type-signalling cost before refinement, re-add it after,
 * so the refined MV cost is compared on equal terms */
1609 analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1610 analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1611 if( h->mb.i_type == B_L0_L0 )
1613 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1614 i_cost = analysis.l0.me16x16.cost
1615 + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1617 else if( h->mb.i_type == B_L1_L1 )
1619 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1620 i_cost = analysis.l1.me16x16.cost
1621 + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1623 else if( h->mb.i_type == B_BI_BI )
1625 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1626 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1629 else if( i_partition == D_16x8 )
1631 for( i=0; i<2; i++ )
/* refine each list only if this half actually uses it */
1633 if( analysis.i_mb_partition16x8[i] != D_L1_8x8 )
1634 x264_me_refine_qpel( h, &analysis.l0.me16x8[i] );
1635 if( analysis.i_mb_partition16x8[i] != D_L0_8x8 )
1636 x264_me_refine_qpel( h, &analysis.l1.me16x8[i] );
1639 else if( i_partition == D_8x16 )
1641 for( i=0; i<2; i++ )
1643 if( analysis.i_mb_partition8x16[i] != D_L1_8x8 )
1644 x264_me_refine_qpel( h, &analysis.l0.me8x16[i] );
1645 if( analysis.i_mb_partition8x16[i] != D_L0_8x8 )
1646 x264_me_refine_qpel( h, &analysis.l1.me8x16[i] );
1649 else if( i_partition == D_8x8 )
1651 for( i=0; i<4; i++ )
1654 int i_part_cost_old;
1656 int i_part_type = h->mb.i_sub_partition[i];
1657 int b_bidir = (i_part_type == D_BI_8x8);
/* direct 8x8 blocks have no MVs of their own to refine */
1659 if( i_part_type == D_DIRECT_8x8 )
/* refine list-0 MV if this sub-partition uses list 0; keep the running
 * 8x8bi total consistent by removing/re-adding the signalling cost */
1661 if( x264_mb_partition_listX_table[0][i_part_type] )
1663 m = &analysis.l0.me8x8[i];
1664 i_part_cost_old = m->cost;
1665 i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
1666 m->cost -= i_type_cost;
1667 x264_me_refine_qpel( h, m );
1669 analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
/* same for list 1 */
1671 if( x264_mb_partition_listX_table[1][i_part_type] )
1673 m = &analysis.l1.me8x8[i];
1674 i_part_cost_old = m->cost;
1675 i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
1676 m->cost -= i_type_cost;
1677 x264_me_refine_qpel( h, m );
1679 analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
1681 /* TODO: update mvp? */
1685 /* best intra mode */
1686 x264_mb_analyse_intra( h, &analysis, i_cost );
1688 if( analysis.i_sad_i16x16 < i_cost )
1690 h->mb.i_type = I_16x16;
1691 i_cost = analysis.i_sad_i16x16;
1693 if( analysis.i_sad_i4x4 < i_cost )
1695 h->mb.i_type = I_4x4;
1696 i_cost = analysis.i_sad_i4x4;
1701 /*-------------------- Update MB from the analysis ----------------------*/
1702 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1703 switch( h->mb.i_type )
/* intra 4x4: copy the 16 chosen prediction modes into the cache, then
 * decide and record the chroma prediction mode */
1706 for( i = 0; i < 16; i++ )
1708 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1709 analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1712 x264_mb_analyse_intra_chroma( h, &analysis );
1713 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
/* intra 16x16: record the luma and chroma prediction modes */
1716 h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1718 x264_mb_analyse_intra_chroma( h, &analysis );
1719 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
/* P_L0: one ref for the whole MB, MVs depend on the partition */
1723 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1724 switch( h->mb.i_partition )
1727 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1731 x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1732 x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1736 x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1737 x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1741 x264_log( h, X264_LOG_ERROR, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
/* P_8x8: per-8x8-block MVs according to each sub-partition */
1747 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1748 for( i = 0; i < 4; i++ )
/* (x,y) = top-left 4x4 coordinate of 8x8 block i in the cache */
1750 const int x = 2*(i%2);
1751 const int y = 2*(i/2);
1753 switch( h->mb.i_sub_partition[i] )
1756 x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1759 x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1760 x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1763 x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1764 x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1767 x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1768 x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1769 x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1770 x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1773 x264_log( h, X264_LOG_ERROR, "internal error\n" );
/* P_SKIP: ref 0 with the predicted skip MV, 16x16 partition */
1782 x264_mb_predict_mv_pskip( h, mvp );
1784 h->mb.i_partition = D_16x16;
1785 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1786 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1791 /* nothing has changed since x264_macroblock_probe_bskip */
/* B_DIRECT: load the direct-mode MVs for all four 8x8 blocks */
1794 x264_mb_load_mv_direct8x8( h, 0 );
1795 x264_mb_load_mv_direct8x8( h, 1 );
1796 x264_mb_load_mv_direct8x8( h, 2 );
1797 x264_mb_load_mv_direct8x8( h, 3 );
1801 /* optimize: cache might not need to be rewritten */
1802 for( i = 0; i < 4; i++ )
1803 x264_mb_cache_mv_b8x8( h, &analysis, i, 1 );
1806 default: /* the rest of the B types */
1807 switch( h->mb.i_partition )
1810 switch( h->mb.i_type )
/* L0-only: real MVs in list 0, list 1 marked unused (ref -1, zero MV) */
1813 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1814 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1816 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1817 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
1818 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
/* L1-only: the mirror case */
1821 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1822 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
1823 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
1825 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1826 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
/* BI: both lists carry real refs and MVs */
1829 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1830 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1832 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1833 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
/* 16x8 / 8x16: the cache helpers write both halves */
1838 x264_mb_cache_mv_b16x8( h, &analysis, 0, 1 );
1839 x264_mb_cache_mv_b16x8( h, &analysis, 1, 1 );
1842 x264_mb_cache_mv_b8x16( h, &analysis, 0, 1 );
1843 x264_mb_cache_mv_b8x16( h, &analysis, 1, 1 );
1846 x264_log( h, X264_LOG_ERROR, "internal error (invalid MB type)\n" );
1852 #include "slicetype_decision.c"