1 /*****************************************************************************
2 * analyse.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
30 #include "../common/common.h"
31 #include "../common/macroblock.h"
32 #include "macroblock.h"
34 #include "ratecontrol.h"
/* Sub-8x8 results for one reference list: for each of the four 8x8 blocks,
 * the summed cost of one sub-partition shape and the motion-estimation state
 * of every sub-block that contributed to it. */
47 int i_cost4x4[4]; /* cost per 8x8 partition */
48 x264_me_t me4x4[4][4]; /* indexed [8x8 block][4x4 sub-block] */
51 int i_cost8x4[4]; /* cost per 8x8 partition */
52 x264_me_t me8x4[4][2]; /* indexed [8x8 block][8x4 sub-block] */
55 int i_cost4x8[4]; /* cost per 8x8 partition */
56 x264_me_t me4x8[4][4]; /* indexed [8x8 block][4x8 sub-block]; NOTE(review): x264_mb_analyse_inter_p4x8 only uses sub-block indices 0 and 1 */
66 } x264_mb_analysis_list_t;
/* Per-macroblock analysis context: holds the costs and choices gathered while
 * the candidate intra and inter modes are evaluated. */
70 /* conduct the analysis using this lambda and QP */
76 /* Luma part 16x16 and 4x4 modes stats */
81 int i_predict4x4[4][4]; /* chosen 4x4 intra mode, indexed [x][y] in 4x4-block units */
87 /* II: Inter part P/B frame */
90 x264_mb_analysis_list_t l0; /* results for reference list 0 */
91 x264_mb_analysis_list_t l1; /* results for reference list 1 */
93 int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
94 int i_cost16x16direct; /* filled by x264_mb_analyse_inter_direct */
96 int i_cost8x8direct[4]; /* direct cost of each 8x8 block; -1 until computed */
100 int i_mb_partition16x8[2]; /* mb_partition_e */
101 int i_mb_partition8x16[2]; /* same encoding as i_mb_partition16x8 */
102 int i_mb_type16x8; /* mb_class_e */
105 int b_direct_available; /* result of x264_mb_predict_mv_direct16x16 */
107 } x264_mb_analysis_t;
/* Lambda weight applied to bit-cost terms, one entry per QP in 0..51.
 * x264_mb_analyse_init reads it as i_qp0_cost_table[i_qp]. */
109 static const int i_qp0_cost_table[52] = {
110 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
111 1, 1, 1, 1, /* 8-11 */
112 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
113 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
114 6, 7, 8, 9,10,11,13,14, /* 28-35 */
115 16,18,20,23,25,29,32,36, /* 36-43 */
116 40,45,51,57,64,72,81,91 /* 44-51 */
/* Column of each 4x4 luma block (index 0..15), in units of 4x4 blocks.
 * The analysis functions multiply it by 4 to get a pixel offset. */
119 static const uint8_t block_idx_x[16] = {
120 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* Row of each 4x4 luma block (index 0..15), in units of 4x4 blocks.
 * The analysis functions multiply it by 4 * stride to get a pixel offset. */
122 static const uint8_t block_idx_y[16] = {
123 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
126 /* TODO: calculate CABAC costs */
/* Cost of signalling a B macroblock type, indexed by macroblock type
 * (B_DIRECT, B_L0_L0, B_L1_L1, B_BI_BI and B_8x8 are looked up in this file).
 * Callers multiply the entry by lambda.
 * NOTE(review): presumably a bit count for the type code — confirm. */
127 static const int i_mb_b_cost_table[18] = {
128 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
/* Type cost for two-partition B macroblocks, indexed by the macroblock type
 * computed in x264_mb_analyse_inter_b16x8 and x264_mb_analyse_inter_b8x16
 * (the same table serves both shapes). Callers multiply the entry by lambda.
 * NOTE(review): the computed index is B_L0_L0 plus an offset; confirm that it
 * always stays below 16. */
130 static const int i_mb_b16x8_cost_table[16] = {
131 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
/* Cost of signalling a B sub-macroblock (8x8) type, indexed by sub-partition
 * type (D_L0_8x8, D_BI_8x8 and D_DIRECT_8x8 are looked up in this file).
 * Callers multiply the entry by lambda. */
133 static const int i_sub_mb_b_cost_table[13] = {
134 7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
/* Reset an analysis context before analysing the current macroblock.
 *
 * h:    encoder state; the slice type, the macroblock position and the
 *       picture size in macroblocks are read.
 * a:    analysis context; zeroed, then filled with defaults.
 * i_qp: quantiser used to look up lambda.
 *       NOTE(review): used unchecked as an index into the 52-entry
 *       i_qp0_cost_table, so callers must keep it within 0..51.
 *
 * Costs are preset to -1. x264_mb_analyse_intra tests for -1 to detect
 * "no cost recorded yet". */
137 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
139 memset( a, 0, sizeof( x264_mb_analysis_t ) );
141 /* conduct the analysis using this lambda and QP */
143 a->i_lambda = i_qp0_cost_table[i_qp];
146 a->i_sad_i16x16 = -1;
150 /* II: Inter part P/B frame */
151 if( h->sh.i_type != SLICE_TYPE_I )
156 /* Calculate max start MV range: dmb is lowered to the distance, in macroblocks, to the picture edges tested below */
158 if( h->mb.i_mb_y < dmb )
160 if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
161 dmb = h->sps->i_mb_width - h->mb.i_mb_x;
162 if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
163 dmb = h->sps->i_mb_height - h->mb.i_mb_y;
165 a->i_mv_range = 16*dmb + 8;
/* list-0 costs: reset for every non-I slice */
167 a->l0.me16x16.cost = -1;
168 a->l0.i_cost8x8 = -1;
170 for( i = 0; i < 4; i++ )
172 a->l0.i_cost4x4[i] = -1;
173 a->l0.i_cost8x4[i] = -1;
174 a->l0.i_cost4x8[i] = -1;
177 a->l0.i_cost16x8 = -1;
178 a->l0.i_cost8x16 = -1;
/* list-1, bidirectional and direct costs: reset for B slices only */
179 if( h->sh.i_type == SLICE_TYPE_B )
181 a->l1.me16x16.cost = -1;
182 a->l1.i_cost8x8 = -1;
184 for( i = 0; i < 4; i++ )
186 a->l1.i_cost4x4[i] = -1;
187 a->l1.i_cost8x4[i] = -1;
188 a->l1.i_cost4x8[i] = -1;
189 a->i_cost8x8direct[i] = -1;
192 a->l1.i_cost16x8 = -1;
193 a->l1.i_cost8x16 = -1;
195 a->i_cost16x16bi = -1;
196 a->i_cost16x16direct = -1;
198 a->i_cost16x8bi = -1;
199 a->i_cost8x16bi = -1;
/* List the 16x16 luma intra prediction modes usable with the given neighbours.
 *
 * i_neighbour: bitmask of available neighbouring macroblocks; only MB_LEFT
 *              and MB_TOP are tested.
 * mode:        output array; receives between one and four I_PRED_16x16_*
 *              values.
 * pi_count:    output; callers use it as the number of valid entries in mode.
 *              NOTE(review): presumably set to the number of modes written in
 *              each branch — confirm. */
210 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
212 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
214 /* top and left available */
215 *mode++ = I_PRED_16x16_V;
216 *mode++ = I_PRED_16x16_H;
217 *mode++ = I_PRED_16x16_DC;
218 *mode++ = I_PRED_16x16_P;
/* left available (the top-and-left case was handled above) */
221 else if( ( i_neighbour & MB_LEFT ) )
224 *mode++ = I_PRED_16x16_DC_LEFT;
225 *mode++ = I_PRED_16x16_H;
/* top available, left missing */
228 else if( ( i_neighbour & MB_TOP ) )
231 *mode++ = I_PRED_16x16_DC_TOP;
232 *mode++ = I_PRED_16x16_V;
/* fallback: the only mode offered is DC_128 */
238 *mode = I_PRED_16x16_DC_128;
/* List the 8x8 chroma intra prediction modes usable with the given neighbours.
 * Mirrors predict_16x16_mode_available, using the I_PRED_CHROMA_* modes.
 *
 * i_neighbour: bitmask of available neighbouring macroblocks; only MB_LEFT
 *              and MB_TOP are tested.
 * mode:        output array; receives between one and four I_PRED_CHROMA_*
 *              values.
 * pi_count:    output; callers use it as the number of valid entries in mode.
 *              NOTE(review): presumably set to the number of modes written in
 *              each branch — confirm. */
244 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
246 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
248 /* top and left available */
249 *mode++ = I_PRED_CHROMA_V;
250 *mode++ = I_PRED_CHROMA_H;
251 *mode++ = I_PRED_CHROMA_DC;
252 *mode++ = I_PRED_CHROMA_P;
/* left available (the top-and-left case was handled above) */
255 else if( ( i_neighbour & MB_LEFT ) )
258 *mode++ = I_PRED_CHROMA_DC_LEFT;
259 *mode++ = I_PRED_CHROMA_H;
/* top available, left missing */
262 else if( ( i_neighbour & MB_TOP ) )
265 *mode++ = I_PRED_CHROMA_DC_TOP;
266 *mode++ = I_PRED_CHROMA_V;
/* fallback: the only mode offered is DC_128 */
272 *mode = I_PRED_CHROMA_DC_128;
/* List the 4x4 luma intra prediction modes usable for block idx.
 *
 * i_neighbour: bitmask of available neighbouring macroblocks.
 * idx:         4x4 block number, used to index the 16-entry needmb table.
 * mode:        output array of I_PRED_4x4_* values.
 * pi_count:    output; callers use it as the number of valid entries in mode.
 *              NOTE(review): presumably set to the number of modes written in
 *              each branch — confirm. */
278 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
/* needmb[idx]: neighbouring macroblocks that block idx depends on */
281 static const unsigned int needmb[16] =
283 MB_LEFT|MB_TOP, MB_TOP,
285 MB_TOP, MB_TOP|MB_TOPRIGHT,
293 /* FIXME even when b_c == 0 there are some cases where missing pixels
294 * are emulated and thus more modes are available TODO
295 * analysis and encode should be fixed too */
/* Each flag is true when every neighbour of that kind needed by the block is
 * present in i_neighbour (trivially true when none is needed):
 * b_a = left, b_b = top, b_c = top-right / MB_PRIVATE. */
296 b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
297 b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
298 b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
/* NOTE(review): presumably the "left and top both satisfied" case — confirm */
302 *mode++ = I_PRED_4x4_DC;
303 *mode++ = I_PRED_4x4_H;
304 *mode++ = I_PRED_4x4_V;
305 *mode++ = I_PRED_4x4_DDR;
306 *mode++ = I_PRED_4x4_VR;
307 *mode++ = I_PRED_4x4_HD;
308 *mode++ = I_PRED_4x4_HU;
/* NOTE(review): presumably added only when b_c also holds — confirm */
314 *mode++ = I_PRED_4x4_DDL;
315 *mode++ = I_PRED_4x4_VL;
/* left only */
319 else if( b_a && !b_b )
321 *mode++ = I_PRED_4x4_DC_LEFT;
322 *mode++ = I_PRED_4x4_H;
323 *mode++ = I_PRED_4x4_HU;
/* top only */
326 else if( !b_a && b_b )
328 *mode++ = I_PRED_4x4_DC_TOP;
329 *mode++ = I_PRED_4x4_V;
/* fallback: the only mode offered is DC_128 */
334 *mode++ = I_PRED_4x4_DC_128;
/* Choose the best luma intra prediction for the current macroblock.
 *
 * Every available 16x16 mode is tried. When X264_ANALYSE_I4x4 is set in the
 * analysis flags for the current slice type, every available 4x4 mode is also
 * tried for each of the 16 blocks. Results are stored in
 * res->i_predict16x16 / res->i_sad_i16x16 and
 * res->i_predict4x4 / res->i_sad_i4x4.
 *
 * Side effects: predictions are written into the fdec luma plane, each 4x4
 * block is encoded in place with its chosen mode, and the chosen 4x4 modes
 * are stored in h->mb.cache.intra4x4_pred_mode. */
339 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
/* I slices use the intra flag set, all other slices the inter flag set */
341 const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
342 const int i_stride = h->mb.pic.i_stride[0];
343 uint8_t *p_src = h->mb.pic.p_fenc[0];
344 uint8_t *p_dst = h->mb.pic.p_fdec[0];
351 /*---------------- Try all modes and calculate their score ---------------*/
353 /* 16x16 prediction selection */
354 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
355 for( i = 0; i < i_max; i++ )
360 i_mode = predict_mode[i];
362 /* we do the prediction */
363 h->predict_16x16[i_mode]( p_dst, i_stride );
365 /* cost = SATD of prediction vs. source + lambda * bs_size_ue() of the remapped mode */
366 i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
367 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
368 /* keep the mode with the lowest cost (-1 means no cost recorded yet) */
369 if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
371 res->i_predict16x16 = i_mode;
372 res->i_sad_i16x16 = i_sad;
376 /* 4x4 prediction selection */
377 if( flags & X264_ANALYSE_I4x4 )
380 for( idx = 0; idx < 16; idx++ )
/* mode predicted for this block; matching it makes a candidate cheaper below */
388 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
389 x = block_idx_x[idx];
390 y = block_idx_y[idx];
/* top-left pixel of the block in the source and reconstruction planes */
392 p_src_by = p_src + 4 * x + 4 * y * i_stride;
393 p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
396 predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
397 for( i = 0; i < i_max; i++ )
402 i_mode = predict_mode[i];
404 /* we do the prediction */
405 h->predict_4x4[i_mode]( p_dst_by, i_stride );
407 /* SATD of prediction vs. source */
408 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
409 p_src_by, i_stride );
/* mode cost: lambda * 1 when the mode equals the predicted one, lambda * 4 otherwise */
411 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
413 /* keep the mode with the lowest cost (-1 means no cost recorded yet) */
414 if( i_best == -1 || i_best > i_sad )
416 res->i_predict4x4[x][y] = i_mode;
420 res->i_sad_i4x4 += i_best;
422 /* we need to encode this block now (the following blocks predict from it) */
423 h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
424 x264_mb_encode_i4x4( h, idx, res->i_qp );
426 /* we need to store the 'fixed' version */
427 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
428 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
430 res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
/* Choose the best 8x8 chroma intra prediction mode for the current macroblock.
 *
 * Each available mode is applied to both chroma planes. Its cost is the sum
 * of the two SATDs plus lambda * bs_size_ue() of the remapped mode. The
 * cheapest mode is stored in res->i_predict8x8 and its cost in
 * res->i_sad_i8x8.
 *
 * Side effect: the fdec chroma planes are overwritten by the last mode tried,
 * which is not necessarily the chosen one. */
434 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
441 uint8_t *p_dstc[2], *p_srcc[2];
444 /* 8x8 prediction selection for chroma */
/* index 0 = plane 1, index 1 = plane 2 of the fdec/fenc pictures */
445 p_dstc[0] = h->mb.pic.p_fdec[1];
446 p_dstc[1] = h->mb.pic.p_fdec[2];
447 p_srcc[0] = h->mb.pic.p_fenc[1];
448 p_srcc[1] = h->mb.pic.p_fenc[2];
450 i_stride[0] = h->mb.pic.i_stride[1];
451 i_stride[1] = h->mb.pic.i_stride[2];
453 predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
454 res->i_sad_i8x8 = -1;
455 for( i = 0; i < i_max; i++ )
460 i_mode = predict_mode[i];
462 /* we do the prediction */
463 h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
464 h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
466 /* we calculate the cost */
467 i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
468 p_srcc[0], i_stride[0] ) +
469 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
470 p_srcc[1], i_stride[1] ) +
471 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
473 /* keep the mode with the lowest cost (-1 means no cost recorded yet) */
474 if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
476 res->i_predict8x8 = i_mode;
477 res->i_sad_i8x8 = i_sad;
/* 16x16 motion search for a P macroblock over every list-0 reference frame.
 *
 * For each reference the motion vector is predicted, candidate vectors are
 * collected, and x264_me_search is run. The reference-index cost is added
 * before the result is compared with the best so far in a->l0.me16x16. After
 * the loop the reference cost is subtracted from the winner again and the
 * chosen reference is cached for the whole macroblock.
 *
 * Side effect: the motion vector found for every reference is stored in
 * h->mb.mvr[0][i_ref] at this macroblock's index.
 *
 * NOTE(review): mvc holds four candidates; confirm that
 * x264_mb_predict_mv_ref16x16 never reports more than four in i_mvc. */
482 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
486 int mvc[4][2], i_mvc;
488 /* 16x16 search on all reference frames */
489 m.i_pixel = PIXEL_16x16;
491 m.p_fenc = h->mb.pic.p_fenc[0];
492 m.i_stride= h->mb.pic.i_stride[0];
493 m.i_mv_range = a->i_mv_range;
/* start from the worst possible cost so the first reference always wins */
495 a->l0.me16x16.cost = INT_MAX;
496 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
498 /* search with ref */
499 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
500 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
501 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
502 x264_me_search( h, &m, mvc, i_mvc );
/* add the cost of signalling this reference index */
505 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
507 if( m.cost < a->l0.me16x16.cost )
513 /* save mv for predicting neighbors */
514 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
515 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
518 /* subtract ref cost, so we don't have to add it for the other P types */
519 a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
521 /* Set global ref, needed for all other modes */
522 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
/* 8x8 motion search for a P macroblock, using the reference chosen by the
 * 16x16 search (a->l0.i_ref).
 *
 * The candidate list starts with the 16x16 motion vector, and the result of
 * each 8x8 search is written at mvc[i_mvc]. mvc has room for 1 + 4 entries.
 * NOTE(review): presumably i_mvc starts at 1 and is incremented after each
 * block, so later blocks also try earlier results — confirm.
 * Each result is cached so x264_mb_predict_mv sees it for the next block.
 * a->l0.i_cost8x8 receives the sum of the four block costs. */
525 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
527 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
528 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
529 int mvc[5][2], i_mvc;
532 /* XXX Needed for x264_mb_predict_mv */
533 h->mb.i_partition = D_8x8;
/* first candidate: the 16x16 motion vector */
536 mvc[0][0] = a->l0.me16x16.mv[0];
537 mvc[0][1] = a->l0.me16x16.mv[1];
539 for( i = 0; i < 4; i++ )
541 x264_me_t *m = &a->l0.me8x8[i];
545 m->i_pixel = PIXEL_8x8;
/* x8, y8: position of the block in 8-pixel units */
548 m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
549 m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
550 m->i_stride= h->mb.pic.i_stride[0];
551 m->i_mv_range = a->i_mv_range;
553 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
554 x264_me_search( h, m, mvc, i_mvc );
/* cache position and size are given in 4x4-block units */
556 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
/* record this result in the candidate list */
558 mvc[i_mvc][0] = m->mv[0];
559 mvc[i_mvc][1] = m->mv[1];
563 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
564 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
/* 16x8 motion search for a P macroblock, using the reference chosen by the
 * 16x16 search.
 *
 * Each half (i = 0 top, i = 1 bottom, per the 8*i*stride offset) is searched
 * with two candidates: the motion vectors of 8x8 blocks 2*i and 2*i+1.
 * x264_mb_analyse_inter_p8x8 must therefore have run first.
 * a->l0.i_cost16x8 receives the sum of the two half costs. */
567 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
569 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
570 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
574 /* XXX Needed for x264_mb_predict_mv */
575 h->mb.i_partition = D_16x8;
577 for( i = 0; i < 2; i++ )
579 x264_me_t *m = &a->l0.me16x8[i];
581 m->i_pixel = PIXEL_16x8;
/* the second half starts 8 rows below the first */
584 m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
585 m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
586 m->i_stride= h->mb.pic.i_stride[0];
587 m->i_mv_range = a->i_mv_range;
/* candidates: the two 8x8 results in the same row */
589 mvc[0][0] = a->l0.me8x8[2*i].mv[0];
590 mvc[0][1] = a->l0.me8x8[2*i].mv[1];
591 mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
592 mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
594 x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
595 x264_me_search( h, m, mvc, 2 );
/* cache the result so the prediction for the second half can use it */
597 x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
600 a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
/* 8x16 motion search for a P macroblock, using the reference chosen by the
 * 16x16 search.
 *
 * Each half (i = 0 left, i = 1 right, per the 8*i pixel offset) is searched
 * with two candidates: the motion vectors of 8x8 blocks i and i+2.
 * x264_mb_analyse_inter_p8x8 must therefore have run first.
 * a->l0.i_cost8x16 receives the sum of the two half costs. */
603 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
605 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
606 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
610 /* XXX Needed for x264_mb_predict_mv */
611 h->mb.i_partition = D_8x16;
613 for( i = 0; i < 2; i++ )
615 x264_me_t *m = &a->l0.me8x16[i];
617 m->i_pixel = PIXEL_8x16;
/* the second half starts 8 pixels to the right of the first */
620 m->p_fenc = &p_fenc[8*i];
621 m->p_fref = &p_fref[8*i];
622 m->i_stride= h->mb.pic.i_stride[0];
623 m->i_mv_range = a->i_mv_range;
/* candidates: the two 8x8 results in the same column */
625 mvc[0][0] = a->l0.me8x8[i].mv[0];
626 mvc[0][1] = a->l0.me8x8[i].mv[1];
627 mvc[1][0] = a->l0.me8x8[i+2].mv[0];
628 mvc[1][1] = a->l0.me8x8[i+2].mv[1];
630 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
631 x264_me_search( h, m, mvc, 2 );
/* cache the result so the prediction for the second half can use it */
633 x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
636 a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
/* 4x4 motion search inside 8x8 block i8x8 of a P macroblock.
 *
 * Only the first 4x4 block is given a candidate (the 8x8 block's motion
 * vector); the other three are searched with i_mvc == 0. Each result is
 * cached so x264_mb_predict_mv can use it for the following blocks.
 * a->l0.i_cost4x4[i8x8] receives the sum of the four costs. */
639 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
641 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
642 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
646 /* XXX Needed for x264_mb_predict_mv */
647 h->mb.i_partition = D_8x8;
649 for( i4x4 = 0; i4x4 < 4; i4x4++ )
/* idx: number of the 4x4 block within the macroblock (0..15) */
651 const int idx = 4*i8x8 + i4x4;
652 const int x4 = block_idx_x[idx];
653 const int y4 = block_idx_y[idx];
/* one candidate for the first sub-block, none for the others */
654 const int i_mvc = (i4x4 == 0);
656 x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
658 m->i_pixel = PIXEL_4x4;
661 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
662 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
663 m->i_stride= h->mb.pic.i_stride[0];
664 m->i_mv_range = a->i_mv_range;
666 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
667 x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
669 x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
672 a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
673 a->l0.me4x4[i8x8][1].cost +
674 a->l0.me4x4[i8x8][2].cost +
675 a->l0.me4x4[i8x8][3].cost;
/* 8x4 motion search inside 8x8 block i8x8 of a P macroblock.
 *
 * The first 8x4 block is given one candidate: the motion vector of the first
 * 4x4 block of the same 8x8 block. x264_mb_analyse_inter_p4x4 must therefore
 * have run for i8x8 first. The second 8x4 block is searched with no
 * candidate. a->l0.i_cost8x4[i8x8] receives the sum of the two costs. */
678 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
680 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
681 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
685 /* XXX Needed for x264_mb_predict_mv */
686 h->mb.i_partition = D_8x8;
688 for( i8x4 = 0; i8x4 < 2; i8x4++ )
/* idx: 4x4 block at the left end of this 8x4 block (sub-block 0 or 2) */
690 const int idx = 4*i8x8 + 2*i8x4;
691 const int x4 = block_idx_x[idx];
692 const int y4 = block_idx_y[idx];
/* one candidate for the first sub-block, none for the second */
693 const int i_mvc = (i8x4 == 0);
695 x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
697 m->i_pixel = PIXEL_8x4;
700 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
701 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
702 m->i_stride= h->mb.pic.i_stride[0];
703 m->i_mv_range = a->i_mv_range;
705 x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
706 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
708 x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
711 a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost;
/* 4x8 motion search inside 8x8 block i8x8 of a P macroblock.
 *
 * The first 4x8 block is given one candidate: the motion vector of the first
 * 4x4 block of the same 8x8 block. x264_mb_analyse_inter_p4x4 must therefore
 * have run for i8x8 first. The second 4x8 block is searched with no
 * candidate. a->l0.i_cost4x8[i8x8] receives the sum of the two costs. */
714 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
716 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
717 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
721 /* XXX Needed for x264_mb_predict_mv */
722 h->mb.i_partition = D_8x8;
724 for( i4x8 = 0; i4x8 < 2; i4x8++ )
/* idx: 4x4 block at the top of this 4x8 block (sub-block 0 or 1) */
726 const int idx = 4*i8x8 + i4x8;
727 const int x4 = block_idx_x[idx];
728 const int y4 = block_idx_y[idx];
/* one candidate for the first sub-block, none for the second */
729 const int i_mvc = (i4x8 == 0);
731 x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
733 m->i_pixel = PIXEL_4x8;
736 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
737 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
738 m->i_stride= h->mb.pic.i_stride[0];
739 m->i_mv_range = a->i_mv_range;
741 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
742 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
744 x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
747 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
/* Cost of coding the current B macroblock in direct mode.
 *
 * Precondition (see the comment below): the fdec luma plane must already hold
 * the direct-mode prediction. The function itself only measures it.
 *
 * Outputs:
 *   a->i_cost8x8direct[i] = SATD of 8x8 block i
 *                           + lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8]
 *   a->i_cost16x16direct  = sum of the four SATDs
 *                           + lambda * i_mb_b_cost_table[B_DIRECT] */
750 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
752 /* Assumes that fdec still contains the results of
753 * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
755 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
756 uint8_t *p_fdec = h->mb.pic.p_fdec[0];
757 int i_stride= h->mb.pic.i_stride[0];
760 a->i_cost16x16direct = 0;
761 for( i = 0; i < 4; i++ )
/* x8, y8: position of the block in 8-pixel units */
765 const int off = 8 * x8 + 8 * i_stride * y8;
/* chained assignment: store the block SATD, then add it to the 16x16 total */
766 a->i_cost16x16direct +=
767 a->i_cost8x8direct[i] =
768 h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
/* the sub-type cost goes into the per-block figure only, not the 16x16 total */
771 a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
774 a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
/* 16x16 motion search for a B macroblock: list 0, list 1 and bidirectional.
 *
 * Each list is searched over all of its reference frames with no candidate
 * vectors (NULL, 0). The reference-index cost is added for the comparison and
 * subtracted again from the winner. Both chosen references are cached for the
 * whole macroblock.
 *
 * The bidirectional cost is the SATD of the source against the average of two
 * motion-compensated blocks, plus lambda times the reference-index and
 * motion-vector-difference sizes of both lists.
 *
 * Finally the macroblock-type cost is added to a->i_cost16x16bi,
 * a->l0.me16x16.cost and a->l1.me16x16.cost.
 *
 * NOTE(review): a->i_cost16x16bi includes the reference-index costs, but they
 * were subtracted from the l0/l1 costs above — confirm that the later
 * comparison between these three figures is meant to be asymmetric. */
777 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
/* scratch 16x16 blocks with stride 16, used for the bidirectional average */
779 uint8_t pix1[16*16], pix2[16*16];
784 /* 16x16 search on all reference frames */
785 m.i_pixel = PIXEL_16x16;
787 m.p_fenc = h->mb.pic.p_fenc[0];
788 m.i_stride= h->mb.pic.i_stride[0];
789 m.i_mv_range = a->i_mv_range;
/* list 0 */
792 a->l0.me16x16.cost = INT_MAX;
793 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
795 /* search with ref */
796 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
797 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
798 x264_me_search( h, &m, NULL, 0 );
/* add the cost of signalling this reference index */
801 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
803 if( m.cost < a->l0.me16x16.cost )
809 /* subtract ref cost, so we don't have to add it for the other MB types */
810 a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
/* list 1 */
813 a->l1.me16x16.cost = INT_MAX;
814 for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
816 /* search with ref */
817 m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
818 x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
819 x264_me_search( h, &m, NULL, 0 );
/* add the cost of signalling this reference index */
822 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
824 if( m.cost < a->l1.me16x16.cost )
830 /* subtract ref cost, so we don't have to add it for the other MB types */
831 a->l1.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );
833 /* Set global ref, needed for other modes? */
834 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
835 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
837 /* get cost of BI mode: motion-compensate both lists (presumably into pix1 and pix2 — confirm), then average */
838 h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
840 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
842 h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
844 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
/* the average is compared from pix1 below */
846 h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
848 a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
849 a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
850 bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
851 bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
852 bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
853 bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
854 bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
/* add the macroblock-type cost to each of the three alternatives */
857 a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
858 a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
859 a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
/* CACHE_MV_BI: write reference index and motion vector of one B partition
 * into the macroblock cache, for both lists.
 *
 * x, y, dx, dy: position and size passed to the x264_macroblock_cache_*
 *               helpers.
 * me0, me1:     motion-estimation results for list 0 and list 1.
 * part:         partition type; x264_mb_partition_listX_table[list][part]
 *               tells whether that list is used.
 *
 * For a used list the chosen reference (a->lN.i_ref) and the vector from meN
 * are cached. For an unused list, reference -1 and a zero vector are cached,
 * and a zero mvd is written as well.
 * NOTE(review): presumably the mvd handling depends on a b_mvd variable in
 * the caller — confirm.
 *
 * The macro expects `h` and `a` to exist in the calling scope. Arguments are
 * expanded without parentheses and several times each, so pass only simple
 * expressions without side effects. */
862 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
863 if( x264_mb_partition_listX_table[0][part] ) \
865 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
866 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
870 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
871 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \
873 x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
875 if( x264_mb_partition_listX_table[1][part] ) \
877 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
878 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
882 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
883 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \
885 x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
/* Store the decision for 8x8 block i of a B macroblock in the macroblock
 * cache.
 *
 * If the block uses D_DIRECT_8x8, the direct vectors are loaded with
 * x264_mb_load_mv_direct8x8, the mvd of both lists is zeroed and the block's
 * skip flag is set to 1. Otherwise CACHE_MV_BI stores the list-0/list-1
 * results according to h->mb.i_sub_partition[i].
 *
 * NOTE(review): b_mvd is presumably consumed inside CACHE_MV_BI — confirm. */
888 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
892 if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
894 x264_mb_load_mv_direct8x8( h, i );
/* direct blocks: zero mvd in both lists, skip flag set to 1 */
897 x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );
898 x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );
899 x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
904 CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
/* Store the decision for 16x8 partition i of a B macroblock in the macroblock
 * cache. The partition sits at (0, 2*i) and is 4x2 in the cache's units; the
 * list usage comes from a->i_mb_partition16x8[i].
 * NOTE(review): b_mvd is presumably consumed inside CACHE_MV_BI — confirm. */
907 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
909 CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
/* Store the decision for 8x16 partition i of a B macroblock in the macroblock
 * cache. The partition sits at (2*i, 0) and is 2x4 in the cache's units; the
 * list usage comes from a->i_mb_partition8x16[i].
 * NOTE(review): b_mvd is presumably consumed inside CACHE_MV_BI — confirm. */
911 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
913 CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
/* 8x8 analysis for a B macroblock.
 *
 * Each 8x8 block is searched in list 0 and list 1, using that list's 16x16
 * vector as the only candidate. A bidirectional cost is built from the
 * average of the two motion-compensated blocks. The cheapest of L0, L1, BI
 * and (when its cost is >= 0) direct is stored in h->mb.i_sub_partition[i]
 * and added to a->i_cost8x8bi. Finally the B_8x8 macroblock-type cost is
 * added.
 *
 * NOTE(review): lX->i_cost8x8 is accumulated with +=, but
 * x264_mb_analyse_init presets it to -1. Unless it is reset elsewhere, the
 * total is one less than the true sum — confirm.
 * NOTE(review): the bidirectional cost adds the D_L0_8x8 sub-type cost once
 * per list, on top of the D_BI_8x8 cost — confirm that this is intended. */
917 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
/* reference planes chosen by the 16x16 search, one per list */
919 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
920 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
921 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
925 /* XXX Needed for x264_mb_predict_mv */
926 h->mb.i_partition = D_8x8;
930 for( i = 0; i < 4; i++ )
/* x8, y8: position of the block in 8-pixel units */
934 uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
936 int i_part_cost_bi = 0;
/* search both lists; l selects list 0 or list 1 */
938 for( l = 0; l < 2; l++ )
940 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
941 x264_me_t *m = &lX->me8x8[i];
943 m->i_pixel = PIXEL_8x8;
946 m->p_fenc = p_fenc_i;
947 m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
948 m->i_stride = h->mb.pic.i_stride[0];
949 m->i_mv_range = a->i_mv_range;
951 x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
952 x264_me_search( h, m, &lX->me16x16.mv, 1 );
954 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
955 lX->i_cost8x8 += m->cost;
/* motion-compensate this list's block into pix[l] (stride 8) for the BI average */
958 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
959 m->mv[0], m->mv[1], 8, 8 );
960 /* FIXME: ref cost */
961 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
962 bs_size_se( m->mv[1] - m->mvp[1] ) +
963 i_sub_mb_b_cost_table[D_L0_8x8] );
/* the average is compared from pix[0] below */
966 h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
967 i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 )
968 + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
/* pick the cheapest of L0, L1, BI and direct; ties keep the earlier choice */
970 i_part_cost = a->l0.me8x8[i].cost;
971 h->mb.i_sub_partition[i] = D_L0_8x8;
972 if( a->l1.me8x8[i].cost < i_part_cost )
974 i_part_cost = a->l1.me8x8[i].cost;
975 h->mb.i_sub_partition[i] = D_L1_8x8;
977 if( i_part_cost_bi < i_part_cost )
979 i_part_cost = i_part_cost_bi;
980 h->mb.i_sub_partition[i] = D_BI_8x8;
/* a negative direct cost means direct was not evaluated for this block */
982 if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
984 i_part_cost = a->i_cost8x8direct[i];
985 h->mb.i_sub_partition[i] = D_DIRECT_8x8;
987 a->i_cost8x8bi += i_part_cost;
989 /* XXX Needed for x264_mb_predict_mv */
990 x264_mb_cache_mv_b8x8( h, a, i, 0 );
994 a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
/* 16x8 analysis for a B macroblock.
 *
 * Each half is searched in both lists, using the two 8x8 results of the same
 * row as candidates. A bidirectional cost is built from the average of the
 * two motion-compensated blocks plus lambda times the motion-vector
 * difference sizes. The cheapest of L0, L1 and BI is stored in
 * a->i_mb_partition16x8[i]. The two choices are folded into
 * a->i_mb_type16x8, and that type's cost is added to a->i_cost16x8bi.
 *
 * NOTE(review): pix is two 8x8 buffers, and motion compensation writes 8x8
 * pixels at stride 8, yet avg and satd are then called with PIXEL_16x8 on the
 * same buffers at stride 8. If PIXEL_16x8 means 16 wide by 8 high, those
 * calls touch 8*7+16 = 72 bytes per buffer, more than the 64 available, and
 * half of the block is never filled — confirm and fix.
 * NOTE(review): x264_mb_predict_mv is called with list 0 even while l == 1,
 * and with width 2 where x264_mb_analyse_inter_p16x8 passes 4 — confirm.
 * NOTE(review): BI must beat the other cost by more than lambda to be chosen,
 * but that lambda is not included in the stored cost — confirm. */
997 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
999 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1000 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1001 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1002 uint8_t pix[2][8*8];
1003 int i_ref_stride = h->mb.pic.i_stride[0];
1007 h->mb.i_partition = D_16x8;
1008 a->i_cost16x8bi = 0;
1010 for( i = 0; i < 2; i++ )
/* the second half starts 8 rows below the first */
1012 uint8_t *p_fenc_i = &p_fenc[8*i*i_ref_stride];
1014 int i_part_cost_bi = 0;
1016 /* TODO: check only the list(s) that were used in b8x8? */
1017 for( l = 0; l < 2; l++ )
1019 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1020 x264_me_t *m = &lX->me16x8[i];
1022 m->i_pixel = PIXEL_16x8;
1023 m->lm = a->i_lambda;
1025 m->p_fenc = p_fenc_i;
1026 m->i_stride= i_ref_stride;
1027 m->p_fref = &p_fref[l][8*i*i_ref_stride];
1028 m->i_mv_range = a->i_mv_range;
/* candidates: this list's two 8x8 results in the same row */
1030 mvc[0][0] = lX->me8x8[2*i].mv[0];
1031 mvc[0][1] = lX->me8x8[2*i].mv[1];
1032 mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1033 mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1035 x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1036 x264_me_search( h, m, mvc, 2 );
/* motion-compensate into pix[l] for the BI average (see the size note above) */
1039 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1040 m->mv[0], m->mv[1], 8, 8 );
1041 /* FIXME: ref cost */
1042 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1043 bs_size_se( m->mv[1] - m->mvp[1] ) );
1046 h->pixf.avg[PIXEL_16x8]( pix[0], 8, pix[1], 8 );
1047 i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
/* pick the cheapest of L0, L1 and BI; ties keep the earlier choice */
1049 i_part_cost = a->l0.me16x8[i].cost;
1050 a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1051 if( a->l1.me16x8[i].cost < i_part_cost )
1053 i_part_cost = a->l1.me16x8[i].cost;
1054 a->i_mb_partition16x8[i] = D_L1_8x8;
1056 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1058 i_part_cost = i_part_cost_bi;
1059 a->i_mb_partition16x8[i] = D_BI_8x8;
1061 a->i_cost16x8bi += i_part_cost;
1064 x264_mb_cache_mv_b16x8( h, a, i, 0 );
/* combine the two per-half choices into one macroblock type
 * (relies on the numeric values of D_L0_8x8 / D_L1_8x8 / D_BI_8x8 — TODO confirm) */
1068 a->i_mb_type16x8 = B_L0_L0
1069 + (a->i_mb_partition16x8[0]>>2) * 3
1070 + (a->i_mb_partition16x8[1]>>2);
1071 a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
/* 8x16 analysis for a B macroblock.
 *
 * Each half is searched in both lists, using the two 8x8 results of the same
 * column as candidates. A bidirectional cost is built from the average of the
 * two motion-compensated blocks plus lambda times the motion-vector
 * difference sizes. The cheapest of L0, L1 and BI is stored in
 * a->i_mb_partition8x16[i]. The two choices are folded into
 * a->i_mb_type8x16, and that type's cost (taken from i_mb_b16x8_cost_table,
 * which serves both shapes) is added to a->i_cost8x16bi.
 *
 * NOTE(review): pix is two 8x8 buffers, and motion compensation writes 8x8
 * pixels at stride 8, yet avg and satd are then called with PIXEL_8x16 on the
 * same buffers at stride 8. If PIXEL_8x16 means 8 wide by 16 high, those
 * calls touch 128 bytes per buffer, more than the 64 available, and half of
 * the block is never filled — confirm and fix.
 * NOTE(review): x264_mb_predict_mv is called with list 0 even while l == 1 —
 * confirm.
 * NOTE(review): BI must beat the other cost by more than lambda to be chosen,
 * but that lambda is not included in the stored cost — confirm. */
1073 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1075 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1076 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1077 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1078 uint8_t pix[2][8*8];
1079 int i_ref_stride = h->mb.pic.i_stride[0];
1083 h->mb.i_partition = D_8x16;
1084 a->i_cost8x16bi = 0;
1086 for( i = 0; i < 2; i++ )
/* the second half starts 8 pixels to the right of the first */
1088 uint8_t *p_fenc_i = &p_fenc[8*i];
1090 int i_part_cost_bi = 0;
1092 for( l = 0; l < 2; l++ )
1094 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1095 x264_me_t *m = &lX->me8x16[i];
1097 m->i_pixel = PIXEL_8x16;
1098 m->lm = a->i_lambda;
1100 m->p_fenc = p_fenc_i;
1101 m->p_fref = &p_fref[l][8*i];
1102 m->i_stride= i_ref_stride;
1103 m->i_mv_range = a->i_mv_range;
/* candidates: this list's two 8x8 results in the same column */
1105 mvc[0][0] = lX->me8x8[i].mv[0];
1106 mvc[0][1] = lX->me8x8[i].mv[1];
1107 mvc[1][0] = lX->me8x8[i+2].mv[0];
1108 mvc[1][1] = lX->me8x8[i+2].mv[1];
1110 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1111 x264_me_search( h, m, mvc, 2 );
/* motion-compensate into pix[l] for the BI average (see the size note above) */
1114 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1115 m->mv[0], m->mv[1], 8, 8 );
1116 /* FIXME: ref cost */
1117 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1118 bs_size_se( m->mv[1] - m->mvp[1] ) );
1121 h->pixf.avg[PIXEL_8x16]( pix[0], 8, pix[1], 8 );
1122 i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
/* pick the cheapest of L0, L1 and BI; ties keep the earlier choice */
1124 i_part_cost = a->l0.me8x16[i].cost;
1125 a->i_mb_partition8x16[i] = D_L0_8x8;
1126 if( a->l1.me8x16[i].cost < i_part_cost )
1128 i_part_cost = a->l1.me8x16[i].cost;
1129 a->i_mb_partition8x16[i] = D_L1_8x8;
1131 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1133 i_part_cost = i_part_cost_bi;
1134 a->i_mb_partition8x16[i] = D_BI_8x8;
1136 a->i_cost8x16bi += i_part_cost;
1139 x264_mb_cache_mv_b8x16( h, a, i, 0 );
/* combine the two per-half choices into one macroblock type
 * (relies on the numeric values of D_L0_8x8 / D_L1_8x8 / D_BI_8x8 — TODO confirm) */
1143 a->i_mb_type8x16 = B_L0_L0
1144 + (a->i_mb_partition8x16[0]>>2) * 3
1145 + (a->i_mb_partition8x16[1]>>2);
1146 a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1149 /*****************************************************************************
1150 * x264_macroblock_analyse:
1151 *****************************************************************************/
1152 void x264_macroblock_analyse( x264_t *h )
1154 x264_mb_analysis_t analysis;
1157 h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
1159 /* FIXME check if it's 12 */
1160 if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
1161 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
1162 else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
1163 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
1166 x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1168 /*--------------------------- Do the analysis ---------------------------*/
1169 if( h->sh.i_type == SLICE_TYPE_I )
1171 x264_mb_analyse_intra( h, &analysis );
1173 if( analysis.i_sad_i4x4 >= 0 && analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
1174 h->mb.i_type = I_4x4;
1176 h->mb.i_type = I_16x16;
1178 else if( h->sh.i_type == SLICE_TYPE_P )
1180 const unsigned int i_neighbour = h->mb.i_neighbour;
1184 int i_intra_cost, i_intra_type;
1186 /* Fast P_SKIP detection */
1187 if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
1188 ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
1189 ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
1190 ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
1192 b_skip = x264_macroblock_probe_pskip( h );
1197 h->mb.i_type = P_SKIP;
1198 h->mb.i_partition = D_16x16;
1202 const unsigned int flags = h->param.analyse.inter;
1206 x264_mb_analyse_inter_p16x16( h, &analysis );
1207 if( flags & X264_ANALYSE_PSUB16x16 )
1208 x264_mb_analyse_inter_p8x8( h, &analysis );
1210 /* Select best inter mode */
1212 i_partition = D_16x16;
1213 i_cost = analysis.l0.me16x16.cost;
1215 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1216 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1221 i_partition = D_8x8;
1222 h->mb.i_sub_partition[0] = D_L0_8x8;
1223 h->mb.i_sub_partition[1] = D_L0_8x8;
1224 h->mb.i_sub_partition[2] = D_L0_8x8;
1225 h->mb.i_sub_partition[3] = D_L0_8x8;
1227 i_cost = analysis.l0.i_cost8x8;
1230 if( flags & X264_ANALYSE_PSUB8x8 )
1232 for( i = 0; i < 4; i++ )
1234 x264_mb_analyse_inter_p4x4( h, &analysis, i );
1235 if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1239 h->mb.i_sub_partition[i] = D_L0_4x4;
1240 i_cost8x8 = analysis.l0.i_cost4x4[i];
1242 x264_mb_analyse_inter_p8x4( h, &analysis, i );
1243 if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
1245 h->mb.i_sub_partition[i] = D_L0_8x4;
1246 i_cost8x8 = analysis.l0.i_cost8x4[i];
1249 x264_mb_analyse_inter_p4x8( h, &analysis, i );
1250 if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
1252 h->mb.i_sub_partition[i] = D_L0_4x8;
1253 i_cost8x8 = analysis.l0.i_cost4x8[i];
1256 i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1261 /* Now do sub 16x8/8x16 */
1262 x264_mb_analyse_inter_p16x8( h, &analysis );
1263 if( analysis.l0.i_cost16x8 < i_cost )
1266 i_partition = D_16x8;
1267 i_cost = analysis.l0.i_cost16x8;
1270 x264_mb_analyse_inter_p8x16( h, &analysis );
1271 if( analysis.l0.i_cost8x16 < i_cost )
1274 i_partition = D_8x16;
1275 i_cost = analysis.l0.i_cost8x16;
1279 h->mb.i_type = i_type;
1280 h->mb.i_partition = i_partition;
1283 if( h->mb.i_partition == D_16x16 )
1285 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1286 i_cost = analysis.l0.me16x16.cost;
1288 else if( h->mb.i_partition == D_16x8 )
1290 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1291 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1292 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1294 else if( h->mb.i_partition == D_8x16 )
1296 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1297 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1298 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1300 else if( h->mb.i_partition == D_8x8 )
1304 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1306 switch( h->mb.i_sub_partition[i8x8] )
1309 x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1310 i_cost += analysis.l0.me8x8[i8x8].cost;
1313 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1314 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1315 i_cost += analysis.l0.me8x4[i8x8][0].cost +
1316 analysis.l0.me8x4[i8x8][1].cost;
1319 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1320 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1321 i_cost += analysis.l0.me4x8[i8x8][0].cost +
1322 analysis.l0.me4x8[i8x8][1].cost;
1326 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1327 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1328 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1329 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1330 i_cost += analysis.l0.me4x4[i8x8][0].cost +
1331 analysis.l0.me4x4[i8x8][1].cost +
1332 analysis.l0.me4x4[i8x8][2].cost +
1333 analysis.l0.me4x4[i8x8][3].cost;
1336 fprintf( stderr, "internal error (!8x8 && !4x4)" );
1342 x264_mb_analyse_intra( h, &analysis );
1343 i_intra_type = I_16x16;
1344 i_intra_cost = analysis.i_sad_i16x16;
1346 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_intra_cost )
1348 i_intra_type = I_4x4;
1349 i_intra_cost = analysis.i_sad_i4x4;
1352 if( i_intra_cost >= 0 && i_intra_cost < i_cost )
1354 h->mb.i_type = i_intra_type;
1355 i_cost = i_intra_cost;
1358 h->stat.frame.i_intra_cost += i_intra_cost;
1359 h->stat.frame.i_inter_cost += i_cost;
1362 else if( h->sh.i_type == SLICE_TYPE_B )
1366 analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1367 if( analysis.b_direct_available )
1369 h->mb.i_type = B_SKIP;
1372 /* Conditioning the probe on neighboring block types
1373 * doesn't seem to help speed or quality. */
1374 b_skip = x264_macroblock_probe_bskip( h );
1379 const unsigned int flags = h->param.analyse.inter;
1383 /* select best inter mode */
1384 /* direct must be first */
1385 if( analysis.b_direct_available )
1386 x264_mb_analyse_inter_direct( h, &analysis );
1388 x264_mb_analyse_inter_b16x16( h, &analysis );
1390 h->mb.i_type = B_L0_L0;
1391 i_partition = D_16x16;
1392 i_cost = analysis.l0.me16x16.cost;
1393 if( analysis.l1.me16x16.cost < i_cost )
1395 h->mb.i_type = B_L1_L1;
1396 i_cost = analysis.l1.me16x16.cost;
1398 if( analysis.i_cost16x16bi < i_cost )
1400 h->mb.i_type = B_BI_BI;
1401 i_cost = analysis.i_cost16x16bi;
1403 if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 )
1405 h->mb.i_type = B_DIRECT;
1406 i_cost = analysis.i_cost16x16direct;
1409 if( flags & X264_ANALYSE_BSUB16x16 )
1411 x264_mb_analyse_inter_b8x8( h, &analysis );
1412 if( analysis.i_cost8x8bi < i_cost )
1414 h->mb.i_type = B_8x8;
1415 i_partition = D_8x8;
1416 i_cost = analysis.i_cost8x8bi;
1418 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1419 h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1421 x264_mb_analyse_inter_b16x8( h, &analysis );
1422 if( analysis.i_cost16x8bi < i_cost )
1424 i_partition = D_16x8;
1425 i_cost = analysis.i_cost16x8bi;
1426 h->mb.i_type = analysis.i_mb_type16x8;
1429 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1430 h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1432 x264_mb_analyse_inter_b8x16( h, &analysis );
1433 if( analysis.i_cost8x16bi < i_cost )
1435 i_partition = D_8x16;
1436 i_cost = analysis.i_cost8x16bi;
1437 h->mb.i_type = analysis.i_mb_type8x16;
1443 h->mb.i_partition = i_partition;
1446 if( i_partition == D_16x16 )
1448 if( h->mb.i_type == B_L0_L0 )
1450 analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1451 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1452 analysis.l0.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1453 i_cost = analysis.l0.me16x16.cost;
1455 else if( h->mb.i_type == B_L1_L1 )
1457 analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1458 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1459 analysis.l1.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1460 i_cost = analysis.l1.me16x16.cost;
1463 /* TODO: refine bidir, 8x8 */
1465 /* best intra mode */
1466 x264_mb_analyse_intra( h, &analysis );
1468 analysis.i_sad_i16x16 += analysis.i_lambda * i_mb_b_cost_table[I_16x16];
1469 analysis.i_sad_i4x4 += analysis.i_lambda * i_mb_b_cost_table[I_4x4];
1471 if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1473 h->mb.i_type = I_16x16;
1474 i_cost = analysis.i_sad_i16x16;
1476 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1478 h->mb.i_type = I_4x4;
1479 i_cost = analysis.i_sad_i4x4;
1484 /*-------------------- Update MB from the analysis ----------------------*/
1485 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1486 switch( h->mb.i_type )
1489 for( i = 0; i < 16; i++ )
1491 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1492 analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1495 x264_mb_analyse_intra_chroma( h, &analysis );
1496 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1499 h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1501 x264_mb_analyse_intra_chroma( h, &analysis );
1502 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1506 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1507 switch( h->mb.i_partition )
1510 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1514 x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1515 x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1519 x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1520 x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1524 fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
1530 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1531 for( i = 0; i < 4; i++ )
1533 const int x = 2*(i%2);
1534 const int y = 2*(i/2);
1536 switch( h->mb.i_sub_partition[i] )
1539 x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1542 x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1543 x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1546 x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1547 x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1550 x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1551 x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1552 x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1553 x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1556 fprintf( stderr, "internal error\n" );
1565 x264_mb_predict_mv_pskip( h, mvp );
1567 h->mb.i_partition = D_16x16;
1568 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1569 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1574 /* nothing has changed since x264_macroblock_probe_bskip */
1577 x264_mb_load_mv_direct8x8( h, 0 );
1578 x264_mb_load_mv_direct8x8( h, 1 );
1579 x264_mb_load_mv_direct8x8( h, 2 );
1580 x264_mb_load_mv_direct8x8( h, 3 );
1584 /* optimize: cache might not need to be rewritten */
1585 for( i = 0; i < 4; i++ )
1586 x264_mb_cache_mv_b8x8( h, &analysis, i, 1 );
1589 default: /* the rest of the B types */
1590 switch( h->mb.i_partition )
1593 switch( h->mb.i_type )
1596 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1597 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1599 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1600 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
1601 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
1604 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1605 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
1606 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
1608 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1609 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1612 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1613 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1615 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1616 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1621 x264_mb_cache_mv_b16x8( h, &analysis, 0, 1 );
1622 x264_mb_cache_mv_b16x8( h, &analysis, 1, 1 );
1625 x264_mb_cache_mv_b8x16( h, &analysis, 0, 1 );
1626 x264_mb_cache_mv_b8x16( h, &analysis, 1, 1 );
1629 fprintf( stderr, "internal error (invalid MB type)\n" );