1 /*****************************************************************************
2 * analyse.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
30 #include "../common/common.h"
31 #include "../common/macroblock.h"
32 #include "macroblock.h"
34 #include "ratecontrol.h"
/* Tail of the per-reference-list analysis record: costs and motion-estimation
 * state for the sub-8x8 partition shapes of each of the four 8x8 partitions.
 * Earlier members of the struct are not visible in this excerpt. */
47 int i_cost4x4[4]; /* cost per 8x8 partition */
48 x264_me_t me4x4[4][4];
51 int i_cost8x4[4]; /* cost per 8x8 partition */
52 x264_me_t me8x4[4][2];
55 int i_cost4x8[4]; /* cost per 8x8 partition */
/* NOTE(review): x264_mb_analyse_inter_p4x8 only touches me4x8[.][0] and
 * me4x8[.][1]; a second dimension of 2 would presumably suffice — confirm. */
56 x264_me_t me4x8[4][4];
66 } x264_mb_analysis_list_t;
/* Fragment of the per-macroblock analysis context that is filled by
 * x264_mb_analyse_init and the x264_mb_analyse_* functions. */
70 /* conduct the analysis using this lambda and QP */
76 /* Luma part 16x16 and 4x4 modes stats */
/* Best 4x4 intra mode per block; x264_mb_analyse_intra indexes it as [x][y]. */
81 int i_predict4x4[4][4];
87 /* II: Inter part P/B frame */
90 x264_mb_analysis_list_t l0;
91 x264_mb_analysis_list_t l1;
93 int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
94 int i_cost16x16direct;
/* Per-8x8 direct cost; initialised to -1 for B slices and compared against the
 * other 8x8 candidates in x264_mb_analyse_inter_b8x8. */
96 int i_cost8x8direct[4];
100 int i_mb_partition16x8[2]; /* mb_partition_e */
101 int i_mb_partition8x16[2];
102 int i_mb_type16x8; /* mb_class_e */
105 int b_direct_available;
107 } x264_mb_analysis_t;
/* Lambda lookup table indexed by QP (52 entries, QP 0..51).
 * x264_mb_analyse_init reads it to set a->i_lambda. */
109 static const int i_qp0_cost_table[52] = {
110 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
111 1, 1, 1, 1, /* 8-11 */
112 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
113 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
114 6, 7, 8, 9,10,11,13,14, /* 28-35 */
115 16,18,20,23,25,29,32,36, /* 36-43 */
116 40,45,51,57,64,72,81,91 /* 44-51 */
/* Column (x) and row (y), in units of 4x4 blocks, of each of the 16 luma 4x4
 * blocks, indexed by block index. Callers multiply by 4 (and by the stride for
 * y) to obtain pixel offsets inside the macroblock. */
119 static const uint8_t block_idx_x[16] = {
120 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
122 static const uint8_t block_idx_y[16] = {
123 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* Type-cost tables. Every use in this file multiplies the looked-up value by
 * a->i_lambda before adding it to a distortion cost. */
126 /* TODO: calculate CABAC costs */
/* Indexed by macroblock type (B_DIRECT, B_L0_L0, B_L1_L1, B_BI_BI, B_8x8 here). */
127 static const int i_mb_b_cost_table[18] = {
128 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
/* Indexed by the i_mb_type16x8 / i_mb_type8x16 value computed in the
 * B 16x8 and 8x16 analysis functions. */
130 static const int i_mb_b16x8_cost_table[16] = {
131 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
/* Indexed by B sub-partition type (D_L0_8x8, D_BI_8x8, D_DIRECT_8x8 here). */
133 static const int i_sub_mb_b_cost_table[13] = {
134 7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
/* Indexed by P sub-partition type (D_L0_8x8, D_L0_4x4, D_L0_8x4, D_L0_4x8 here).
 * The initialiser values are not visible in this excerpt. */
136 static const int i_sub_mb_p_cost_table[4] = {
/* Reset an analysis context for one macroblock.
 *
 * h:    encoder state (slice type, macroblock position and SPS size are read).
 * a:    context to initialise; it is zeroed first, then the cost fields are
 *       set to -1, which the rest of the file treats as "not computed yet".
 * i_qp: quantiser used to pick the lambda.
 *       NOTE(review): i_qp indexes the 52-entry i_qp0_cost_table with no range
 *       check visible here — callers presumably guarantee 0..51; confirm. */
140 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
142 memset( a, 0, sizeof( x264_mb_analysis_t ) );
144 /* conduct the analysis using this lambda and QP */
146 a->i_lambda = i_qp0_cost_table[i_qp];
149 a->i_sad_i16x16 = -1;
153 /* II: Inter part P/B frame */
154 if( h->sh.i_type != SLICE_TYPE_I )
159 /* Calculate max start MV range */
/* dmb is clamped to the distance (in macroblocks) from the picture edges;
 * its initial value and some clamps are on lines not visible here. */
161 if( h->mb.i_mb_y < dmb )
163 if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
164 dmb = h->sps->i_mb_width - h->mb.i_mb_x;
165 if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
166 dmb = h->sps->i_mb_height - h->mb.i_mb_y;
168 a->i_mv_range = 16*dmb + 8;
/* List 0 costs: used by both P and B slices. */
170 a->l0.me16x16.cost = -1;
171 a->l0.i_cost8x8 = -1;
173 for( i = 0; i < 4; i++ )
175 a->l0.i_cost4x4[i] = -1;
176 a->l0.i_cost8x4[i] = -1;
177 a->l0.i_cost4x8[i] = -1;
180 a->l0.i_cost16x8 = -1;
181 a->l0.i_cost8x16 = -1;
/* List 1, bi-predictive and direct costs: B slices only. */
182 if( h->sh.i_type == SLICE_TYPE_B )
184 a->l1.me16x16.cost = -1;
185 a->l1.i_cost8x8 = -1;
187 for( i = 0; i < 4; i++ )
189 a->l1.i_cost4x4[i] = -1;
190 a->l1.i_cost8x4[i] = -1;
191 a->l1.i_cost4x8[i] = -1;
192 a->i_cost8x8direct[i] = -1;
195 a->l1.i_cost16x8 = -1;
196 a->l1.i_cost8x16 = -1;
198 a->i_cost16x16bi = -1;
199 a->i_cost16x16direct = -1;
201 a->i_cost16x8bi = -1;
202 a->i_cost8x16bi = -1;
/* List the 16x16 luma intra prediction modes usable for the given neighbours.
 *
 * i_neighbour: bitmask tested against MB_LEFT and MB_TOP.
 * mode:        output array receiving the candidate modes (up to 4 entries).
 * pi_count:    presumably receives the number of modes written; the
 *              assignments are on lines not visible here — confirm. */
213 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
215 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
217 /* top and left available */
218 *mode++ = I_PRED_16x16_V;
219 *mode++ = I_PRED_16x16_H;
220 *mode++ = I_PRED_16x16_DC;
221 *mode++ = I_PRED_16x16_P;
/* only the left neighbour is available */
224 else if( ( i_neighbour & MB_LEFT ) )
227 *mode++ = I_PRED_16x16_DC_LEFT;
228 *mode++ = I_PRED_16x16_H;
/* only the top neighbour is available */
231 else if( ( i_neighbour & MB_TOP ) )
234 *mode++ = I_PRED_16x16_DC_TOP;
235 *mode++ = I_PRED_16x16_V;
/* no neighbour: only the DC_128 mode remains */
241 *mode = I_PRED_16x16_DC_128;
/* List the 8x8 chroma intra prediction modes usable for the given neighbours.
 * Same structure as the 16x16 luma variant, with the I_PRED_CHROMA_* modes.
 *
 * i_neighbour: bitmask tested against MB_LEFT and MB_TOP.
 * mode:        output array receiving the candidate modes (up to 4 entries).
 * pi_count:    presumably receives the number of modes written; the
 *              assignments are on lines not visible here — confirm. */
247 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
249 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
251 /* top and left available */
252 *mode++ = I_PRED_CHROMA_V;
253 *mode++ = I_PRED_CHROMA_H;
254 *mode++ = I_PRED_CHROMA_DC;
255 *mode++ = I_PRED_CHROMA_P;
/* only the left neighbour is available */
258 else if( ( i_neighbour & MB_LEFT ) )
261 *mode++ = I_PRED_CHROMA_DC_LEFT;
262 *mode++ = I_PRED_CHROMA_H;
/* only the top neighbour is available */
265 else if( ( i_neighbour & MB_TOP ) )
268 *mode++ = I_PRED_CHROMA_DC_TOP;
269 *mode++ = I_PRED_CHROMA_V;
/* no neighbour: only the DC_128 mode remains */
275 *mode = I_PRED_CHROMA_DC_128;
/* List the 4x4 luma intra prediction modes usable for block idx.
 *
 * i_neighbour: bitmask of available neighbouring macroblocks.
 * idx:         4x4 block index (0..15), used to look up needmb[].
 * mode:        output array receiving the candidate modes.
 * pi_count:    presumably receives the number of modes written; the
 *              assignments are on lines not visible here — confirm. */
281 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
/* Per-block mask of the neighbouring macroblocks that block idx depends on
 * (only part of the table is visible in this excerpt). */
284 static const unsigned int needmb[16] =
286 MB_LEFT|MB_TOP, MB_TOP,
288 MB_TOP, MB_TOP|MB_TOPRIGHT,
296 /* FIXME even when b_c == 0 there is some case where missing pixels
297 * are emulated and thus more mode are available TODO
298 * analysis and encode should be fixed too */
/* b_a / b_b / b_c: every needed left / top / top-right (or MB_PRIVATE)
 * macroblock for this block is present in i_neighbour. */
299 b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
300 b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
301 b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
/* The conditions guarding the next two groups are on lines not visible here. */
305 *mode++ = I_PRED_4x4_DC;
306 *mode++ = I_PRED_4x4_H;
307 *mode++ = I_PRED_4x4_V;
308 *mode++ = I_PRED_4x4_DDR;
309 *mode++ = I_PRED_4x4_VR;
310 *mode++ = I_PRED_4x4_HD;
311 *mode++ = I_PRED_4x4_HU;
317 *mode++ = I_PRED_4x4_DDL;
318 *mode++ = I_PRED_4x4_VL;
/* left pixels only */
322 else if( b_a && !b_b )
324 *mode++ = I_PRED_4x4_DC_LEFT;
325 *mode++ = I_PRED_4x4_H;
326 *mode++ = I_PRED_4x4_HU;
/* top pixels only */
329 else if( !b_a && b_b )
331 *mode++ = I_PRED_4x4_DC_TOP;
332 *mode++ = I_PRED_4x4_V;
337 *mode++ = I_PRED_4x4_DC_128;
/* Evaluate luma intra prediction for the current macroblock.
 *
 * Tries every available 16x16 mode and, when X264_ANALYSE_I4x4 is enabled in
 * the flags for the current slice type, every available 4x4 mode per block.
 * Results are stored in res: i_predict16x16 / i_sad_i16x16 and
 * i_predict4x4[x][y] / i_sad_i4x4. Despite the "sad" names, the scores are
 * SATD values plus a lambda-weighted mode cost.
 * Side effects: predictions are written into h->mb.pic.p_fdec[0], each 4x4
 * block is encoded via x264_mb_encode_i4x4, and
 * h->mb.cache.intra4x4_pred_mode is updated. */
342 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
344 const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
345 const int i_stride = h->mb.pic.i_stride[0];
346 uint8_t *p_src = h->mb.pic.p_fenc[0];
347 uint8_t *p_dst = h->mb.pic.p_fdec[0];
354 /*---------------- Try all modes and calculate their scores ---------------*/
356 /* 16x16 prediction selection */
357 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
358 for( i = 0; i < i_max; i++ )
363 i_mode = predict_mode[i];
365 /* we do the prediction */
366 h->predict_16x16[i_mode]( p_dst, i_stride );
368 /* score = SATD between prediction and source + lambda * mode bit cost */
369 i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
370 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
371 /* a lower score is better; -1 means no score has been stored yet */
372 if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
374 res->i_predict16x16 = i_mode;
375 res->i_sad_i16x16 = i_sad;
379 /* 4x4 prediction selection */
380 if( flags & X264_ANALYSE_I4x4 )
383 for( idx = 0; idx < 16; idx++ )
391 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
392 x = block_idx_x[idx];
393 y = block_idx_y[idx];
/* top-left pixel of this 4x4 block in the source and reconstruction planes */
395 p_src_by = p_src + 4 * x + 4 * y * i_stride;
396 p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
399 predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
400 for( i = 0; i < i_max; i++ )
405 i_mode = predict_mode[i];
407 /* we do the prediction */
408 h->predict_4x4[i_mode]( p_dst_by, i_stride );
410 /* SATD between prediction and source */
411 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
412 p_src_by, i_stride );
/* mode cost: weight 1 when the mode equals the predicted mode, 4 otherwise */
414 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
416 /* a lower score is better; -1 means no score has been stored yet */
417 if( i_best == -1 || i_best > i_sad )
419 res->i_predict4x4[x][y] = i_mode;
423 res->i_sad_i4x4 += i_best;
425 /* we need to encode this mb now (for next ones) */
426 h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
427 x264_mb_encode_i4x4( h, idx, res->i_qp );
429 /* we need to store the 'fixed' version */
430 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
431 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
433 res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
/* Choose the best 8x8 chroma intra prediction mode for the current macroblock.
 *
 * Each available mode is applied to both chroma planes (fdec[1] and fdec[2]);
 * the score is the sum of the two 8x8 SATDs plus a lambda-weighted mode cost.
 * The best mode and score are stored in res->i_predict8x8 / res->i_sad_i8x8.
 * Side effect: the chroma planes of h->mb.pic.p_fdec are overwritten with
 * predictions. */
437 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
444 uint8_t *p_dstc[2], *p_srcc[2];
447 /* 8x8 prediction selection for chroma */
448 p_dstc[0] = h->mb.pic.p_fdec[1];
449 p_dstc[1] = h->mb.pic.p_fdec[2];
450 p_srcc[0] = h->mb.pic.p_fenc[1];
451 p_srcc[1] = h->mb.pic.p_fenc[2];
453 i_stride[0] = h->mb.pic.i_stride[1];
454 i_stride[1] = h->mb.pic.i_stride[2];
456 predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
/* -1 marks "no score stored yet" */
457 res->i_sad_i8x8 = -1;
458 for( i = 0; i < i_max; i++ )
463 i_mode = predict_mode[i];
465 /* we do the prediction */
466 h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
467 h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
469 /* we calculate the cost */
470 i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
471 p_srcc[0], i_stride[0] ) +
472 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
473 p_srcc[1], i_stride[1] ) +
474 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
476 /* a lower score is better */
477 if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
479 res->i_predict8x8 = i_mode;
480 res->i_sad_i8x8 = i_sad;
/* P-slice 16x16 motion search over every list-0 reference frame.
 *
 * For each reference the search cost plus the lambda-weighted reference-index
 * cost is compared against the best so far in a->l0.me16x16. Each reference's
 * MV is also saved in h->mb.mvr for predicting neighbours. On exit the
 * reference cost is removed again from a->l0.me16x16.cost and the chosen
 * reference is written to the macroblock cache. */
485 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
489 int mvc[4][2], i_mvc;
490 int i_fullpel_thresh = INT_MAX;
/* the threshold is only handed to the search when there are several references */
491 int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
493 /* 16x16 Search on all ref frame */
494 m.i_pixel = PIXEL_16x16;
496 m.p_fenc = h->mb.pic.p_fenc[0];
497 m.i_stride= h->mb.pic.i_stride[0];
498 m.i_mv_range = a->i_mv_range;
500 a->l0.me16x16.cost = INT_MAX;
501 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
503 const int i_ref_cost = m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/* the threshold is lowered by this reference's cost for the duration of
 * the search and restored afterwards */
504 i_fullpel_thresh -= i_ref_cost;
506 /* search with ref */
507 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
508 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
509 x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
510 x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
512 m.cost += i_ref_cost;
513 i_fullpel_thresh += i_ref_cost;
/* keep the cheapest reference (the body of this branch is not visible here) */
515 if( m.cost < a->l0.me16x16.cost )
521 /* save mv for predicting neighbors */
522 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
523 h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
526 /* subtract ref cost, so we don't have to add it for the other P types */
527 a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
529 /* Set global ref, needed for all others modes */
530 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
/* P-slice 8x8 motion search: one search per 8x8 partition against the list-0
 * reference a->l0.i_ref. The 16x16 MV is the first search candidate, and each
 * partition's resulting MV is stored in the candidate array for the following
 * partitions. Each partition cost includes lambda * the D_L0_8x8 sub-type
 * cost; their sum goes to a->l0.i_cost8x8. MVs are written to the macroblock
 * cache and h->mb.i_partition is set to D_8x8. */
533 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
535 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
536 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
537 int mvc[5][2], i_mvc;
540 /* XXX Needed for x264_mb_predict_mv */
541 h->mb.i_partition = D_8x8;
/* first candidate: the 16x16 motion vector */
544 mvc[0][0] = a->l0.me16x16.mv[0];
545 mvc[0][1] = a->l0.me16x16.mv[1];
547 for( i = 0; i < 4; i++ )
549 x264_me_t *m = &a->l0.me8x8[i];
553 m->i_pixel = PIXEL_8x8;
/* x8 / y8 (partition column / row) are set on lines not visible here */
556 m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
557 m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
558 m->i_stride= h->mb.pic.i_stride[0];
559 m->i_mv_range = a->i_mv_range;
561 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
562 x264_me_search( h, m, mvc, i_mvc );
564 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
/* store this partition's MV in the candidate array for later partitions */
566 mvc[i_mvc][0] = m->mv[0];
567 mvc[i_mvc][1] = m->mv[1];
571 m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
574 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
575 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
/* P-slice 16x8 motion search: two searches (top and bottom half) against the
 * list-0 reference a->l0.i_ref, seeded with the MVs of the two 8x8 partitions
 * each half covers, so the 8x8 analysis must have filled a->l0.me8x8 first.
 * The summed cost goes to a->l0.i_cost16x8. MVs are written to the macroblock
 * cache and h->mb.i_partition is set to D_16x8. */
578 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
580 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
581 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
585 /* XXX Needed for x264_mb_predict_mv */
586 h->mb.i_partition = D_16x8;
588 for( i = 0; i < 2; i++ )
590 x264_me_t *m = &a->l0.me16x8[i];
592 m->i_pixel = PIXEL_16x8;
/* half i starts 8 rows below half 0 */
595 m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
596 m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
597 m->i_stride= h->mb.pic.i_stride[0];
598 m->i_mv_range = a->i_mv_range;
/* candidates: MVs of the two 8x8 partitions inside this half */
600 mvc[0][0] = a->l0.me8x8[2*i].mv[0];
601 mvc[0][1] = a->l0.me8x8[2*i].mv[1];
602 mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
603 mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
605 x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
606 x264_me_search( h, m, mvc, 2 );
608 x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
611 a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
/* P-slice 8x16 motion search: two searches (left and right half) against the
 * list-0 reference a->l0.i_ref, seeded with the MVs of the two 8x8 partitions
 * each half covers, so the 8x8 analysis must have filled a->l0.me8x8 first.
 * The summed cost goes to a->l0.i_cost8x16. MVs are written to the macroblock
 * cache and h->mb.i_partition is set to D_8x16. */
614 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
616 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
617 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
621 /* XXX Needed for x264_mb_predict_mv */
622 h->mb.i_partition = D_8x16;
624 for( i = 0; i < 2; i++ )
626 x264_me_t *m = &a->l0.me8x16[i];
628 m->i_pixel = PIXEL_8x16;
/* half i starts 8 columns to the right of half 0 */
631 m->p_fenc = &p_fenc[8*i];
632 m->p_fref = &p_fref[8*i];
633 m->i_stride= h->mb.pic.i_stride[0];
634 m->i_mv_range = a->i_mv_range;
/* candidates: MVs of the upper and lower 8x8 partitions inside this half */
636 mvc[0][0] = a->l0.me8x8[i].mv[0];
637 mvc[0][1] = a->l0.me8x8[i].mv[1];
638 mvc[1][0] = a->l0.me8x8[i+2].mv[0];
639 mvc[1][1] = a->l0.me8x8[i+2].mv[1];
641 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
642 x264_me_search( h, m, mvc, 2 );
644 x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
647 a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
/* P-slice 4x4 motion search inside 8x8 partition i8x8 (0..3).
 *
 * Runs one search per 4x4 block against the list-0 reference a->l0.i_ref.
 * The four costs plus lambda * the D_L0_4x4 sub-type cost are stored in
 * a->l0.i_cost4x4[i8x8]. MVs are written to the macroblock cache. */
650 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
652 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
653 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
657 /* XXX Needed for x264_mb_predict_mv */
658 h->mb.i_partition = D_8x8;
660 for( i4x4 = 0; i4x4 < 4; i4x4++ )
662 const int idx = 4*i8x8 + i4x4;
663 const int x4 = block_idx_x[idx];
664 const int y4 = block_idx_y[idx];
/* only the first block is given a candidate (the enclosing 8x8 MV) */
665 const int i_mvc = (i4x4 == 0);
667 x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
669 m->i_pixel = PIXEL_4x4;
672 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
673 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
674 m->i_stride= h->mb.pic.i_stride[0];
675 m->i_mv_range = a->i_mv_range;
677 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
678 x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
680 x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
683 a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
684 a->l0.me4x4[i8x8][1].cost +
685 a->l0.me4x4[i8x8][2].cost +
686 a->l0.me4x4[i8x8][3].cost +
687 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
/* P-slice 8x4 motion search inside 8x8 partition i8x8 (0..3).
 *
 * Runs two searches (upper and lower 8x4 block) against the list-0 reference
 * a->l0.i_ref. The first search is seeded with the MV of the partition's
 * first 4x4 block, so the 4x4 analysis must have run for this partition.
 * The two costs plus lambda * the D_L0_8x4 sub-type cost are stored in
 * a->l0.i_cost8x4[i8x8]. MVs are written to the macroblock cache. */
690 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
692 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
693 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
697 /* XXX Needed for x264_mb_predict_mv */
698 h->mb.i_partition = D_8x8;
700 for( i8x4 = 0; i8x4 < 2; i8x4++ )
/* idx is the 4x4 block at the left edge of this 8x4 block */
702 const int idx = 4*i8x8 + 2*i8x4;
703 const int x4 = block_idx_x[idx];
704 const int y4 = block_idx_y[idx];
/* only the first block is given a candidate */
705 const int i_mvc = (i8x4 == 0);
707 x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
709 m->i_pixel = PIXEL_8x4;
712 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
713 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
714 m->i_stride= h->mb.pic.i_stride[0];
715 m->i_mv_range = a->i_mv_range;
717 x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
718 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
720 x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
723 a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
724 a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
/* P-slice 4x8 motion search inside 8x8 partition i8x8 (0..3).
 *
 * Runs two searches (left and right 4x8 block) against the list-0 reference
 * a->l0.i_ref. The first search is seeded with the MV of the partition's
 * first 4x4 block, so the 4x4 analysis must have run for this partition.
 * The two costs plus lambda * the D_L0_4x8 sub-type cost are stored in
 * a->l0.i_cost4x8[i8x8]. MVs are written to the macroblock cache. */
727 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
729 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
730 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
734 /* XXX Needed for x264_mb_predict_mv */
735 h->mb.i_partition = D_8x8;
737 for( i4x8 = 0; i4x8 < 2; i4x8++ )
/* idx is the 4x4 block at the top of this 4x8 block */
739 const int idx = 4*i8x8 + i4x8;
740 const int x4 = block_idx_x[idx];
741 const int y4 = block_idx_y[idx];
/* only the first block is given a candidate */
742 const int i_mvc = (i4x8 == 0);
744 x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
746 m->i_pixel = PIXEL_4x8;
749 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
750 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
751 m->i_stride= h->mb.pic.i_stride[0];
752 m->i_mv_range = a->i_mv_range;
754 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
755 x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
757 x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
760 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
761 a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
/* Cost of B direct prediction, per 8x8 partition and for the whole macroblock.
 *
 * Precondition (per the comment below): h->mb.pic.p_fdec[0] already holds the
 * direct-mode prediction. For each 8x8 partition the SATD between source and
 * that prediction is stored in a->i_cost8x8direct[i] and accumulated into
 * a->i_cost16x16direct; lambda-weighted type costs are then added to both. */
764 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
766 /* Assumes that fdec still contains the results of
767 * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
769 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
770 uint8_t *p_fdec = h->mb.pic.p_fdec[0];
771 int i_stride= h->mb.pic.i_stride[0];
774 a->i_cost16x16direct = 0;
775 for( i = 0; i < 4; i++ )
/* x8 / y8 (partition column / row) are set on lines not visible here */
779 const int off = 8 * x8 + 8 * i_stride * y8;
/* chained assignment: the partition SATD is stored and also added to the total */
780 a->i_cost16x16direct +=
781 a->i_cost8x8direct[i] =
782 h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
785 a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
788 a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
/* B-slice 16x16 analysis: best list-0 reference, best list-1 reference, and
 * the bi-predictive cost obtained by averaging the two motion-compensated
 * predictions.
 *
 * Results: a->l0.me16x16, a->l1.me16x16 and a->i_cost16x16bi, each including
 * the lambda-weighted macroblock type cost. The chosen references of both
 * lists are written to the macroblock cache. */
791 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
/* scratch buffers for the two motion-compensated 16x16 predictions */
793 uint8_t pix1[16*16], pix2[16*16];
798 /* 16x16 Search on all ref frame */
799 m.i_pixel = PIXEL_16x16;
801 m.p_fenc = h->mb.pic.p_fenc[0];
802 m.i_stride= h->mb.pic.i_stride[0];
803 m.i_mv_range = a->i_mv_range;
/* ---- list 0 ---- */
806 a->l0.me16x16.cost = INT_MAX;
807 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
809 /* search with ref */
810 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
811 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
812 x264_me_search( h, &m, NULL, 0 );
/* add the reference-index cost so references compare fairly */
815 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
817 if( m.cost < a->l0.me16x16.cost )
823 /* subtract ref cost, so we don't have to add it for the other MB types */
824 a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
/* ---- list 1 ---- */
827 a->l1.me16x16.cost = INT_MAX;
828 for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
830 /* search with ref */
831 m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
832 x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
833 x264_me_search( h, &m, NULL, 0 );
836 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
838 if( m.cost < a->l1.me16x16.cost )
844 /* subtract ref cost, so we don't have to add it for the other MB types */
845 a->l1.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );
847 /* Set global ref, needed for other modes? */
848 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
849 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
851 /* get cost of BI mode */
/* (some argument lines of the two MC calls are not visible in this excerpt) */
852 h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
854 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
856 h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
858 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
/* average the two predictions; the SATD below reads the result from pix1 */
860 h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
/* bi cost = SATD + lambda * (both reference costs + both MV difference costs) */
862 a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
863 a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
864 bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
865 bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
866 bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
867 bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
868 bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
/* add the macroblock type cost of each candidate */
871 a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
872 a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
873 a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
/* CACHE_MV_BI(x, y, dx, dy, me0, me1, part)
 *
 * Write the reference index and motion vector of both lists into the
 * macroblock cache for the rectangle (x, y, dx, dy). For each list, when
 * x264_mb_partition_listX_table[list][part] is set, the list's chosen ref
 * (a->l0.i_ref / a->l1.i_ref) and the given me's MV are stored; otherwise
 * ref -1 and MV (0,0) are stored. Zero MV differences are also stored; the
 * conditions guarding those stores are on lines not visible here.
 * Expects `h` and `a` to be in scope at the expansion site. */
876 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
877 if( x264_mb_partition_listX_table[0][part] ) \
879 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
880 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
884 x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
885 x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \
887 x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
889 if( x264_mb_partition_listX_table[1][part] ) \
891 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
892 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
896 x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
897 x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \
899 x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
/* Store the chosen B prediction of 8x8 partition i in the macroblock cache.
 * A D_DIRECT_8x8 sub-partition loads the direct MVs and stores zero MV
 * differences and a skip flag; any other type goes through CACHE_MV_BI with
 * the partition's list-0 and list-1 8x8 results.
 * x / y (cache coordinates of the partition) are set on lines not visible here. */
902 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
906 if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
908 x264_mb_load_mv_direct8x8( h, i );
911 x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );
912 x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );
913 x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
918 CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
/* Store the chosen B prediction of 16x8 half i (0 = top, 1 = bottom) in the
 * macroblock cache, using the list selection in a->i_mb_partition16x8[i]. */
921 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
923 CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
/* Store the chosen B prediction of 8x16 half i (0 = left, 1 = right) in the
 * macroblock cache, using the list selection in a->i_mb_partition8x16[i]. */
925 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
927 CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
/* B-slice 8x8 analysis. For each 8x8 partition, search list 0 and list 1,
 * build the bi-predictive cost from the averaged predictions, and pick the
 * cheapest of L0 / L1 / BI / direct (direct only when its cost is >= 0).
 * The choice is stored in h->mb.i_sub_partition[i] and its cost is
 * accumulated into a->i_cost8x8bi, which finally gets the lambda-weighted
 * B_8x8 type cost. Sets h->mb.i_partition to D_8x8 and updates the MV cache. */
931 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
933 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
934 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
935 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
939 /* XXX Needed for x264_mb_predict_mv */
940 h->mb.i_partition = D_8x8;
944 for( i = 0; i < 4; i++ )
/* x8 / y8 (partition column / row) are set on lines not visible here */
948 uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
950 int i_part_cost_bi = 0;
952 for( l = 0; l < 2; l++ )
954 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
955 x264_me_t *m = &lX->me8x8[i];
957 m->i_pixel = PIXEL_8x8;
960 m->p_fenc = p_fenc_i;
961 m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
962 m->i_stride = h->mb.pic.i_stride[0];
963 m->i_mv_range = a->i_mv_range;
/* the list's 16x16 MV is the only search candidate */
965 x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
966 x264_me_search( h, m, &lX->me16x16.mv, 1 );
968 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
969 lX->i_cost8x8 += m->cost;
/* motion-compensate this list's prediction into pix[l] for the BI average */
972 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
973 m->mv[0], m->mv[1], 8, 8 );
974 /* FIXME: ref cost */
/* NOTE(review): the D_L0_8x8 sub-type cost is added here for both lists,
 * and D_BI_8x8 is added again below — confirm this is intended. */
975 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
976 bs_size_se( m->mv[1] - m->mvp[1] ) +
977 i_sub_mb_b_cost_table[D_L0_8x8] );
980 h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
981 i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 )
982 + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
/* pick the cheapest of L0, L1, BI and direct for this partition */
984 i_part_cost = a->l0.me8x8[i].cost;
985 h->mb.i_sub_partition[i] = D_L0_8x8;
986 if( a->l1.me8x8[i].cost < i_part_cost )
988 i_part_cost = a->l1.me8x8[i].cost;
989 h->mb.i_sub_partition[i] = D_L1_8x8;
991 if( i_part_cost_bi < i_part_cost )
993 i_part_cost = i_part_cost_bi;
994 h->mb.i_sub_partition[i] = D_BI_8x8;
/* a negative direct cost means it was never computed */
996 if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
998 i_part_cost = a->i_cost8x8direct[i];
999 h->mb.i_sub_partition[i] = D_DIRECT_8x8;
1001 a->i_cost8x8bi += i_part_cost;
1003 /* XXX Needed for x264_mb_predict_mv */
1004 x264_mb_cache_mv_b8x8( h, a, i, 0 );
1008 a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
/* B-slice 16x8 analysis. For each half (top, bottom), search list 0 and
 * list 1 seeded with that list's 8x8 MVs, build a bi-predictive cost from the
 * averaged predictions, and pick the cheapest of L0 / L1 / BI. The choice is
 * stored in a->i_mb_partition16x8[i]; the summed cost plus the type cost goes
 * to a->i_cost16x8bi and the resulting macroblock type to a->i_mb_type16x8.
 * Sets h->mb.i_partition to D_16x8 and updates the MV cache. */
1011 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
1013 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1014 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1015 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
/* NOTE(review): each scratch buffer holds 8x8 pixels (stride 8) and the MC
 * call below produces an 8x8 block, yet avg/satd are run with PIXEL_16x8.
 * This looks like it should be a 16x8 buffer with stride 16 — confirm. */
1016 uint8_t pix[2][8*8];
1017 int i_ref_stride = h->mb.pic.i_stride[0];
1021 h->mb.i_partition = D_16x8;
1022 a->i_cost16x8bi = 0;
1024 for( i = 0; i < 2; i++ )
1026 uint8_t *p_fenc_i = &p_fenc[8*i*i_ref_stride];
1028 int i_part_cost_bi = 0;
1030 /* TODO: check only the list(s) that were used in b8x8? */
1031 for( l = 0; l < 2; l++ )
1033 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1034 x264_me_t *m = &lX->me16x8[i];
1036 m->i_pixel = PIXEL_16x8;
1037 m->lm = a->i_lambda;
1039 m->p_fenc = p_fenc_i;
1040 m->i_stride= i_ref_stride;
1041 m->p_fref = &p_fref[l][8*i*i_ref_stride];
1042 m->i_mv_range = a->i_mv_range;
/* candidates: this list's MVs of the two 8x8 partitions inside the half */
1044 mvc[0][0] = lX->me8x8[2*i].mv[0];
1045 mvc[0][1] = lX->me8x8[2*i].mv[1];
1046 mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1047 mvc[1][1] = lX->me8x8[2*i+1].mv[1];
/* NOTE(review): the list argument is fixed to 0 although this loop runs
 * over both lists, and the width is 2 whereas the P 16x8 path passes 4 —
 * presumably should be ( h, l, 8*i, 4, ... ); confirm. */
1049 x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1050 x264_me_search( h, m, mvc, 2 );
1053 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1054 m->mv[0], m->mv[1], 8, 8 );
1055 /* FIXME: ref cost */
1056 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1057 bs_size_se( m->mv[1] - m->mvp[1] ) );
1060 h->pixf.avg[PIXEL_16x8]( pix[0], 8, pix[1], 8 );
1061 i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
/* pick the cheapest of L0, L1 and BI for this half */
1063 i_part_cost = a->l0.me16x8[i].cost;
1064 a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1065 if( a->l1.me16x8[i].cost < i_part_cost )
1067 i_part_cost = a->l1.me16x8[i].cost;
1068 a->i_mb_partition16x8[i] = D_L1_8x8;
/* BI must win by more than one lambda to be selected */
1070 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1072 i_part_cost = i_part_cost_bi;
1073 a->i_mb_partition16x8[i] = D_BI_8x8;
1075 a->i_cost16x8bi += i_part_cost;
1078 x264_mb_cache_mv_b16x8( h, a, i, 0 );
/* derive the macroblock type from the two per-half list choices */
1082 a->i_mb_type16x8 = B_L0_L0
1083 + (a->i_mb_partition16x8[0]>>2) * 3
1084 + (a->i_mb_partition16x8[1]>>2);
1085 a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
/* B-slice 8x16 analysis. For each half (left, right), search list 0 and
 * list 1 seeded with that list's 8x8 MVs, build a bi-predictive cost from the
 * averaged predictions, and pick the cheapest of L0 / L1 / BI. The choice is
 * stored in a->i_mb_partition8x16[i]; the summed cost plus the type cost goes
 * to a->i_cost8x16bi and the resulting macroblock type to a->i_mb_type8x16.
 * Sets h->mb.i_partition to D_8x16 and updates the MV cache. */
1087 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1089 uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1090 h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1091 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
/* NOTE(review): each scratch buffer holds 8x8 pixels and the MC call below
 * produces an 8x8 block, yet avg/satd are run with PIXEL_8x16. This looks
 * like it should be an 8x16 buffer with an 8x16 MC — confirm. */
1092 uint8_t pix[2][8*8];
1093 int i_ref_stride = h->mb.pic.i_stride[0];
1097 h->mb.i_partition = D_8x16;
1098 a->i_cost8x16bi = 0;
1100 for( i = 0; i < 2; i++ )
1102 uint8_t *p_fenc_i = &p_fenc[8*i];
1104 int i_part_cost_bi = 0;
1106 for( l = 0; l < 2; l++ )
1108 x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1109 x264_me_t *m = &lX->me8x16[i];
1111 m->i_pixel = PIXEL_8x16;
1112 m->lm = a->i_lambda;
1114 m->p_fenc = p_fenc_i;
1115 m->p_fref = &p_fref[l][8*i];
1116 m->i_stride= i_ref_stride;
1117 m->i_mv_range = a->i_mv_range;
/* candidates: this list's MVs of the upper and lower 8x8 partitions */
1119 mvc[0][0] = lX->me8x8[i].mv[0];
1120 mvc[0][1] = lX->me8x8[i].mv[1];
1121 mvc[1][0] = lX->me8x8[i+2].mv[0];
1122 mvc[1][1] = lX->me8x8[i+2].mv[1];
/* NOTE(review): the list argument is fixed to 0 although this loop runs
 * over both lists — presumably should be l; confirm. */
1124 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1125 x264_me_search( h, m, mvc, 2 );
1128 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1129 m->mv[0], m->mv[1], 8, 8 );
1130 /* FIXME: ref cost */
1131 i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1132 bs_size_se( m->mv[1] - m->mvp[1] ) );
1135 h->pixf.avg[PIXEL_8x16]( pix[0], 8, pix[1], 8 );
1136 i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
/* pick the cheapest of L0, L1 and BI for this half */
1138 i_part_cost = a->l0.me8x16[i].cost;
1139 a->i_mb_partition8x16[i] = D_L0_8x8;
1140 if( a->l1.me8x16[i].cost < i_part_cost )
1142 i_part_cost = a->l1.me8x16[i].cost;
1143 a->i_mb_partition8x16[i] = D_L1_8x8;
/* BI must win by more than one lambda to be selected */
1145 if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1147 i_part_cost = i_part_cost_bi;
1148 a->i_mb_partition8x16[i] = D_BI_8x8;
1150 a->i_cost8x16bi += i_part_cost;
1153 x264_mb_cache_mv_b8x16( h, a, i, 0 );
/* derive the macroblock type from the two per-half list choices.
 * NOTE(review): the 16x8 cost table is reused for the 8x16 type here;
 * confirm that the 8x16 type values index it as intended. */
1157 a->i_mb_type8x16 = B_L0_L0
1158 + (a->i_mb_partition8x16[0]>>2) * 3
1159 + (a->i_mb_partition8x16[1]>>2);
1160 a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1163 /*****************************************************************************
1164 * x264_macroblock_analyse:
1165 *****************************************************************************/
1166 void x264_macroblock_analyse( x264_t *h )
1168 x264_mb_analysis_t analysis;
1171 h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
1173 /* FIXME check if it's 12 */
1174 if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
1175 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
1176 else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
1177 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
1180 x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1182 /*--------------------------- Do the analysis ---------------------------*/
1183 if( h->sh.i_type == SLICE_TYPE_I )
1185 x264_mb_analyse_intra( h, &analysis );
1187 if( analysis.i_sad_i4x4 >= 0 && analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
1188 h->mb.i_type = I_4x4;
1190 h->mb.i_type = I_16x16;
1192 else if( h->sh.i_type == SLICE_TYPE_P )
1194 const unsigned int i_neighbour = h->mb.i_neighbour;
1198 int i_intra_cost, i_intra_type;
1200 /* Fast P_SKIP detection */
1201 if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
1202 ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
1203 ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
1204 ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
1206 b_skip = x264_macroblock_probe_pskip( h );
1211 h->mb.i_type = P_SKIP;
1212 h->mb.i_partition = D_16x16;
1216 const unsigned int flags = h->param.analyse.inter;
1220 x264_mb_analyse_inter_p16x16( h, &analysis );
1221 if( flags & X264_ANALYSE_PSUB16x16 )
1222 x264_mb_analyse_inter_p8x8( h, &analysis );
1224 /* Select best inter mode */
1226 i_partition = D_16x16;
1227 i_cost = analysis.l0.me16x16.cost;
1229 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1230 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1235 i_partition = D_8x8;
1236 h->mb.i_sub_partition[0] = D_L0_8x8;
1237 h->mb.i_sub_partition[1] = D_L0_8x8;
1238 h->mb.i_sub_partition[2] = D_L0_8x8;
1239 h->mb.i_sub_partition[3] = D_L0_8x8;
1241 i_cost = analysis.l0.i_cost8x8;
1244 if( flags & X264_ANALYSE_PSUB8x8 )
1246 for( i = 0; i < 4; i++ )
1248 x264_mb_analyse_inter_p4x4( h, &analysis, i );
1249 if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1253 h->mb.i_sub_partition[i] = D_L0_4x4;
1254 i_cost8x8 = analysis.l0.i_cost4x4[i];
1256 x264_mb_analyse_inter_p8x4( h, &analysis, i );
1257 if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
1259 h->mb.i_sub_partition[i] = D_L0_8x4;
1260 i_cost8x8 = analysis.l0.i_cost8x4[i];
1263 x264_mb_analyse_inter_p4x8( h, &analysis, i );
1264 if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
1266 h->mb.i_sub_partition[i] = D_L0_4x8;
1267 i_cost8x8 = analysis.l0.i_cost4x8[i];
1270 i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1275 /* Now do sub 16x8/8x16 */
1276 x264_mb_analyse_inter_p16x8( h, &analysis );
1277 if( analysis.l0.i_cost16x8 < i_cost )
1280 i_partition = D_16x8;
1281 i_cost = analysis.l0.i_cost16x8;
1284 x264_mb_analyse_inter_p8x16( h, &analysis );
1285 if( analysis.l0.i_cost8x16 < i_cost )
1288 i_partition = D_8x16;
1289 i_cost = analysis.l0.i_cost8x16;
1293 h->mb.i_type = i_type;
1294 h->mb.i_partition = i_partition;
1297 if( h->mb.i_partition == D_16x16 )
1299 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1300 i_cost = analysis.l0.me16x16.cost;
1302 else if( h->mb.i_partition == D_16x8 )
1304 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1305 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1306 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1308 else if( h->mb.i_partition == D_8x16 )
1310 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1311 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1312 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1314 else if( h->mb.i_partition == D_8x8 )
1318 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1320 switch( h->mb.i_sub_partition[i8x8] )
1323 x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1324 i_cost += analysis.l0.me8x8[i8x8].cost;
1327 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1328 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1329 i_cost += analysis.l0.me8x4[i8x8][0].cost +
1330 analysis.l0.me8x4[i8x8][1].cost;
1333 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1334 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1335 i_cost += analysis.l0.me4x8[i8x8][0].cost +
1336 analysis.l0.me4x8[i8x8][1].cost;
1340 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1341 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1342 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1343 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1344 i_cost += analysis.l0.me4x4[i8x8][0].cost +
1345 analysis.l0.me4x4[i8x8][1].cost +
1346 analysis.l0.me4x4[i8x8][2].cost +
1347 analysis.l0.me4x4[i8x8][3].cost;
1350 fprintf( stderr, "internal error (!8x8 && !4x4)" );
1356 x264_mb_analyse_intra( h, &analysis );
1357 i_intra_type = I_16x16;
1358 i_intra_cost = analysis.i_sad_i16x16;
1360 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_intra_cost )
1362 i_intra_type = I_4x4;
1363 i_intra_cost = analysis.i_sad_i4x4;
1366 if( i_intra_cost >= 0 && i_intra_cost < i_cost )
1368 h->mb.i_type = i_intra_type;
1369 i_cost = i_intra_cost;
1372 h->stat.frame.i_intra_cost += i_intra_cost;
1373 h->stat.frame.i_inter_cost += i_cost;
1376 else if( h->sh.i_type == SLICE_TYPE_B )
1380 analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1381 if( analysis.b_direct_available )
1383 h->mb.i_type = B_SKIP;
1386 /* Conditioning the probe on neighboring block types
1387 * doesn't seem to help speed or quality. */
1388 b_skip = x264_macroblock_probe_bskip( h );
1393 const unsigned int flags = h->param.analyse.inter;
1397 /* select best inter mode */
1398 /* direct must be first */
1399 if( analysis.b_direct_available )
1400 x264_mb_analyse_inter_direct( h, &analysis );
1402 x264_mb_analyse_inter_b16x16( h, &analysis );
1404 h->mb.i_type = B_L0_L0;
1405 i_partition = D_16x16;
1406 i_cost = analysis.l0.me16x16.cost;
1407 if( analysis.l1.me16x16.cost < i_cost )
1409 h->mb.i_type = B_L1_L1;
1410 i_cost = analysis.l1.me16x16.cost;
1412 if( analysis.i_cost16x16bi < i_cost )
1414 h->mb.i_type = B_BI_BI;
1415 i_cost = analysis.i_cost16x16bi;
1417 if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 )
1419 h->mb.i_type = B_DIRECT;
1420 i_cost = analysis.i_cost16x16direct;
1423 if( flags & X264_ANALYSE_BSUB16x16 )
1425 x264_mb_analyse_inter_b8x8( h, &analysis );
1426 if( analysis.i_cost8x8bi < i_cost )
1428 h->mb.i_type = B_8x8;
1429 i_partition = D_8x8;
1430 i_cost = analysis.i_cost8x8bi;
1432 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1433 h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1435 x264_mb_analyse_inter_b16x8( h, &analysis );
1436 if( analysis.i_cost16x8bi < i_cost )
1438 i_partition = D_16x8;
1439 i_cost = analysis.i_cost16x8bi;
1440 h->mb.i_type = analysis.i_mb_type16x8;
1443 if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1444 h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1446 x264_mb_analyse_inter_b8x16( h, &analysis );
1447 if( analysis.i_cost8x16bi < i_cost )
1449 i_partition = D_8x16;
1450 i_cost = analysis.i_cost8x16bi;
1451 h->mb.i_type = analysis.i_mb_type8x16;
1457 h->mb.i_partition = i_partition;
1460 if( i_partition == D_16x16 )
1462 if( h->mb.i_type == B_L0_L0 )
1464 analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1465 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1466 analysis.l0.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1467 i_cost = analysis.l0.me16x16.cost;
1469 else if( h->mb.i_type == B_L1_L1 )
1471 analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1472 x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1473 analysis.l1.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1474 i_cost = analysis.l1.me16x16.cost;
1477 /* TODO: refine bidir, 8x8 */
1479 /* best intra mode */
1480 x264_mb_analyse_intra( h, &analysis );
1482 analysis.i_sad_i16x16 += analysis.i_lambda * i_mb_b_cost_table[I_16x16];
1483 analysis.i_sad_i4x4 += analysis.i_lambda * i_mb_b_cost_table[I_4x4];
1485 if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1487 h->mb.i_type = I_16x16;
1488 i_cost = analysis.i_sad_i16x16;
1490 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1492 h->mb.i_type = I_4x4;
1493 i_cost = analysis.i_sad_i4x4;
1498 /*-------------------- Update MB from the analysis ----------------------*/
1499 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1500 switch( h->mb.i_type )
1503 for( i = 0; i < 16; i++ )
1505 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1506 analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1509 x264_mb_analyse_intra_chroma( h, &analysis );
1510 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1513 h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1515 x264_mb_analyse_intra_chroma( h, &analysis );
1516 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1520 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1521 switch( h->mb.i_partition )
1524 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1528 x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1529 x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1533 x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1534 x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1538 fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
1544 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1545 for( i = 0; i < 4; i++ )
1547 const int x = 2*(i%2);
1548 const int y = 2*(i/2);
1550 switch( h->mb.i_sub_partition[i] )
1553 x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1556 x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1557 x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1560 x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1561 x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1564 x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1565 x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1566 x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1567 x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1570 fprintf( stderr, "internal error\n" );
1579 x264_mb_predict_mv_pskip( h, mvp );
1581 h->mb.i_partition = D_16x16;
1582 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1583 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1588 /* nothing has changed since x264_macroblock_probe_bskip */
1591 x264_mb_load_mv_direct8x8( h, 0 );
1592 x264_mb_load_mv_direct8x8( h, 1 );
1593 x264_mb_load_mv_direct8x8( h, 2 );
1594 x264_mb_load_mv_direct8x8( h, 3 );
1598 /* optimize: cache might not need to be rewritten */
1599 for( i = 0; i < 4; i++ )
1600 x264_mb_cache_mv_b8x8( h, &analysis, i, 1 );
1603 default: /* the rest of the B types */
1604 switch( h->mb.i_partition )
1607 switch( h->mb.i_type )
1610 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1611 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1613 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1614 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
1615 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
1618 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1619 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
1620 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
1622 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1623 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1626 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1627 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1629 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1630 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1635 x264_mb_cache_mv_b16x8( h, &analysis, 0, 1 );
1636 x264_mb_cache_mv_b16x8( h, &analysis, 1, 1 );
1639 x264_mb_cache_mv_b8x16( h, &analysis, 0, 1 );
1640 x264_mb_cache_mv_b8x16( h, &analysis, 1, 1 );
1643 fprintf( stderr, "internal error (invalid MB type)\n" );