1 /*****************************************************************************
2 * analyse.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
29 #include "../core/common.h"
30 #include "../core/macroblock.h"
31 #include "macroblock.h"
33 #include "ratecontrol.h"
/* Sub-8x8 inter analysis results kept per reference list (the start of the
 * struct is not visible in this excerpt).  The first index of every array
 * below is the 8x8 partition number (0..3). */
46 int i_cost4x4[4]; /* cost per 8x8 partition */
/* motion-estimation state of the four 4x4 blocks of each 8x8 partition */
47 x264_me_t me4x4[4][4];
50 int i_cost8x4[4]; /* cost per 8x8 partition */
/* motion-estimation state of the two 8x4 blocks of each 8x8 partition */
51 x264_me_t me8x4[4][2];
54 int i_cost4x8[4]; /* cost per 8x8 partition */
/* NOTE(review): the 4x8 search only fills me4x8[i][0] and me4x8[i][1];
 * the second dimension looks larger than needed -- confirm. */
55 x264_me_t me4x8[4][4];
65 } x264_mb_analysis_list_t;
69 /* conduct the analysis using this lambda and QP */
75 /* Luma part 16x16 and 4x4 modes stats */
/* best 4x4 intra prediction mode of each luma block, indexed [x][y] in
 * 4x4-block units */
80 int i_predict4x4[4][4];
86 /* II: Inter part P/B frame */
/* inter analysis results for reference list 0 and reference list 1 */
89 x264_mb_analysis_list_t l0;
90 x264_mb_analysis_list_t l1;
92 int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
/* Lambda lookup table indexed by QP (52 entries, QP 0..51).
 * x264_mb_analyse_init() reads it to set the lambda used when weighting
 * bit costs against SATD during analysis. */
96 static const int i_qp0_cost_table[52] = {
97 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
98 1, 1, 1, 1, /* 8-11 */
99 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
100 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
101 6, 7, 8, 9,10,11,13,14, /* 28-35 */
102 16,18,20,23,25,29,32,36, /* 36-43 */
103 40,45,51,57,64,72,81,91 /* 44-51 */
/* Horizontal position, in 4x4-block units (0..3), of each of the 16 luma
 * 4x4 blocks, indexed by block number. */
106 static const uint8_t block_idx_x[16] = {
107 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* Vertical position, in 4x4-block units (0..3), of each of the 16 luma
 * 4x4 blocks, indexed by block number. */
109 static const uint8_t block_idx_y[16] = {
110 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* Prepare an analysis context for the current macroblock.
 *
 * Zeroes *a, selects the lambda matching i_qp and sets the cost fields
 * visible below to -1, the value the analysis code treats as "no result yet".
 *   h    : encoder state; slice type, MB position and picture size in MBs
 *          are read
 *   a    : analysis context to reset
 *   i_qp : index into i_qp0_cost_table, which holds 52 entries (0..51) */
113 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
115 memset( a, 0, sizeof( x264_mb_analysis_t ) );
117 /* conduct the analysis using this lambda and QP */
119 a->i_lambda = i_qp0_cost_table[i_qp];
122 a->i_sad_i16x16 = -1;
126 /* II: Inter part P/B frame */
127 if( h->sh.i_type != SLICE_TYPE_I )
132 /* Calculate max start MV range */
/* dmb is lowered to the MB row index and to the number of macroblocks left
 * before the right and bottom picture edges */
134 if( h->mb.i_mb_y < dmb )
136 if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
137 dmb = h->sps->i_mb_width - h->mb.i_mb_x;
138 if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
139 dmb = h->sps->i_mb_height - h->mb.i_mb_y;
/* 16 units per macroblock of margin, plus 8 */
141 a->i_mv_range = 16*dmb + 8;
/* list 0 costs: nothing has been evaluated yet */
143 a->l0.me16x16.cost = -1;
144 a->l0.i_cost8x8 = -1;
146 for( i = 0; i < 4; i++ )
148 a->l0.i_cost4x4[i] = -1;
149 a->l0.i_cost8x4[i] = -1;
150 a->l0.i_cost4x8[i] = -1;
153 a->l0.i_cost16x8 = -1;
154 a->l0.i_cost8x16 = -1;
/* list 1 and bi-prediction costs are only reset for B slices */
155 if( h->sh.i_type == SLICE_TYPE_B )
157 a->l1.me16x16.cost = -1;
158 a->l1.i_cost8x8 = -1;
160 for( i = 0; i < 4; i++ )
162 a->l1.i_cost4x4[i] = -1;
163 a->l1.i_cost8x4[i] = -1;
164 a->l1.i_cost4x8[i] = -1;
167 a->l1.i_cost16x8 = -1;
168 a->l1.i_cost8x16 = -1;
170 a->i_cost16x16bi = -1;
/* List the 16x16 luma intra prediction modes that can be tried for the
 * given neighbour availability.
 *   i_neighbour : bitmask tested against MB_LEFT and MB_TOP
 *   mode        : output array; candidate modes are appended in order
 *                 (up to four entries are written)
 *   pi_count    : callers use it as the number of entries in mode[]
 *                 (NOTE(review): the stores to *pi_count are not visible in
 *                 this excerpt) */
181 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
183 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
185 /* top and left available */
186 *mode++ = I_PRED_16x16_V;
187 *mode++ = I_PRED_16x16_H;
188 *mode++ = I_PRED_16x16_DC;
189 *mode++ = I_PRED_16x16_P;
/* only the left neighbour is available */
192 else if( ( i_neighbour & MB_LEFT ) )
195 *mode++ = I_PRED_16x16_DC_LEFT;
196 *mode++ = I_PRED_16x16_H;
/* only the top neighbour is available */
199 else if( ( i_neighbour & MB_TOP ) )
202 *mode++ = I_PRED_16x16_DC_TOP;
203 *mode++ = I_PRED_16x16_V;
/* remaining case: DC_128 is the only candidate */
209 *mode = I_PRED_16x16_DC_128;
/* List the chroma intra prediction modes that can be tried for the given
 * neighbour availability.
 *   i_neighbour : bitmask tested against MB_LEFT and MB_TOP
 *   mode        : output array; candidate modes are appended in order
 *                 (up to four entries are written)
 *   pi_count    : callers use it as the number of entries in mode[]
 *                 (NOTE(review): the stores to *pi_count are not visible in
 *                 this excerpt) */
215 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
217 if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
219 /* top and left available */
220 *mode++ = I_PRED_CHROMA_V;
221 *mode++ = I_PRED_CHROMA_H;
222 *mode++ = I_PRED_CHROMA_DC;
223 *mode++ = I_PRED_CHROMA_P;
/* only the left neighbour is available */
226 else if( ( i_neighbour & MB_LEFT ) )
229 *mode++ = I_PRED_CHROMA_DC_LEFT;
230 *mode++ = I_PRED_CHROMA_H;
/* only the top neighbour is available */
233 else if( ( i_neighbour & MB_TOP ) )
236 *mode++ = I_PRED_CHROMA_DC_TOP;
237 *mode++ = I_PRED_CHROMA_V;
/* remaining case: DC_128 is the only candidate */
243 *mode = I_PRED_CHROMA_DC_128;
/* List the 4x4 luma intra prediction modes that can be tried for block idx.
 *   i_neighbour : availability bitmask of the neighbouring macroblocks
 *   idx         : luma 4x4 block number (0..15), used to index needmb[]
 *   mode        : output array; candidate modes are appended in order
 *   pi_count    : callers use it as the number of entries in mode[]
 *                 (NOTE(review): the stores to *pi_count are not visible in
 *                 this excerpt) */
249 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
/* neighbouring macroblocks each 4x4 block depends on (only part of the
 * table is visible in this excerpt) */
252 static const unsigned int needmb[16] =
254 MB_LEFT|MB_TOP, MB_TOP,
256 MB_TOP, MB_TOP|MB_TOPRIGHT,
264 /* FIXME even when b_c == 0 there is some case where missing pixels
265 * are emulated and thus more mode are available TODO
266 * analysis and encode should be fixed too */
/* b_a / b_b / b_c: every left / top / (top-right or private) neighbour that
 * this block needs is present in i_neighbour */
267 b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
268 b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
269 b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
/* NOTE(review): presumably the "left and top both usable" branch; its
 * condition is not visible in this excerpt */
273 *mode++ = I_PRED_4x4_DC;
274 *mode++ = I_PRED_4x4_H;
275 *mode++ = I_PRED_4x4_V;
276 *mode++ = I_PRED_4x4_DDR;
277 *mode++ = I_PRED_4x4_VR;
278 *mode++ = I_PRED_4x4_HD;
279 *mode++ = I_PRED_4x4_HU;
/* NOTE(review): these two modes are presumably gated on b_c; the guard is
 * not visible in this excerpt */
285 *mode++ = I_PRED_4x4_DDL;
286 *mode++ = I_PRED_4x4_VL;
/* only the left side is usable */
290 else if( b_a && !b_b )
292 *mode++ = I_PRED_4x4_DC_LEFT;
293 *mode++ = I_PRED_4x4_H;
/* only the top side is usable */
296 else if( !b_a && b_b )
298 *mode++ = I_PRED_4x4_DC_TOP;
299 *mode++ = I_PRED_4x4_V;
/* remaining case: DC_128 */
304 *mode++ = I_PRED_4x4_DC_128;
/* Evaluate intra luma prediction for the current macroblock.
 *
 * Every available 16x16 mode is predicted into the reconstruction buffer and
 * scored with SATD plus lambda times the bit size of the mode; the best one
 * is stored in res->i_predict16x16 / res->i_sad_i16x16.  When the I4x4
 * analysis flag is set, the 16 luma 4x4 blocks are scored the same way and
 * each block is encoded as soon as its mode has been chosen.
 *   h   : encoder state (source / reconstruction planes, neighbour flags)
 *   res : analysis context; i_lambda and i_qp are read, modes and costs are
 *         written */
309 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
/* I slices use the intra analysis flags, all other slice types the inter ones */
311 const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
312 const int i_stride = h->mb.pic.i_stride[0];
313 uint8_t *p_src = h->mb.pic.p_fenc[0];
314 uint8_t *p_dst = h->mb.pic.p_fdec[0];
321 /*---------------- Try all modes and calculate their score ---------------*/
323 /* 16x16 prediction selection */
324 predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
325 for( i = 0; i < i_max; i++ )
330 i_mode = predict_mode[i];
332 /* we do the prediction */
333 h->predict_16x16[i_mode]( p_dst, i_stride );
335 /* score: SATD against the source plus lambda * bit size of the mode */
336 i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
337 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
338 /* if i_score is lower it is better */
339 if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
341 res->i_predict16x16 = i_mode;
342 res->i_sad_i16x16 = i_sad;
346 /* 4x4 prediction selection */
347 if( flags & X264_ANALYSE_I4x4 )
350 for( idx = 0; idx < 16; idx++ )
/* mode predicted for this block; a candidate equal to it is cheaper below */
358 i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
359 x = block_idx_x[idx];
360 y = block_idx_y[idx];
/* top-left pixel of this 4x4 block in the source and reconstruction planes */
362 p_src_by = p_src + 4 * x + 4 * y * i_stride;
363 p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
366 predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
367 for( i = 0; i < i_max; i++ )
372 i_mode = predict_mode[i];
374 /* we do the prediction */
375 h->predict_4x4[i_mode]( p_dst_by, i_stride );
377 /* score the prediction with SATD against the source */
378 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
379 p_src_by, i_stride );
/* mode cost: 1 * lambda when it equals the predicted mode, 4 * lambda otherwise */
381 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
383 /* if i_score is lower it is better */
384 if( i_best == -1 || i_best > i_sad )
386 res->i_predict4x4[x][y] = i_mode;
/* accumulate the best cost of this block into the macroblock total */
390 res->i_sad_i4x4 += i_best;
392 /* we need to encode this mb now (for next ones) */
393 h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
394 x264_mb_encode_i4x4( h, idx, res->i_qp );
396 /* we need to store the 'fixed' version */
397 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
398 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
400 res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
/* Pick the chroma intra prediction mode for the current macroblock.
 *
 * Each available mode is applied to both chroma planes; its score is the sum
 * of the two 8x8 SATDs plus lambda times the bit size of the mode.  The best
 * mode and score are stored in res->i_predict8x8 / res->i_sad_i8x8.
 *   h   : encoder state (chroma source / reconstruction planes)
 *   res : analysis context; i_lambda is read, the result is written */
404 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
411 uint8_t *p_dstc[2], *p_srcc[2];
414 /* 8x8 prediction selection for chroma */
/* index 0 / 1 of the local arrays map to picture planes 1 / 2 */
415 p_dstc[0] = h->mb.pic.p_fdec[1];
416 p_dstc[1] = h->mb.pic.p_fdec[2];
417 p_srcc[0] = h->mb.pic.p_fenc[1];
418 p_srcc[1] = h->mb.pic.p_fenc[2];
420 i_stride[0] = h->mb.pic.i_stride[1];
421 i_stride[1] = h->mb.pic.i_stride[2];
423 predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
/* -1 marks "no candidate scored yet" */
424 res->i_sad_i8x8 = -1;
425 for( i = 0; i < i_max; i++ )
430 i_mode = predict_mode[i];
432 /* we do the prediction */
433 h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
434 h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
436 /* we calculate the cost */
437 i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
438 p_srcc[0], i_stride[0] ) +
439 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
440 p_srcc[1], i_stride[1] ) +
441 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
443 /* if i_score is lower it is better */
444 if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
446 res->i_predict8x8 = i_mode;
447 res->i_sad_i8x8 = i_sad;
/* 16x16 motion search in reference list 0.
 *
 * Reference 0 is searched first, then references 1 .. h->i_ref0-1; for the
 * latter the bit cost of the reference index is added before the result is
 * compared with the best so far.  The chosen reference is finally written
 * to the cache for the whole macroblock.
 *   h : encoder state
 *   a : analysis context; i_mv_range is read, a->l0 receives the result */
452 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
457 /* 16x16 Search on all ref frame */
458 m.i_pixel = PIXEL_16x16;
460 m.p_fenc = h->mb.pic.p_fenc[0];
461 m.i_stride= h->mb.pic.i_stride[0];
462 m.i_mv_range = a->i_mv_range;
/* search reference 0 around its predicted motion vector */
468 m.p_fref = h->mb.pic.p_fref[0][0][0];
469 x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
470 x264_me_search( h, &m );
475 for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
477 /* search with ref */
478 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
479 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
480 x264_me_search( h, &m );
/* add the bit cost of signalling this reference index */
483 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/* NOTE(review): the update performed when this reference wins is not
 * visible in this excerpt */
485 if( m.cost < a->l0.me16x16.cost )
492 /* Set global ref, needed for all others modes */
493 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
/* Motion search for the four 8x8 partitions in list 0, using the reference
 * selected by the 16x16 search (a->l0.i_ref).
 *
 * Each partition is searched with the 16x16 motion vector as candidate and
 * its result is written to the MV cache.  a->l0.i_cost8x8 is the sum of the
 * four costs.
 *   h : encoder state
 *   a : analysis context; a->l0.me8x8[] and a->l0.i_cost8x8 are written */
496 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
498 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
499 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
503 /* XXX Needed for x264_mb_predict_mv */
504 h->mb.i_partition = D_8x8;
506 for( i = 0; i < 4; i++ )
508 x264_me_t *m = &a->l0.me8x8[i];
512 m->i_pixel = PIXEL_8x8;
/* x8 / y8 select the partition in units of 8 pixels (their definitions are
 * not visible in this excerpt) */
515 m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
516 m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
517 m->i_stride= h->mb.pic.i_stride[0];
518 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: the 16x16 result */
523 m->mvc[0] = a->l0.me16x16.mv[0];
524 m->mvc[1] = a->l0.me16x16.mv[1];
531 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
532 x264_me_search( h, m );
/* store the result in the cache before the next partition is searched */
534 x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
537 a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
538 a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
/* Motion search for the two 16x8 partitions (upper, lower) in list 0, using
 * the reference selected by the 16x16 search.
 *
 * NOTE(review): the candidate vectors are read from a->l0.me8x8[], so
 * x264_mb_analyse_inter_p8x8() is expected to have run first -- confirm at
 * the call site.
 *   h : encoder state
 *   a : analysis context; a->l0.me16x8[] and a->l0.i_cost16x8 are written */
541 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
543 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
544 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
548 /* XXX Needed for x264_mb_predict_mv */
549 h->mb.i_partition = D_16x8;
551 for( i = 0; i < 2; i++ )
553 x264_me_t *m = &a->l0.me16x8[i];
555 m->i_pixel = PIXEL_16x8;
/* partition i starts 8*i rows below the top of the macroblock */
558 m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
559 m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
560 m->i_stride= h->mb.pic.i_stride[0];
561 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: result of 8x8 partition 0 (upper) or 2 (lower) */
564 m->mvc[0] = a->l0.me8x8[2*i].mv[0];
565 m->mvc[1] = a->l0.me8x8[2*i].mv[1];
567 x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
568 x264_me_search( h, m );
570 x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
573 a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
/* Motion search for the two 8x16 partitions (left, right) in list 0, using
 * the reference selected by the 16x16 search.
 *
 * NOTE(review): the candidate vectors are read from a->l0.me8x8[], so
 * x264_mb_analyse_inter_p8x8() is expected to have run first -- confirm at
 * the call site.
 *   h : encoder state
 *   a : analysis context; a->l0.me8x16[] and a->l0.i_cost8x16 are written */
576 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
578 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
579 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
583 /* XXX Needed for x264_mb_predict_mv */
584 h->mb.i_partition = D_8x16;
586 for( i = 0; i < 2; i++ )
588 x264_me_t *m = &a->l0.me8x16[i];
590 m->i_pixel = PIXEL_8x16;
/* partition i starts 8*i pixels to the right of the macroblock origin */
593 m->p_fenc = &p_fenc[8*i];
594 m->p_fref = &p_fref[8*i];
595 m->i_stride= h->mb.pic.i_stride[0];
596 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: result of 8x8 partition 0 (left) or 1 (right) */
599 m->mvc[0] = a->l0.me8x8[i].mv[0];
600 m->mvc[1] = a->l0.me8x8[i].mv[1];
602 x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
603 x264_me_search( h, m );
605 x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
608 a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
/* Motion search for the four 4x4 blocks of 8x8 partition i8x8 in list 0.
 *
 * Each block is searched with the motion vector of the enclosing 8x8
 * partition as candidate and its result is written to the MV cache.
 * a->l0.i_cost4x4[i8x8] is the sum of the four costs.
 *   h    : encoder state
 *   a    : analysis context; a->l0.me8x8[i8x8] is read,
 *          a->l0.me4x4[i8x8][] and a->l0.i_cost4x4[i8x8] are written
 *   i8x8 : 8x8 partition number (0..3) */
611 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
613 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
614 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
618 /* XXX Needed for x264_mb_predict_mv */
619 h->mb.i_partition = D_8x8;
621 for( i4x4 = 0; i4x4 < 4; i4x4++ )
/* luma 4x4 block number and its position in 4x4-block units */
623 const int idx = 4*i8x8 + i4x4;
624 const int x4 = block_idx_x[idx];
625 const int y4 = block_idx_y[idx];
627 x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
629 m->i_pixel = PIXEL_4x4;
632 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
633 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
634 m->i_stride= h->mb.pic.i_stride[0];
635 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: result of the enclosing 8x8 partition */
640 m->mvc[0] = a->l0.me8x8[i8x8].mv[0];
641 m->mvc[1] = a->l0.me8x8[i8x8].mv[1];
648 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
649 x264_me_search( h, m );
651 x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
654 a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
655 a->l0.me4x4[i8x8][1].cost +
656 a->l0.me4x4[i8x8][2].cost +
657 a->l0.me4x4[i8x8][3].cost;
/* Motion search for the two 8x4 blocks (upper, lower) of 8x8 partition i8x8
 * in list 0.
 *
 * The candidate vector is the result of the first 4x4 block of the same
 * partition, so x264_mb_analyse_inter_p4x4() must have been run for i8x8
 * beforehand.
 *   h    : encoder state
 *   a    : analysis context; a->l0.me4x4[i8x8][0] is read,
 *          a->l0.me8x4[i8x8][] and a->l0.i_cost8x4[i8x8] are written
 *   i8x8 : 8x8 partition number (0..3) */
660 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
662 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
663 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
667 /* XXX Needed for x264_mb_predict_mv */
668 h->mb.i_partition = D_8x8;
670 for( i8x4 = 0; i8x4 < 2; i8x4++ )
/* first 4x4 block covered by this 8x4 block (block 0 or 2 of the partition) */
672 const int idx = 4*i8x8 + 2*i8x4;
673 const int x4 = block_idx_x[idx];
674 const int y4 = block_idx_y[idx];
676 x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
678 m->i_pixel = PIXEL_8x4;
681 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
682 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
683 m->i_stride= h->mb.pic.i_stride[0];
684 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: result of the partition's first 4x4 block */
689 m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
690 m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
697 x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
698 x264_me_search( h, m );
700 x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
703 a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost;
/* Motion search for the two 4x8 blocks (left, right) of 8x8 partition i8x8
 * in list 0.
 *
 * The candidate vector is the result of the first 4x4 block of the same
 * partition, so x264_mb_analyse_inter_p4x4() must have been run for i8x8
 * beforehand.
 *   h    : encoder state
 *   a    : analysis context; a->l0.me4x4[i8x8][0] is read,
 *          a->l0.me4x8[i8x8][0..1] and a->l0.i_cost4x8[i8x8] are written
 *   i8x8 : 8x8 partition number (0..3) */
706 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
708 uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
709 uint8_t *p_fenc = h->mb.pic.p_fenc[0];
713 /* XXX Needed for x264_mb_predict_mv */
714 h->mb.i_partition = D_8x8;
716 for( i4x8 = 0; i4x8 < 2; i4x8++ )
/* first 4x4 block covered by this 4x8 block (block 0 or 1 of the partition) */
718 const int idx = 4*i8x8 + i4x8;
719 const int x4 = block_idx_x[idx];
720 const int y4 = block_idx_y[idx];
722 x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
724 m->i_pixel = PIXEL_4x8;
727 m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
728 m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
729 m->i_stride= h->mb.pic.i_stride[0];
730 m->i_mv_range = a->i_mv_range;
/* candidate motion vector: result of the partition's first 4x4 block */
735 m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
736 m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
743 x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
744 x264_me_search( h, m );
746 x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
749 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
/* 16x16 motion search for B macroblocks.
 *
 * List 0 and list 1 are searched independently over their references
 * (h->i_ref0 and h->i_ref1 of them), adding the reference-index bit cost for
 * references other than 0.  The chosen references are cached for the whole
 * macroblock and the bi-predicted cost is computed from the two winners:
 * both predictions are motion compensated and averaged, then scored with
 * SATD plus lambda times the bits of both reference indices and both motion
 * vector differences.
 *   h : encoder state
 *   a : analysis context; i_mv_range and i_lambda are read, a->l0, a->l1 and
 *       a->i_cost16x16bi receive the results */
753 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
/* scratch buffers used for the bi-prediction cost */
755 uint8_t pix1[16*16], pix2[16*16];
760 /* 16x16 Search on all ref frame */
761 m.i_pixel = PIXEL_16x16;
763 m.p_fenc = h->mb.pic.p_fenc[0];
764 m.i_stride= h->mb.pic.i_stride[0];
766 m.i_mv_range = a->i_mv_range;
768 /* ME for List 0 ref 0 */
769 m.p_fref = h->mb.pic.p_fref[0][0][0];
770 x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
771 x264_me_search( h, &m );
776 for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
778 /* search with ref */
779 m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
780 x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
781 x264_me_search( h, &m );
/* add the bit cost of signalling this list 0 reference index */
784 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/* NOTE(review): the update performed when this reference wins is not
 * visible in this excerpt */
786 if( m.cost < a->l0.me16x16.cost )
793 /* ME for list 1 ref 0 */
794 m.p_fref = h->mb.pic.p_fref[1][0][0];
795 x264_mb_predict_mv_16x16( h, 1, 0, m.mvp );
796 x264_me_search( h, &m );
801 for( i_ref = 1; i_ref < h->i_ref1; i_ref++ )
803 /* search with ref */
804 m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
805 x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
806 x264_me_search( h, &m );
/* add the bit cost of signalling this list 1 reference index */
809 m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
/* NOTE(review): the update performed when this reference wins is not
 * visible in this excerpt */
811 if( m.cost < a->l1.me16x16.cost )
818 /* Set global ref, needed for all others modes FIXME some work for mixed block mode */
819 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
820 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
822 /* get cost of BI mode */
/* NOTE(review): the destination arguments of the two mc calls are not
 * visible in this excerpt; presumably pix1 and pix2 */
823 h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
825 a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
827 h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
829 a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
831 h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
/* SATD of pix1 against the source, plus lambda * (reference index bits +
 * motion vector difference bits) for both lists */
833 a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
834 a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
835 bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
836 bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
837 bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
838 bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
839 bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
842 /*****************************************************************************
843 * x264_macroblock_analyse:
844 *****************************************************************************/
/* Choose the macroblock type, partitioning, prediction modes and motion
 * vectors for the current macroblock and store the decision in h->mb.
 *
 * The QP comes from x264_ratecontrol_qp() and is kept within 12 of
 * h->mb.i_last_qp.  I slices compare I_4x4 with I_16x16.  P slices probe
 * P_SKIP first; otherwise the 16x16, 8x8 (with optional sub-8x8 splits),
 * 16x8 and 8x16 list 0 costs are compared, the winner is refined with
 * x264_me_refine_qpel() and the intra modes are tried last.  B slices
 * compare the list 0, list 1 and bi-predicted 16x16 costs against intra.
 * Finally the references, motion vectors and prediction modes of the
 * selected type are written to the macroblock cache.
 *   h : encoder state for the current macroblock */
845 void x264_macroblock_analyse( x264_t *h )
847 x264_mb_analysis_t analysis;
850 h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
852 /* FIXME check if it's 12 */
/* limit the QP change relative to the previous QP to +/- 12 */
853 if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
854 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
855 else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
856 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
859 x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
861 /*--------------------------- Do the analysis ---------------------------*/
862 if( h->sh.i_type == SLICE_TYPE_I )
864 x264_mb_analyse_intra( h, &analysis );
/* I_4x4 is chosen only when it was evaluated (cost >= 0) and is cheaper */
866 if( analysis.i_sad_i4x4 >= 0 && analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
867 h->mb.i_type = I_4x4;
869 h->mb.i_type = I_16x16;
871 else if( h->sh.i_type == SLICE_TYPE_P )
873 const unsigned int i_neighbour = h->mb.i_neighbour;
878 /* Fast P_SKIP detection */
/* the skip probe is only run when the left, top, top-left or top-right
 * neighbour was itself coded as P_SKIP */
879 if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
880 ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
881 ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
882 ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
884 b_skip = x264_macroblock_probe_pskip( h );
889 h->mb.i_type = P_SKIP;
890 h->mb.i_partition = D_16x16;
894 const unsigned int flags = h->param.analyse.inter;
898 x264_mb_analyse_inter_p16x16( h, &analysis );
899 if( flags & X264_ANALYSE_PSUB16x16 )
900 x264_mb_analyse_inter_p8x8( h, &analysis );
902 /* Select best inter mode */
904 i_partition = D_16x16;
905 i_cost = analysis.l0.me16x16.cost;
907 if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
908 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
914 h->mb.i_sub_partition[0] = D_L0_8x8;
915 h->mb.i_sub_partition[1] = D_L0_8x8;
916 h->mb.i_sub_partition[2] = D_L0_8x8;
917 h->mb.i_sub_partition[3] = D_L0_8x8;
919 i_cost = analysis.l0.i_cost8x8;
922 if( flags & X264_ANALYSE_PSUB8x8 )
924 for( i = 0; i < 4; i++ )
926 x264_mb_analyse_inter_p4x4( h, &analysis, i );
927 if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
/* i_cost8x8 tracks the cheapest sub-8x8 split found so far for partition i */
931 h->mb.i_sub_partition[i] = D_L0_4x4;
932 i_cost8x8 = analysis.l0.i_cost4x4[i];
934 x264_mb_analyse_inter_p8x4( h, &analysis, i );
935 if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
937 h->mb.i_sub_partition[i] = D_L0_8x4;
938 i_cost8x8 = analysis.l0.i_cost8x4[i];
941 x264_mb_analyse_inter_p4x8( h, &analysis, i );
/* compare with the running best, not with the 4x4 cost: otherwise a 4x8
 * split that only beats 4x4 would replace a cheaper 8x4 split */
942 if( analysis.l0.i_cost4x8[i] < i_cost8x8 )
944 h->mb.i_sub_partition[i] = D_L0_4x8;
945 i_cost8x8 = analysis.l0.i_cost4x8[i];
/* replace the 8x8 cost of this partition by the cost of its best split */
948 i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
953 /* Now do sub 16x8/8x16 */
954 x264_mb_analyse_inter_p16x8( h, &analysis );
955 if( analysis.l0.i_cost16x8 < i_cost )
958 i_partition = D_16x8;
959 i_cost = analysis.l0.i_cost16x8;
962 x264_mb_analyse_inter_p8x16( h, &analysis );
963 if( analysis.l0.i_cost8x16 < i_cost )
966 i_partition = D_8x16;
967 i_cost = analysis.l0.i_cost8x16;
971 h->mb.i_type = i_type;
972 h->mb.i_partition = i_partition;
/* refine the motion vectors of the selected partitioning and recompute
 * i_cost from the refined costs */
975 if( h->mb.i_partition == D_16x16 )
977 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
978 i_cost = analysis.l0.me16x16.cost;
980 else if( h->mb.i_partition == D_16x8 )
982 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
983 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
984 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
986 else if( h->mb.i_partition == D_8x16 )
988 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
989 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
990 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
992 else if( h->mb.i_partition == D_8x8 )
996 for( i8x8 = 0; i8x8 < 4; i8x8++ )
998 switch( h->mb.i_sub_partition[i8x8] )
1001 x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1002 i_cost += analysis.l0.me8x8[i8x8].cost;
1005 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1006 x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1007 i_cost += analysis.l0.me8x4[i8x8][0].cost +
1008 analysis.l0.me8x4[i8x8][1].cost;
1011 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1012 x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1013 i_cost += analysis.l0.me4x8[i8x8][0].cost +
1014 analysis.l0.me4x8[i8x8][1].cost;
1018 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1019 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1020 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1021 x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1022 i_cost += analysis.l0.me4x4[i8x8][0].cost +
1023 analysis.l0.me4x4[i8x8][1].cost +
1024 analysis.l0.me4x4[i8x8][2].cost +
1025 analysis.l0.me4x4[i8x8][3].cost;
/* newline-terminated like the other internal-error messages */
1028 fprintf( stderr, "internal error (!8x8 && !4x4)\n" );
/* intra modes replace the inter decision only when evaluated and cheaper */
1034 x264_mb_analyse_intra( h, &analysis );
1035 if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1037 h->mb.i_type = I_16x16;
1038 i_cost = analysis.i_sad_i16x16;
1041 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1043 h->mb.i_type = I_4x4;
1044 i_cost = analysis.i_sad_i4x4;
1048 else if( h->sh.i_type == SLICE_TYPE_B )
1052 /* best inter mode */
1053 x264_mb_analyse_inter_b16x16( h, &analysis );
/* start from list 0 prediction, then try list 1 and bi-prediction */
1054 h->mb.i_type = B_L0_L0;
1055 h->mb.i_partition = D_16x16;
1056 i_cost = analysis.l0.me16x16.cost;
1058 if( analysis.l1.me16x16.cost < i_cost )
1060 h->mb.i_type = B_L1_L1;
1061 i_cost = analysis.l1.me16x16.cost;
1063 if( analysis.i_cost16x16bi < i_cost )
1065 h->mb.i_type = B_BI_BI;
1066 i_cost = analysis.i_cost16x16bi;
1069 /* best intra mode */
1070 x264_mb_analyse_intra( h, &analysis );
1071 if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1073 h->mb.i_type = I_16x16;
1074 i_cost = analysis.i_sad_i16x16;
1076 if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1078 h->mb.i_type = I_4x4;
1079 i_cost = analysis.i_sad_i4x4;
1084 /*-------------------- Update MB from the analysis ----------------------*/
1085 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1086 switch( h->mb.i_type )
/* intra 4x4: store the chosen mode of every block, then pick the chroma mode */
1089 for( i = 0; i < 16; i++ )
1091 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1092 analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1095 x264_mb_analyse_intra_chroma( h, &analysis );
1096 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
/* intra 16x16: store the luma mode, then pick the chroma mode */
1099 h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1101 x264_mb_analyse_intra_chroma( h, &analysis );
1102 h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
/* list 0 inter with 16x16, 16x8 or 8x16 partitions */
1106 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1107 switch( h->mb.i_partition )
1110 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1114 x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1115 x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1119 x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1120 x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1124 fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
/* list 0 inter with 8x8 partitions: write the vectors of each partition
 * according to its sub-partition type */
1130 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1131 for( i = 0; i < 4; i++ )
/* top-left corner of partition i in 4x4-block units */
1133 const int x = 2*(i%2);
1134 const int y = 2*(i/2);
1136 switch( h->mb.i_sub_partition[i] )
1139 x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1142 x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1143 x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1146 x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1147 x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1150 x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1151 x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1152 x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1153 x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1156 fprintf( stderr, "internal error\n" );
/* skip: reference 0 with the predicted skip motion vector */
1165 x264_mb_predict_mv_pskip( h, mvp );
1167 h->mb.i_partition = D_16x16;
1168 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1169 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
/* list 0 only: list 1 is marked unused (ref -1, zero mv and mvd) */
1174 switch( h->mb.i_partition )
1177 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1178 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1180 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1181 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
1182 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
1185 fprintf( stderr, "internal error\n" );
/* list 1 only: list 0 is marked unused (ref -1, zero mv and mvd) */
1190 switch( h->mb.i_partition )
1193 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1194 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
1195 x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
1197 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1198 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1202 fprintf( stderr, "internal error\n" );
/* bi-prediction: both lists carry their own reference and motion vector */
1207 switch( h->mb.i_partition )
1210 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1211 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1213 x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1214 x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1218 fprintf( stderr, "internal error\n" );
1224 fprintf( stderr, "internal error (invalid MB type)\n" );