git.sesse.net Git - x264/blob - encoder/analyse.c

   1 /*****************************************************************************
   2  * analyse.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 x264 project
   5  * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *          Loren Merritt <lorenm@u.washington.edu>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include <stdlib.h>
  26 #include <stdio.h>
  27 #include <string.h>
  28 #include <math.h>
  29 #include <limits.h>
  30
  31 #include "common/common.h"
  32 #include "common/macroblock.h"
  33 #include "macroblock.h"
  34 #include "me.h"
  35 #include "ratecontrol.h"
  36 #include "analyse.h"
  37 #include "rdo.c"
  38
  39 typedef struct
  40 {
  41     /* 16x16 */
  42     int i_ref;
  43     x264_me_t me16x16;
  44
  45     /* 8x8 */
  46     int       i_cost8x8;
  47     x264_me_t me8x8[4];
  48
  49     /* Sub 4x4 */
  50     int       i_cost4x4[4]; /* cost per 8x8 partition */
  51     x264_me_t me4x4[4][4];
  52
  53     /* Sub 8x4 */
  54     int       i_cost8x4[4]; /* cost per 8x8 partition */
  55     x264_me_t me8x4[4][2];
  56
  57     /* Sub 4x8 */
  58     int       i_cost4x8[4]; /* cost per 8x8 partition */
  59     x264_me_t me4x8[4][4];
  60
  61     /* 16x8 */
  62     int       i_cost16x8;
  63     x264_me_t me16x8[2];
  64
  65     /* 8x16 */
  66     int       i_cost8x16;
  67     x264_me_t me8x16[2];
  68
  69 } x264_mb_analysis_list_t;
  70
  71 typedef struct
  72 {
  73     /* conduct the analysis using this lamda and QP */
  74     int i_lambda;
  75     int i_lambda2;
  76     int i_qp;
  77     int16_t *p_cost_mv;
  78     int b_mbrd;
  79
  80
  81     /* I: Intra part */
  82     /* Take some shortcuts in intra search if intra is deemed unlikely */
  83     int b_fast_intra;
  84     int i_best_satd;
  85
  86     /* Luma part */
  87     int i_sad_i16x16;
  88     int i_predict16x16;
  89
  90     int i_sad_i8x8;
  91     int i_predict8x8[2][2];
  92
  93     int i_sad_i4x4;
  94     int i_predict4x4[4][4];
  95
  96     /* Chroma part */
  97     int i_sad_i8x8chroma;
  98     int i_predict8x8chroma;
  99
 100     /* II: Inter part P/B frame */
 101     x264_mb_analysis_list_t l0;
 102     x264_mb_analysis_list_t l1;
 103
 104     int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
 105     int i_cost16x16direct;
 106     int i_cost8x8bi;
 107     int i_cost8x8direct[4];
 108     int i_cost16x8bi;
 109     int i_cost8x16bi;
 110
 111     int i_mb_partition16x8[2]; /* mb_partition_e */
 112     int i_mb_partition8x16[2];
 113     int i_mb_type16x8; /* mb_class_e */
 114     int i_mb_type8x16;
 115
 116     int b_direct_available;
 117
 118 } x264_mb_analysis_t;
 119
 120 /* lambda = pow(2,qp/6-2) */
 121 static const int i_qp0_cost_table[52] = {
 122    1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
 123    1, 1, 1, 1,              /*  8-11 */
 124    1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
 125    3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
 126    6, 7, 8, 9,10,11,13,14,  /* 28-35 */
 127   16,18,20,23,25,29,32,36,  /* 36-43 */
 128   40,45,51,57,64,72,81,91   /* 44-51 */
 129 };
 130
 131 /* pow(lambda,2) * .9 */
 132 static const int i_qp0_cost2_table[52] = {
 133    1,   1,   1,   1,   1,   1, /*  0-5  */
 134    1,   1,   1,   1,   1,   1, /*  6-11 */
 135    1,   1,   1,   2,   2,   3, /* 12-17 */
 136    4,   5,   6,   7,   9,  11, /* 18-23 */
 137   14,  18,  23,  29,  36,  46, /* 24-29 */
 138   58,  73,  91, 115, 145, 183, /* 30-35 */
 139  230, 290, 366, 461, 581, 731, /* 36-41 */
 140  922,1161,1463,1843,2322,2926, /* 42-47 */
 141 3686,4645,5852,7373
 142 };
 143
 144 static const uint8_t block_idx_x[16] = {
 145     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
 146 };
 147 static const uint8_t block_idx_y[16] = {
 148     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
 149 };
 150
 151 /* TODO: calculate CABAC costs */
 152 static const int i_mb_b_cost_table[19] = {
 153     9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
 154 };
 155 static const int i_mb_b16x8_cost_table[16] = {
 156     0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
 157 };
 158 static const int i_sub_mb_b_cost_table[13] = {
 159     7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
 160 };
 161 static const int i_sub_mb_p_cost_table[4] = {
 162     5, 3, 3, 1
 163 };
 164
 165 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
 166
 167 /* initialize an array of lambda*nbits for all possible mvs */
 168 static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
 169 {
 170     static int16_t *p_cost_mv[52];
 171
 172     if( !p_cost_mv[a->i_qp] )
 173     {
 174         /* could be faster, but isn't called many times */
 175         /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
 176         int i;
 177         p_cost_mv[a->i_qp] = x264_malloc( (4*4*h->param.analyse.i_mv_range + 1) * sizeof(int16_t) );
 178         p_cost_mv[a->i_qp] += 2*4*h->param.analyse.i_mv_range;
 179         for( i = 0; i <= 2*4*h->param.analyse.i_mv_range; i++ )
 180         {
 181             p_cost_mv[a->i_qp][-i] =
 182             p_cost_mv[a->i_qp][i]  = a->i_lambda * bs_size_se( i );
 183         }
 184     }
 185
 186     a->p_cost_mv = p_cost_mv[a->i_qp];
 187 }
 188
 189 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 190 {
 191     memset( a, 0, sizeof( x264_mb_analysis_t ) );
 192
 193     /* conduct the analysis using this lamda and QP */
 194     a->i_qp = i_qp;
 195     a->i_lambda = i_qp0_cost_table[i_qp];
 196     a->i_lambda2 = i_qp0_cost2_table[i_qp];
 197     a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && h->sh.i_type != SLICE_TYPE_B;
 198
 199     h->mb.i_me_method = h->param.analyse.i_me_method;
 200     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
 201     h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
 202                         && h->mb.i_subpel_refine >= 5;
 203
 204     h->mb.b_transform_8x8 = 0;
 205
 206     /* I: Intra part */
 207     a->i_sad_i16x16 =
 208     a->i_sad_i8x8   =
 209     a->i_sad_i4x4   =
 210     a->i_sad_i8x8chroma = COST_MAX;
 211
 212     a->b_fast_intra = 0;
 213     a->i_best_satd = COST_MAX;
 214
 215     /* II: Inter part P/B frame */
 216     if( h->sh.i_type != SLICE_TYPE_I )
 217     {
 218         int i;
 219         int i_fmv_range = h->param.analyse.i_mv_range - 16;
 220
 221         /* Calculate max allowed MV range */
 222 #define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range )
 223         h->mb.mv_min_fpel[0] = CLIP_FMV( -16*h->mb.i_mb_x - 8 );
 224         h->mb.mv_max_fpel[0] = CLIP_FMV( 16*( h->sps->i_mb_width - h->mb.i_mb_x ) - 8 );
 225         h->mb.mv_min[0] = 4*( h->mb.mv_min_fpel[0] - 16 );
 226         h->mb.mv_max[0] = 4*( h->mb.mv_max_fpel[0] + 16 );
 227         if( h->mb.i_mb_x == 0)
 228         {
 229             h->mb.mv_min_fpel[1] = CLIP_FMV( -16*h->mb.i_mb_y - 8 );
 230             h->mb.mv_max_fpel[1] = CLIP_FMV( 16*( h->sps->i_mb_height - h->mb.i_mb_y ) - 8 );
 231             h->mb.mv_min[1] = 4*( h->mb.mv_min_fpel[1] - 16 );
 232             h->mb.mv_max[1] = 4*( h->mb.mv_max_fpel[1] + 16 );
 233         }
 234 #undef CLIP_FMV
 235
 236         a->l0.me16x16.cost =
 237         a->l0.i_cost8x8    = COST_MAX;
 238
 239         for( i = 0; i < 4; i++ )
 240         {
 241             a->l0.i_cost4x4[i] =
 242             a->l0.i_cost8x4[i] =
 243             a->l0.i_cost4x8[i] = COST_MAX;
 244         }
 245
 246         a->l0.i_cost16x8   =
 247         a->l0.i_cost8x16   = COST_MAX;
 248         if( h->sh.i_type == SLICE_TYPE_B )
 249         {
 250             a->l1.me16x16.cost =
 251             a->l1.i_cost8x8    = COST_MAX;
 252
 253             for( i = 0; i < 4; i++ )
 254             {
 255                 a->l1.i_cost4x4[i] =
 256                 a->l1.i_cost8x4[i] =
 257                 a->l1.i_cost4x8[i] =
 258                 a->i_cost8x8direct[i] = COST_MAX;
 259             }
 260
 261             a->l1.i_cost16x8   =
 262             a->l1.i_cost8x16   =
 263
 264             a->i_cost16x16bi   =
 265             a->i_cost16x16direct =
 266             a->i_cost8x8bi     =
 267             a->i_cost16x8bi    =
 268             a->i_cost8x16bi    = COST_MAX;
 269         }
 270
 271         /* Fast intra decision */
 272         if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
 273         {
 274             if( a->b_mbrd
 275                || IS_INTRA( h->mb.i_mb_type_left )
 276                || IS_INTRA( h->mb.i_mb_type_top )
 277                || IS_INTRA( h->mb.i_mb_type_topleft )
 278                || IS_INTRA( h->mb.i_mb_type_topright )
 279                || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
 280                || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
 281             { /* intra is likely */ }
 282             else
 283             {
 284                 a->b_fast_intra = 1;
 285             }
 286         }
 287     }
 288 }
 289
 290
 291
 292 /*
 293  * Handle intra mb
 294  */
 295 /* Max = 4 */
 296 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 297 {
 298     if( i_neighbour & MB_TOPLEFT )
 299     {
 300         /* top and left avaible */
 301         *mode++ = I_PRED_16x16_V;
 302         *mode++ = I_PRED_16x16_H;
 303         *mode++ = I_PRED_16x16_DC;
 304         *mode++ = I_PRED_16x16_P;
 305         *pi_count = 4;
 306     }
 307     else if( i_neighbour & MB_LEFT )
 308     {
 309         /* left available*/
 310         *mode++ = I_PRED_16x16_DC_LEFT;
 311         *mode++ = I_PRED_16x16_H;
 312         *pi_count = 2;
 313     }
 314     else if( i_neighbour & MB_TOP )
 315     {
 316         /* top available*/
 317         *mode++ = I_PRED_16x16_DC_TOP;
 318         *mode++ = I_PRED_16x16_V;
 319         *pi_count = 2;
 320     }
 321     else
 322     {
 323         /* none avaible */
 324         *mode = I_PRED_16x16_DC_128;
 325         *pi_count = 1;
 326     }
 327 }
 328
 329 /* Max = 4 */
 330 static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 331 {
 332     if( i_neighbour & MB_TOPLEFT )
 333     {
 334         /* top and left avaible */
 335         *mode++ = I_PRED_CHROMA_V;
 336         *mode++ = I_PRED_CHROMA_H;
 337         *mode++ = I_PRED_CHROMA_DC;
 338         *mode++ = I_PRED_CHROMA_P;
 339         *pi_count = 4;
 340     }
 341     else if( i_neighbour & MB_LEFT )
 342     {
 343         /* left available*/
 344         *mode++ = I_PRED_CHROMA_DC_LEFT;
 345         *mode++ = I_PRED_CHROMA_H;
 346         *pi_count = 2;
 347     }
 348     else if( i_neighbour & MB_TOP )
 349     {
 350         /* top available*/
 351         *mode++ = I_PRED_CHROMA_DC_TOP;
 352         *mode++ = I_PRED_CHROMA_V;
 353         *pi_count = 2;
 354     }
 355     else
 356     {
 357         /* none avaible */
 358         *mode = I_PRED_CHROMA_DC_128;
 359         *pi_count = 1;
 360     }
 361 }
 362
 363 /* MAX = 9 */
 364 static void predict_4x4_mode_available( unsigned int i_neighbour,
 365                                         int *mode, int *pi_count )
 366 {
 367     /* FIXME even when b_tr == 0 there is some case where missing pixels
 368      * are emulated and thus more mode are available TODO
 369      * analysis and encode should be fixed too */
 370     int b_l = i_neighbour & MB_LEFT;
 371     int b_t = i_neighbour & MB_TOP;
 372     int b_tr = i_neighbour & MB_TOPRIGHT;
 373
 374     if( b_l && b_t )
 375     {
 376         *mode++ = I_PRED_4x4_DC;
 377         *mode++ = I_PRED_4x4_H;
 378         *mode++ = I_PRED_4x4_V;
 379         *mode++ = I_PRED_4x4_DDR;
 380         *mode++ = I_PRED_4x4_VR;
 381         *mode++ = I_PRED_4x4_HD;
 382         *mode++ = I_PRED_4x4_HU;
 383         *pi_count = 7;
 384     }
 385     else if( b_l )
 386     {
 387         *mode++ = I_PRED_4x4_DC_LEFT;
 388         *mode++ = I_PRED_4x4_H;
 389         *mode++ = I_PRED_4x4_HU;
 390         *pi_count = 3;
 391     }
 392     else if( b_t )
 393     {
 394         *mode++ = I_PRED_4x4_DC_TOP;
 395         *mode++ = I_PRED_4x4_V;
 396         *pi_count = 2;
 397     }
 398     else
 399     {
 400         *mode++ = I_PRED_4x4_DC_128;
 401         *pi_count = 1;
 402     }
 403
 404     if( b_t && b_tr )
 405     {
 406         *mode++ = I_PRED_4x4_DDL;
 407         *mode++ = I_PRED_4x4_VL;
 408         (*pi_count) += 2;
 409     }
 410 }
 411
 412 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
 413 {
 414     int i;
 415
 416     int i_max;
 417     int predict_mode[9];
 418
 419     uint8_t *p_dstc[2], *p_srcc[2];
 420     int      i_stride[2];
 421
 422     if( a->i_sad_i8x8chroma < COST_MAX )
 423         return;
 424
 425     /* 8x8 prediction selection for chroma */
 426     p_dstc[0] = h->mb.pic.p_fdec[1];
 427     p_dstc[1] = h->mb.pic.p_fdec[2];
 428     p_srcc[0] = h->mb.pic.p_fenc[1];
 429     p_srcc[1] = h->mb.pic.p_fenc[2];
 430
 431     i_stride[0] = h->mb.pic.i_stride[1];
 432     i_stride[1] = h->mb.pic.i_stride[2];
 433
 434     predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 435     a->i_sad_i8x8chroma = COST_MAX;
 436     for( i = 0; i < i_max; i++ )
 437     {
 438         int i_sad;
 439         int i_mode;
 440
 441         i_mode = predict_mode[i];
 442
 443         /* we do the prediction */
 444         h->predict_8x8c[i_mode]( p_dstc[0], i_stride[0] );
 445         h->predict_8x8c[i_mode]( p_dstc[1], i_stride[1] );
 446
 447         /* we calculate the cost */
 448         i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], i_stride[0],
 449                                           p_srcc[0], i_stride[0] ) +
 450                 h->pixf.mbcmp[PIXEL_8x8]( p_dstc[1], i_stride[1],
 451                                           p_srcc[1], i_stride[1] ) +
 452                 a->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );
 453
 454         /* if i_score is lower it is better */
 455         if( a->i_sad_i8x8chroma > i_sad )
 456         {
 457             a->i_predict8x8chroma = i_mode;
 458             a->i_sad_i8x8chroma   = i_sad;
 459         }
 460     }
 461
 462     h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
 463 }
 464
/* Evaluate the intra luma modes for the current macroblock.
 *
 * The 16x16 modes are always tried; the 4x4 and 8x8 passes run only when the
 * matching X264_ANALYSE_I4x4 / X264_ANALYSE_I8x8 bit is set in the analysis
 * flags selected by the slice type. The best modes go to a->i_predict16x16,
 * a->i_predict4x4[][] and a->i_predict8x8[][], the costs to a->i_sad_i16x16,
 * a->i_sad_i4x4 and a->i_sad_i8x8.
 *
 * i_cost_inter is the cost of the best inter mode found so far. It is used
 * for the fast-intra early exit and, when a->b_mbrd is set, to scale a SATD
 * cost to the RD cost range for modes that are not fully RD-evaluated.
 *
 * Side effects: predictions are written into h->mb.pic.p_fdec[0], the 4x4 and
 * 8x8 passes encode each block as they go (later blocks predict from earlier
 * ones), the intra prediction mode cache is updated, and with a->b_mbrd set
 * h->mb.i_type is changed while RD costs are measured. */
 465 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_inter )
 466 {
 467     const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
 468     const int i_stride = h->mb.pic.i_stride[0];
 469     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 470     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 471     int      f8_satd_rd_ratio = 0;
 472
 473     int i, idx;
 474     int i_max;
 475     int predict_mode[9];
 476
    /* only modes whose SATD cost is below this get a full RD evaluation (b_mbrd) */
 477     const int i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10;
 478
 479     /*---------------- Try all mode and calculate their score ---------------*/
 480
 481     /* 16x16 prediction selection */
 482     predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 483     for( i = 0; i < i_max; i++ )
 484     {
 485         int i_sad;
 486         int i_mode;
 487
 488         i_mode = predict_mode[i];
 489         h->predict_16x16[i_mode]( p_dst, i_stride );
 490
 491         i_sad = h->pixf.mbcmp[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
 492                 a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
 493         if( a->i_sad_i16x16 > i_sad )
 494         {
 495             a->i_predict16x16 = i_mode;
 496             a->i_sad_i16x16   = i_sad;
 497         }
 498     }
 499
 500     if( a->b_mbrd )
 501     {
        /* 8.8 fixed-point ratio between the inter cost passed in and the best SATD so far */
 502         f8_satd_rd_ratio = ((unsigned)i_cost_inter << 8) / a->i_best_satd + 1;
 503         x264_mb_analyse_intra_chroma( h, a );
 504         if( h->mb.b_chroma_me )
 505             a->i_sad_i16x16 += a->i_sad_i8x8chroma;
 506         if( a->i_sad_i16x16 < i_satd_thresh )
 507         {
 508             h->mb.i_type = I_16x16;
 509             h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
 510             a->i_sad_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
 511         }
 512         else
 513             a->i_sad_i16x16 = a->i_sad_i16x16 * f8_satd_rd_ratio >> 8;
 514     }
 515     else
 516     {
 517         if( h->sh.i_type == SLICE_TYPE_B )
 518             /* cavlc mb type prefix */
 519             a->i_sad_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16];
        /* fast intra: skip the 4x4 and 8x8 passes when 16x16 is far worse than inter */
 520         if( a->b_fast_intra && a->i_sad_i16x16 > 2*i_cost_inter )
 521             return;
 522     }
 523
 524     /* 4x4 prediction selection */
 525     if( flags & X264_ANALYSE_I4x4 )
 526     {
 527         a->i_sad_i4x4 = 0;
 528         for( idx = 0; idx < 16; idx++ )
 529         {
 530             uint8_t *p_src_by;
 531             uint8_t *p_dst_by;
 532             int     i_best;
 533             int x, y;
 534             int i_pred_mode;
 535
 536             i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
 537             x = block_idx_x[idx];
 538             y = block_idx_y[idx];
 539
 540             p_src_by = p_src + 4 * x + 4 * y * i_stride;
 541             p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
 542
 543             i_best = COST_MAX;
 544             predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );
 545             for( i = 0; i < i_max; i++ )
 546             {
 547                 int i_sad;
 548                 int i_mode;
 549
 550                 i_mode = predict_mode[i];
 551                 h->predict_4x4[i_mode]( p_dst_by, i_stride );
 552
                /* mode cost: 1*lambda when it equals the predicted mode, 4*lambda otherwise */
 553                 i_sad = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, i_stride,
 554                                                   p_src_by, i_stride )
 555                       + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
 556
 557                 if( i_best > i_sad )
 558                 {
 559                     a->i_predict4x4[x][y] = i_mode;
 560                     i_best = i_sad;
 561                 }
 562             }
 563             a->i_sad_i4x4 += i_best;
 564
 565             /* we need to encode this block now (for next ones) */
 566             h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by, i_stride );
 567             x264_mb_encode_i4x4( h, idx, a->i_qp );
 568
 569             h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
 570         }
 571
 572         a->i_sad_i4x4 += a->i_lambda * 24;    /* from JVT (SATD0) */
 573         if( a->b_mbrd )
 574         {
 575             if( h->mb.b_chroma_me )
 576                 a->i_sad_i4x4 += a->i_sad_i8x8chroma;
 577             if( a->i_sad_i4x4 < i_satd_thresh )
 578             {
 579                 h->mb.i_type = I_4x4;
 580                 a->i_sad_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 );
 581             }
 582             else
 583                 a->i_sad_i4x4 = a->i_sad_i4x4 * f8_satd_rd_ratio >> 8;
 584         }
 585         else
 586         {
 587             if( h->sh.i_type == SLICE_TYPE_B )
 588                 a->i_sad_i4x4 += a->i_lambda * i_mb_b_cost_table[I_4x4];
 589         }
 590     }
 591
 592     /* 8x8 prediction selection */
    /* this pass writes into the same p_dst buffer as the 4x4 pass above */
 593     if( flags & X264_ANALYSE_I8x8 )
 594     {
 595         a->i_sad_i8x8 = 0;
 596         for( idx = 0; idx < 4; idx++ )
 597         {
 598             uint8_t *p_src_by;
 599             uint8_t *p_dst_by;
 600             int     i_best;
 601             int x, y;
 602             int i_pred_mode;
 603
 604             i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx );
 605             x = idx&1;
 606             y = idx>>1;
 607
 608             p_src_by = p_src + 8 * x + 8 * y * i_stride;
 609             p_dst_by = p_dst + 8 * x + 8 * y * i_stride;
 610
 611             i_best = COST_MAX;
 612             predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
 613             for( i = 0; i < i_max; i++ )
 614             {
 615                 int i_sad;
 616                 int i_mode;
 617
 618                 i_mode = predict_mode[i];
                /* NOTE(review): the candidate modes come from the per-block
                 * flags h->mb.i_neighbour8[idx], but the predictor is given
                 * the macroblock-level h->mb.i_neighbour -- confirm against
                 * the predict_8x8 contract that this is intended. */
 619                 h->predict_8x8[i_mode]( p_dst_by, i_stride, h->mb.i_neighbour );
 620
 621                 /* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
 622                 i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dst_by, i_stride,
 623                                                   p_src_by, i_stride )
 624                       + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
 625
 626                 if( i_best > i_sad )
 627                 {
 628                     a->i_predict8x8[x][y] = i_mode;
 629                     i_best = i_sad;
 630                 }
 631             }
 632             a->i_sad_i8x8 += i_best;
 633
 634             /* we need to encode this block now (for next ones) */
 635             h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, i_stride, h->mb.i_neighbour );
 636             x264_mb_encode_i8x8( h, idx, a->i_qp );
 637
 638             x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
 639         }
 640
 641         if( a->b_mbrd )
 642         {
 643             if( h->mb.b_chroma_me )
 644                 a->i_sad_i8x8 += a->i_sad_i8x8chroma;
 645             if( a->i_sad_i8x8 < i_satd_thresh )
 646             {
 647                 h->mb.i_type = I_8x8;
 648                 a->i_sad_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
 649             }
 650             else
 651                 a->i_sad_i8x8 = a->i_sad_i8x8 * f8_satd_rd_ratio >> 8;
 652         }
 653         else
 654         {
 655             // FIXME some bias like in i4x4?
 656             if( h->sh.i_type == SLICE_TYPE_B )
 657                 a->i_sad_i8x8 += a->i_lambda * i_mb_b_cost_table[I_8x8];
 658         }
 659     }
 660 }
 661
 662 #define LOAD_FENC( m, src, xoff, yoff) \
 663     (m)->i_stride[0] = h->mb.pic.i_stride[0]; \
 664     (m)->i_stride[1] = h->mb.pic.i_stride[1]; \
 665     (m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
 666     (m)->p_fenc[1] = &(src)[1][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
 667     (m)->p_fenc[2] = &(src)[2][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]];
 668 #define LOAD_HPELS(m, src, xoff, yoff) \
 669     (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
 670     (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
 671     (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
 672     (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
 673     (m)->p_fref[4] = &(src)[4][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
 674     (m)->p_fref[5] = &(src)[5][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]];
 675
 676 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 677 {
 678     x264_me_t m;
 679     int i_ref;
 680     int mvc[4][2], i_mvc;
 681     int i_fullpel_thresh = INT_MAX;
 682     int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
 683
 684     /* 16x16 Search on all ref frame */
 685     m.i_pixel = PIXEL_16x16;
 686     m.p_cost_mv = a->p_cost_mv;
 687     LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
 688
 689     a->l0.me16x16.cost = INT_MAX;
 690     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
 691     {
 692         const int i_ref_cost = a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 693         i_fullpel_thresh -= i_ref_cost;
 694
 695         /* search with ref */
 696         LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
 697         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 698         x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
 699         x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
 700
 701         m.cost += i_ref_cost;
 702         i_fullpel_thresh += i_ref_cost;
 703
 704         if( m.cost < a->l0.me16x16.cost )
 705         {
 706             a->l0.i_ref = i_ref;
 707             a->l0.me16x16 = m;
 708         }
 709
 710         /* save mv for predicting neighbors */
 711         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
 712         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
 713     }
 714
 715     /* Set global ref, needed for all others modes */
 716     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 717
 718     if( a->b_mbrd )
 719     {
 720         a->i_best_satd = a->l0.me16x16.cost;
 721         h->mb.i_type = P_L0;
 722         h->mb.i_partition = D_16x16;
 723         x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
 724         a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
 725     }
 726     else
 727     {
 728         /* subtract ref cost, so we don't have to add it for the other P types */
 729         a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
 730     }
 731 }
 732
 733 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
 734 {
 735     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 736     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 737     int mvc[5][2], i_mvc;
 738     int i;
 739
 740     /* XXX Needed for x264_mb_predict_mv */
 741     h->mb.i_partition = D_8x8;
 742
 743     i_mvc = 1;
 744     mvc[0][0] = a->l0.me16x16.mv[0];
 745     mvc[0][1] = a->l0.me16x16.mv[1];
 746
 747     for( i = 0; i < 4; i++ )
 748     {
 749         x264_me_t *m = &a->l0.me8x8[i];
 750         const int x8 = i%2;
 751         const int y8 = i/2;
 752
 753         m->i_pixel = PIXEL_8x8;
 754         m->p_cost_mv = a->p_cost_mv;
 755
 756         LOAD_FENC( m, p_fenc, 8*x8, 8*y8 );
 757         LOAD_HPELS( m, p_fref, 8*x8, 8*y8 );
 758
 759         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 760         x264_me_search( h, m, mvc, i_mvc );
 761
 762         x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
 763
 764         mvc[i_mvc][0] = m->mv[0];
 765         mvc[i_mvc][1] = m->mv[1];
 766         i_mvc++;
 767
 768         /* mb type cost */
 769         m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
 770     }
 771
 772     a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
 773                       a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
 774     if( a->b_mbrd )
 775     {
 776         if( a->i_best_satd > a->l0.i_cost8x8 )
 777             a->i_best_satd = a->l0.i_cost8x8;
 778         h->mb.i_type = P_8x8;
 779         h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
 780         h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
 781         a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
 782     }
 783 }
 784
 785 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 786 {
 787     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 788     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 789     int mvc[2][2];
 790     int i;
 791
 792     /* XXX Needed for x264_mb_predict_mv */
 793     h->mb.i_partition = D_16x8;
 794
 795     for( i = 0; i < 2; i++ )
 796     {
 797         x264_me_t *m = &a->l0.me16x8[i];
 798
 799         m->i_pixel = PIXEL_16x8;
 800         m->p_cost_mv = a->p_cost_mv;
 801
 802         LOAD_FENC( m, p_fenc, 0, 8*i );
 803         LOAD_HPELS( m, p_fref, 0, 8*i );
 804
 805         mvc[0][0] = a->l0.me8x8[2*i].mv[0];
 806         mvc[0][1] = a->l0.me8x8[2*i].mv[1];
 807         mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
 808         mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
 809
 810         x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
 811         x264_me_search( h, m, mvc, 2 );
 812
 813         x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
 814     }
 815
 816     a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
 817     if( a->b_mbrd )
 818     {
 819         if( a->i_best_satd > a->l0.i_cost16x8 )
 820             a->i_best_satd = a->l0.i_cost16x8;
 821         h->mb.i_type = P_L0;
 822         a->l0.i_cost16x8 = x264_rd_cost_mb( h, a->i_lambda2 );
 823     }
 824 }
 825
 826 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 827 {
 828     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 829     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 830     int mvc[2][2];
 831     int i;
 832
 833     /* XXX Needed for x264_mb_predict_mv */
 834     h->mb.i_partition = D_8x16;
 835
 836     for( i = 0; i < 2; i++ )
 837     {
 838         x264_me_t *m = &a->l0.me8x16[i];
 839
 840         m->i_pixel = PIXEL_8x16;
 841         m->p_cost_mv = a->p_cost_mv;
 842
 843         LOAD_FENC( m, p_fenc, 8*i, 0 );
 844         LOAD_HPELS( m, p_fref, 8*i, 0 );
 845
 846         mvc[0][0] = a->l0.me8x8[i].mv[0];
 847         mvc[0][1] = a->l0.me8x8[i].mv[1];
 848         mvc[1][0] = a->l0.me8x8[i+2].mv[0];
 849         mvc[1][1] = a->l0.me8x8[i+2].mv[1];
 850
 851         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 852         x264_me_search( h, m, mvc, 2 );
 853
 854         x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
 855     }
 856
 857     a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
 858     if( a->b_mbrd )
 859     {
 860         if( a->i_best_satd > a->l0.i_cost8x16 )
 861             a->i_best_satd = a->l0.i_cost8x16;
 862         h->mb.i_type = P_L0;
 863         a->l0.i_cost8x16 = x264_rd_cost_mb( h, a->i_lambda2 );
 864     }
 865 }
 866
 867 static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
 868 {
 869     uint8_t pix1[8*8], pix2[8*8];
 870     const int i_stride = h->mb.pic.i_stride[1];
 871     const int off = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
 872
 873 #define CHROMA4x4MC( width, height, me, x, y ) \
 874     h->mc.mc_chroma( &p_fref[4][off+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
 875     h->mc.mc_chroma( &p_fref[5][off+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
 876
 877     if( pixel == PIXEL_4x4 )
 878     {
 879         CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][0], 0,0 );
 880         CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][1], 0,2 );
 881         CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][2], 2,0 );
 882         CHROMA4x4MC( 2,2, a->l0.me4x4[i8x8][3], 2,2 );
 883     }
 884     else if( pixel == PIXEL_8x4 )
 885     {
 886         CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][0], 0,0 );
 887         CHROMA4x4MC( 4,2, a->l0.me8x4[i8x8][1], 0,2 );
 888     }
 889     else
 890     {
 891         CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][0], 0,0 );
 892         CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
 893     }
 894
 895     return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][off], i_stride, pix1, 8 )
 896          + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][off], i_stride, pix2, 8 );
 897 }
 898
 899 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 900 {
 901     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 902     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 903
 904     int i4x4;
 905
 906     /* XXX Needed for x264_mb_predict_mv */
 907     h->mb.i_partition = D_8x8;
 908
 909     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 910     {
 911         const int idx = 4*i8x8 + i4x4;
 912         const int x4 = block_idx_x[idx];
 913         const int y4 = block_idx_y[idx];
 914         const int i_mvc = (i4x4 == 0);
 915
 916         x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
 917
 918         m->i_pixel = PIXEL_4x4;
 919         m->p_cost_mv = a->p_cost_mv;
 920
 921         LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
 922         LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
 923
 924         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 925         x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
 926
 927         x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
 928     }
 929
 930     a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
 931                          a->l0.me4x4[i8x8][1].cost +
 932                          a->l0.me4x4[i8x8][2].cost +
 933                          a->l0.me4x4[i8x8][3].cost +
 934                          a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
 935     if( h->mb.b_chroma_me )
 936         a->l0.i_cost4x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 );
 937 }
 938
 939 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 940 {
 941     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 942     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 943
 944     int i8x4;
 945
 946     /* XXX Needed for x264_mb_predict_mv */
 947     h->mb.i_partition = D_8x8;
 948
 949     for( i8x4 = 0; i8x4 < 2; i8x4++ )
 950     {
 951         const int idx = 4*i8x8 + 2*i8x4;
 952         const int x4 = block_idx_x[idx];
 953         const int y4 = block_idx_y[idx];
 954         const int i_mvc = (i8x4 == 0);
 955
 956         x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
 957
 958         m->i_pixel = PIXEL_8x4;
 959         m->p_cost_mv = a->p_cost_mv;
 960
 961         LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
 962         LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
 963
 964         x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
 965         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
 966
 967         x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
 968     }
 969
 970     a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
 971                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
 972     if( h->mb.b_chroma_me )
 973         a->l0.i_cost8x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 );
 974 }
 975
 976 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 977 {
 978     uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
 979     uint8_t  **p_fenc = h->mb.pic.p_fenc;
 980
 981     int i4x8;
 982
 983     /* XXX Needed for x264_mb_predict_mv */
 984     h->mb.i_partition = D_8x8;
 985
 986     for( i4x8 = 0; i4x8 < 2; i4x8++ )
 987     {
 988         const int idx = 4*i8x8 + i4x8;
 989         const int x4 = block_idx_x[idx];
 990         const int y4 = block_idx_y[idx];
 991         const int i_mvc = (i4x8 == 0);
 992
 993         x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
 994
 995         m->i_pixel = PIXEL_4x8;
 996         m->p_cost_mv = a->p_cost_mv;
 997
 998         LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
 999         LOAD_HPELS( m, p_fref, 4*x4, 4*y4 );
1000
1001         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
1002         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
1003
1004         x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
1005     }
1006
1007     a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
1008                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
1009     if( h->mb.b_chroma_me )
1010         a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
1011 }
1012
1013 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
1014 {
1015     /* Assumes that fdec still contains the results of
1016      * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
1017
1018     uint8_t **p_fenc = h->mb.pic.p_fenc;
1019     uint8_t **p_fdec = h->mb.pic.p_fdec;
1020     int i_stride= h->mb.pic.i_stride[0];
1021     int i;
1022
1023     a->i_cost16x16direct = 0;
1024     for( i = 0; i < 4; i++ )
1025     {
1026         const int x8 = i%2;
1027         const int y8 = i/2;
1028         const int off = 8 * x8 + 8 * i_stride * y8;
1029         a->i_cost16x16direct +=
1030         a->i_cost8x8direct[i] =
1031             h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[0][off], i_stride, &p_fdec[0][off], i_stride );
1032
1033         /* mb type cost */
1034         a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
1035     }
1036
1037     a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
1038 }
1039
/* Average the prediction in src2 into pix1 (in place) for a block of the
 * given PIXEL_* size.  Uses the weighted averaging function with
 * h->mb.bipred_weight[l0 ref][l1 ref] when weighted bi-prediction is enabled,
 * and the plain average otherwise.
 * Relies on `h` and `a` being in scope at the point of use. */
#define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
    { \
        if( h->param.analyse.b_weighted_bipred ) \
            h->pixf.avg_weight[size]( pix1, stride1, src2, stride2, \
                    h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \
        else \
            h->pixf.avg[size]( pix1, stride1, src2, stride2 ); \
    }
1048
/* 16x16 inter analysis for B macroblocks.
 *
 * Searches every list-0 and every list-1 reference, keeps the cheapest
 * reference/mv per list in a->l0 / a->l1 (i_ref and me16x16), caches the
 * chosen references, and evaluates bi-prediction from the two winners.
 *
 * Outputs: a->l0.i_ref, a->l0.me16x16, a->l1.i_ref, a->l1.me16x16,
 * a->i_cost16x16bi, and h->mb.mvr[list][ref][mb] for every searched ref.
 * On return the me16x16 costs include the B_L0_L0 / B_L1_L1 type cost but
 * not the reference index cost; i_cost16x16bi includes both reference costs
 * and the B_BI_BI type cost. */
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
    uint8_t pix1[16*16], pix2[16*16];
    uint8_t *src2;
    int stride2 = 16;
    int src2_ref, pix1_ref;

    /* scratch search state, reused for every reference of both lists */
    x264_me_t m;
    int i_ref;
    int mvc[5][2], i_mvc;
    /* threshold handed to x264_me_search_ref only when more than one
     * reference is searched; its exact semantics live in the ME code */
    int i_fullpel_thresh = INT_MAX;
    int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;

    /* 16x16 Search on all ref frame */
    m.i_pixel = PIXEL_16x16;
    m.p_cost_mv = a->p_cost_mv;
    LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );

    /* ME for List 0 */
    a->l0.me16x16.cost = INT_MAX;
    for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
    {
        /* search with ref */
        LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
        x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
        x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
        x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );

        /* add ref cost */
        m.cost += a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );

        /* keep the cheapest reference (a full copy of the search state) */
        if( m.cost < a->l0.me16x16.cost )
        {
            a->l0.i_ref = i_ref;
            a->l0.me16x16 = m;
        }

        /* save mv for predicting neighbors */
        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
    }
    /* subtract ref cost, so we don't have to add it for the other MB types */
    a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );

    /* ME for list 1 */
    i_fullpel_thresh = INT_MAX;
    p_fullpel_thresh = h->i_ref1>1 ? &i_fullpel_thresh : NULL;
    a->l1.me16x16.cost = INT_MAX;
    for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
    {
        /* search with ref */
        LOAD_HPELS( &m, h->mb.pic.p_fref[1][i_ref], 0, 0 );
        x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
        x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
        x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );

        /* add ref cost */
        m.cost += a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );

        /* keep the cheapest reference (a full copy of the search state) */
        if( m.cost < a->l1.me16x16.cost )
        {
            a->l1.i_ref = i_ref;
            a->l1.me16x16 = m;
        }

        /* save mv for predicting neighbors */
        h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
        h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
    }
    /* subtract ref cost, so we don't have to add it for the other MB types */
    a->l1.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );

    /* Set global ref, needed for other modes? */
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );

    /* get cost of BI mode */
    /* Both branches end with one prediction in pix1 and the other reachable
     * through src2/stride2; they differ only in which list goes where. */
    if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )
    {
        /* l0 reference is halfpel, so get_ref on it will make it faster */
        src2 = h->mc.get_ref( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
                        pix2, &stride2,
                        a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
                        16, 16 );
        h->mc.mc_luma( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
                        pix1, 16,
                        a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
                        16, 16 );
        src2_ref = a->l0.i_ref;
        pix1_ref = a->l1.i_ref;
    }
    else
    {
        /* if l0 was qpel, we'll use get_ref on l1 instead */
        h->mc.mc_luma( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
                        pix1, 16,
                        a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
                        16, 16 );
        src2 = h->mc.get_ref( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
                        pix2, &stride2,
                        a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
                        16, 16 );
        src2_ref = a->l1.i_ref;
        pix1_ref = a->l0.i_ref;
    }

    /* NOTE(review): WEIGHTED_AVG always indexes bipred_weight as
     * [l0 ref][l1 ref] with the list-0 pixels as the first operand.  When the
     * first branch above is taken, pix1 holds list 1 and the table is indexed
     * [l1 ref][l0 ref] here -- confirm that the swapped index really yields
     * the complementary weight. */
    if( h->param.analyse.b_weighted_bipred )
        h->pixf.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2,
                h->mb.bipred_weight[pix1_ref][src2_ref] );
    else
        h->pixf.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );

    /* bi cost = distortion of the averaged prediction + both ref costs
     * + both mv costs */
    a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 )
                     + a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref )
                                     + bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) )
                     + a->l0.me16x16.cost_mv
                     + a->l1.me16x16.cost_mv;

    /* mb type cost */
    a->i_cost16x16bi   += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
    a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
    a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
}
1172
1173 static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
1174 {
1175     const int x = 2*(i%2);
1176     const int y = 2*(i/2);
1177
1178     switch( h->mb.i_sub_partition[i] )
1179     {
1180         case D_L0_8x8:
1181             x264_macroblock_cache_mv( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );
1182             break;
1183         case D_L0_8x4:
1184             x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv[0], a->l0.me8x4[i][0].mv[1] );
1185             x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv[0], a->l0.me8x4[i][1].mv[1] );
1186             break;
1187         case D_L0_4x8:
1188             x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv[0], a->l0.me4x8[i][0].mv[1] );
1189             x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv[0], a->l0.me4x8[i][1].mv[1] );
1190             break;
1191         case D_L0_4x4:
1192             x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv[0], a->l0.me4x4[i][0].mv[1] );
1193             x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv[0], a->l0.me4x4[i][1].mv[1] );
1194             x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv[0], a->l0.me4x4[i][2].mv[1] );
1195             x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv[0], a->l0.me4x4[i][3].mv[1] );
1196             break;
1197         default:
1198             x264_log( h, X264_LOG_ERROR, "internal error\n" );
1199             break;
1200     }
1201 }
1202
/* Cache ref and mv of both lists for the dx x dy block area at (x,y)
 * (4x4 block units), for a partition whose prediction type is `part`.
 * For each list: if x264_mb_partition_listX_table[list][part] says the list
 * is used, cache the analysed ref (a->lX.i_ref) and the mv of me0/me1;
 * otherwise cache ref -1 and a zero mv, and also a zero mvd when b_mvd is set.
 * Relies on `h`, `a` and `b_mvd` being in scope at the point of use. */
#define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
    if( x264_mb_partition_listX_table[0][part] ) \
    { \
        x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
        x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
    } \
    else \
    { \
        x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
        x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, 0, 0 ); \
        if( b_mvd ) \
            x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
    } \
    if( x264_mb_partition_listX_table[1][part] ) \
    { \
        x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
        x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
    } \
    else \
    { \
        x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
        x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, 0, 0 ); \
        if( b_mvd ) \
            x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
    }
1228
1229 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
1230 {
1231     int x = (i%2)*2;
1232     int y = (i/2)*2;
1233     if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
1234     {
1235         x264_mb_load_mv_direct8x8( h, i );
1236         if( b_mvd )
1237         {
1238             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0, 0 );
1239             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 1, 0, 0 );
1240             x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
1241         }
1242     }
1243     else
1244     {
1245         CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
1246     }
1247 }
/* Cache ref/mv of both lists for 16x8 partition i (4 blocks wide, 2 high,
 * starting at block row 2*i), using the prediction type chosen in
 * a->i_mb_partition16x8[i].  b_mvd is consumed by CACHE_MV_BI. */
static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
{
    CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
}
/* Cache ref/mv of both lists for 8x16 partition i (2 blocks wide, 4 high,
 * starting at block column 2*i), using the prediction type chosen in
 * a->i_mb_partition8x16[i].  b_mvd is consumed by CACHE_MV_BI. */
static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
{
    CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
}
1256 #undef CACHE_MV_BI
1257
1258 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
1259 {
1260     uint8_t **p_fref[2] =
1261         { h->mb.pic.p_fref[0][a->l0.i_ref],
1262           h->mb.pic.p_fref[1][a->l1.i_ref] };
1263     uint8_t pix[2][8*8];
1264     int i, l;
1265
1266     /* XXX Needed for x264_mb_predict_mv */
1267     h->mb.i_partition = D_8x8;
1268
1269     a->i_cost8x8bi = 0;
1270
1271     for( i = 0; i < 4; i++ )
1272     {
1273         const int x8 = i%2;
1274         const int y8 = i/2;
1275         int i_part_cost;
1276         int i_part_cost_bi = 0;
1277
1278         for( l = 0; l < 2; l++ )
1279         {
1280             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1281             x264_me_t *m = &lX->me8x8[i];
1282
1283             m->i_pixel = PIXEL_8x8;
1284             m->p_cost_mv = a->p_cost_mv;
1285
1286             LOAD_FENC( m, h->mb.pic.p_fenc, 8*x8, 8*y8 );
1287             LOAD_HPELS( m, p_fref[l], 8*x8, 8*y8 );
1288
1289             x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
1290             x264_me_search( h, m, &lX->me16x16.mv, 1 );
1291
1292             x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
1293
1294             /* BI mode */
1295             h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1296                             m->mv[0], m->mv[1], 8, 8 );
1297             i_part_cost_bi += m->cost_mv;
1298             /* FIXME: ref cost */
1299         }
1300
1301         WEIGHTED_AVG( PIXEL_8x8, pix[0], 8, pix[1], 8 );
1302         i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x8]( a->l0.me8x8[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 8 )
1303                         + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
1304         a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
1305         a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
1306
1307         i_part_cost = a->l0.me8x8[i].cost;
1308         h->mb.i_sub_partition[i] = D_L0_8x8;
1309         if( a->l1.me8x8[i].cost < i_part_cost )
1310         {
1311             i_part_cost = a->l1.me8x8[i].cost;
1312             h->mb.i_sub_partition[i] = D_L1_8x8;
1313         }
1314         if( i_part_cost_bi < i_part_cost )
1315         {
1316             i_part_cost = i_part_cost_bi;
1317             h->mb.i_sub_partition[i] = D_BI_8x8;
1318         }
1319         if( a->i_cost8x8direct[i] < i_part_cost )
1320         {
1321             i_part_cost = a->i_cost8x8direct[i];
1322             h->mb.i_sub_partition[i] = D_DIRECT_8x8;
1323         }
1324         a->i_cost8x8bi += i_part_cost;
1325
1326         /* XXX Needed for x264_mb_predict_mv */
1327         x264_mb_cache_mv_b8x8( h, a, i, 0 );
1328     }
1329
1330     /* mb type cost */
1331     a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
1332 }
1333
1334 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
1335 {
1336     uint8_t **p_fref[2] =
1337         { h->mb.pic.p_fref[0][a->l0.i_ref],
1338           h->mb.pic.p_fref[1][a->l1.i_ref] };
1339     uint8_t pix[2][16*8];
1340     int mvc[2][2];
1341     int i, l;
1342
1343     h->mb.i_partition = D_16x8;
1344     a->i_cost16x8bi = 0;
1345
1346     for( i = 0; i < 2; i++ )
1347     {
1348         int i_part_cost;
1349         int i_part_cost_bi = 0;
1350
1351         /* TODO: check only the list(s) that were used in b8x8? */
1352         for( l = 0; l < 2; l++ )
1353         {
1354             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1355             x264_me_t *m = &lX->me16x8[i];
1356
1357             m->i_pixel = PIXEL_16x8;
1358             m->p_cost_mv = a->p_cost_mv;
1359
1360             LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
1361             LOAD_HPELS( m, p_fref[l], 0, 8*i );
1362
1363             mvc[0][0] = lX->me8x8[2*i].mv[0];
1364             mvc[0][1] = lX->me8x8[2*i].mv[1];
1365             mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1366             mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1367
1368             x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1369             x264_me_search( h, m, mvc, 2 );
1370
1371             /* BI mode */
1372             h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 16,
1373                             m->mv[0], m->mv[1], 16, 8 );
1374             /* FIXME: ref cost */
1375             i_part_cost_bi += m->cost_mv;
1376         }
1377
1378         WEIGHTED_AVG( PIXEL_16x8, pix[0], 16, pix[1], 16 );
1379         i_part_cost_bi += h->pixf.mbcmp[PIXEL_16x8]( a->l0.me16x8[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 16 );
1380
1381         i_part_cost = a->l0.me16x8[i].cost;
1382         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1383         if( a->l1.me16x8[i].cost < i_part_cost )
1384         {
1385             i_part_cost = a->l1.me16x8[i].cost;
1386             a->i_mb_partition16x8[i] = D_L1_8x8;
1387         }
1388         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1389         {
1390             i_part_cost = i_part_cost_bi;
1391             a->i_mb_partition16x8[i] = D_BI_8x8;
1392         }
1393         a->i_cost16x8bi += i_part_cost;
1394
1395         if( i == 0 )
1396             x264_mb_cache_mv_b16x8( h, a, i, 0 );
1397     }
1398
1399     /* mb type cost */
1400     a->i_mb_type16x8 = B_L0_L0
1401         + (a->i_mb_partition16x8[0]>>2) * 3
1402         + (a->i_mb_partition16x8[1]>>2);
1403     a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
1404 }
1405 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1406 {
1407     uint8_t **p_fref[2] =
1408         { h->mb.pic.p_fref[0][a->l0.i_ref],
1409           h->mb.pic.p_fref[1][a->l1.i_ref] };
1410     uint8_t pix[2][8*16];
1411     int mvc[2][2];
1412     int i, l;
1413
1414     h->mb.i_partition = D_8x16;
1415     a->i_cost8x16bi = 0;
1416
1417     for( i = 0; i < 2; i++ )
1418     {
1419         int i_part_cost;
1420         int i_part_cost_bi = 0;
1421
1422         for( l = 0; l < 2; l++ )
1423         {
1424             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1425             x264_me_t *m = &lX->me8x16[i];
1426
1427             m->i_pixel = PIXEL_8x16;
1428             m->p_cost_mv = a->p_cost_mv;
1429
1430             LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
1431             LOAD_HPELS( m, p_fref[l], 8*i, 0 );
1432
1433             mvc[0][0] = lX->me8x8[i].mv[0];
1434             mvc[0][1] = lX->me8x8[i].mv[1];
1435             mvc[1][0] = lX->me8x8[i+2].mv[0];
1436             mvc[1][1] = lX->me8x8[i+2].mv[1];
1437
1438             x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1439             x264_me_search( h, m, mvc, 2 );
1440
1441             /* BI mode */
1442             h->mc.mc_luma( m->p_fref, m->i_stride[0], pix[l], 8,
1443                             m->mv[0], m->mv[1], 8, 16 );
1444             /* FIXME: ref cost */
1445             i_part_cost_bi += m->cost_mv;
1446         }
1447
1448         WEIGHTED_AVG( PIXEL_8x16, pix[0], 8, pix[1], 8 );
1449         i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x16]( a->l0.me8x16[i].p_fenc[0], h->mb.pic.i_stride[0], pix[0], 8 );
1450
1451         i_part_cost = a->l0.me8x16[i].cost;
1452         a->i_mb_partition8x16[i] = D_L0_8x8;
1453         if( a->l1.me8x16[i].cost < i_part_cost )
1454         {
1455             i_part_cost = a->l1.me8x16[i].cost;
1456             a->i_mb_partition8x16[i] = D_L1_8x8;
1457         }
1458         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1459         {
1460             i_part_cost = i_part_cost_bi;
1461             a->i_mb_partition8x16[i] = D_BI_8x8;
1462         }
1463         a->i_cost8x16bi += i_part_cost;
1464
1465         if( i == 0 )
1466             x264_mb_cache_mv_b8x16( h, a, i, 0 );
1467     }
1468
1469     /* mb type cost */
1470     a->i_mb_type8x16 = B_L0_L0
1471         + (a->i_mb_partition8x16[0]>>2) * 3
1472         + (a->i_mb_partition8x16[1]>>2);
1473     a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1474 }
1475
1476 static inline void x264_mb_analyse_transform( x264_t *h )
1477 {
1478     h->mb.cache.b_transform_8x8_allowed =
1479         h->param.analyse.b_transform_8x8
1480         && !IS_INTRA( h->mb.i_type ) && x264_mb_transform_8x8_allowed( h );
1481
1482     if( h->mb.cache.b_transform_8x8_allowed )
1483     {
1484         int i_cost4, i_cost8;
1485         /* FIXME only luma mc is needed */
1486         x264_mb_mc( h );
1487
1488         i_cost8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
1489                                              h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
1490         i_cost4 = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
1491                                              h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
1492
1493         h->mb.b_transform_8x8 = i_cost8 < i_cost4;
1494     }
1495 }
1496
1497 static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_cost )
1498 {
1499     h->mb.cache.b_transform_8x8_allowed =
1500         h->param.analyse.b_transform_8x8 && x264_mb_transform_8x8_allowed( h );
1501
1502     if( h->mb.cache.b_transform_8x8_allowed )
1503     {
1504         int i_cost8;
1505         x264_analyse_update_cache( h, a );
1506         h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
1507         /* FIXME only luma is needed, but the score for comparison already includes chroma */
1508         i_cost8 = x264_rd_cost_mb( h, a->i_lambda2 );
1509
1510         if( *i_cost >= i_cost8 )
1511         {
1512             if( *i_cost > 0 )
1513                 a->i_best_satd = (int64_t)a->i_best_satd * i_cost8 / *i_cost;
1514             *i_cost = i_cost8;
1515         }
1516         else
1517             h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
1518     }
1519 }
1520
1521
1522 /*****************************************************************************
1523  * x264_macroblock_analyse:
1524  *****************************************************************************/
1525 void x264_macroblock_analyse( x264_t *h )
1526 {
1527     x264_mb_analysis_t analysis;
1528     int i;
1529
1530     h->mb.i_qp =
1531     h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp( h );
1532
1533     /* init analysis */
1534     x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1535
1536     /*--------------------------- Do the analysis ---------------------------*/
1537     if( h->sh.i_type == SLICE_TYPE_I )
1538     {
1539         int i_cost;
1540         x264_mb_analyse_intra( h, &analysis, COST_MAX );
1541
1542         i_cost = analysis.i_sad_i16x16;
1543         h->mb.i_type = I_16x16;
1544         if( analysis.i_sad_i4x4 < i_cost )
1545         {
1546             i_cost = analysis.i_sad_i4x4;
1547             h->mb.i_type = I_4x4;
1548         }
1549         if( analysis.i_sad_i8x8 < i_cost )
1550             h->mb.i_type = I_8x8;
1551     }
1552     else if( h->sh.i_type == SLICE_TYPE_P )
1553     {
1554         int b_skip = 0;
1555         int i_cost;
1556         int i_intra_cost, i_intra_type;
1557
1558         /* Fast P_SKIP detection */
1559         if( !h->mb.b_lossless &&
1560            (( h->mb.i_mb_type_left == P_SKIP ) ||
1561             ( h->mb.i_mb_type_top == P_SKIP ) ||
1562             ( h->mb.i_mb_type_topleft == P_SKIP ) ||
1563             ( h->mb.i_mb_type_topright == P_SKIP )))
1564         {
1565             b_skip = x264_macroblock_probe_pskip( h );
1566         }
1567
1568         if( b_skip )
1569         {
1570             h->mb.i_type = P_SKIP;
1571             h->mb.i_partition = D_16x16;
1572         }
1573         else
1574         {
1575             const unsigned int flags = h->param.analyse.inter;
1576             int i_type;
1577             int i_partition;
1578             int i_thresh16x8;
1579
1580             x264_mb_analyse_load_costs( h, &analysis );
1581
1582             x264_mb_analyse_inter_p16x16( h, &analysis );
1583             if( flags & X264_ANALYSE_PSUB16x16 )
1584                 x264_mb_analyse_inter_p8x8( h, &analysis );
1585
1586             /* Select best inter mode */
1587             i_type = P_L0;
1588             i_partition = D_16x16;
1589             i_cost = analysis.l0.me16x16.cost;
1590
1591             if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1592                 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1593             {
1594                 int i;
1595
1596                 i_type = P_8x8;
1597                 i_partition = D_8x8;
1598                 h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
1599                 h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
1600
1601                 i_cost = analysis.l0.i_cost8x8;
1602
1603                 /* Do sub 8x8 */
1604                 if( flags & X264_ANALYSE_PSUB8x8 )
1605                 {
1606                     int i_cost_bak = i_cost;
1607                     int b_sub8x8 = 0;
1608                     for( i = 0; i < 4; i++ )
1609                     {
1610                         x264_mb_analyse_inter_p4x4( h, &analysis, i );
1611                         if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1612                         {
1613                             int i_cost8x8 = analysis.l0.i_cost4x4[i];
1614                             h->mb.i_sub_partition[i] = D_L0_4x4;
1615
1616                             x264_mb_analyse_inter_p8x4( h, &analysis, i );
1617                             if( analysis.l0.i_cost8x4[i] < i_cost8x8 )
1618                             {
1619                                 h->mb.i_sub_partition[i] = D_L0_8x4;
1620                                 i_cost8x8 = analysis.l0.i_cost8x4[i];
1621                             }
1622
1623                             x264_mb_analyse_inter_p4x8( h, &analysis, i );
1624                             if( analysis.l0.i_cost4x8[i] < i_cost8x8 )
1625                             {
1626                                 h->mb.i_sub_partition[i] = D_L0_4x8;
1627                                 i_cost8x8 = analysis.l0.i_cost4x8[i];
1628                             }
1629
1630                             i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1631                             b_sub8x8 = 1;
1632                         }
1633                         x264_mb_cache_mv_p8x8( h, &analysis, i );
1634                     }
1635                     /* TODO: RD per subpartition */
1636                     if( b_sub8x8 && analysis.b_mbrd )
1637                     {
1638                         i_cost = x264_rd_cost_mb( h, analysis.i_lambda2 );
1639                         if( i_cost > i_cost_bak )
1640                         {
1641                             i_cost = i_cost_bak;
1642                             h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
1643                             h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
1644                         }
1645                     }
1646                 }
1647             }
1648
1649             /* Now do 16x8/8x16 */
1650             i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
1651             if( analysis.b_mbrd )
1652                 i_thresh16x8 = i_thresh16x8 * analysis.i_lambda2 / analysis.i_lambda;
1653             if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1654                 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
1655             {
1656                 x264_mb_analyse_inter_p16x8( h, &analysis );
1657                 if( analysis.l0.i_cost16x8 < i_cost )
1658                 {
1659                     i_type = P_L0;
1660                     i_partition = D_16x8;
1661                     i_cost = analysis.l0.i_cost16x8;
1662                 }
1663
1664                 x264_mb_analyse_inter_p8x16( h, &analysis );
1665                 if( analysis.l0.i_cost8x16 < i_cost )
1666                 {
1667                     i_type = P_L0;
1668                     i_partition = D_8x16;
1669                     i_cost = analysis.l0.i_cost8x16;
1670                 }
1671             }
1672
1673             h->mb.i_partition = i_partition;
1674
1675             /* refine qpel */
1676             //FIXME mb_type costs?
1677             if( analysis.b_mbrd )
1678             {
1679                 h->mb.i_type = i_type;
1680                 x264_mb_analyse_transform_rd( h, &analysis, &i_cost );
1681             }
1682             else if( i_partition == D_16x16 )
1683             {
1684                 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1685                 i_cost = analysis.l0.me16x16.cost;
1686             }
1687             else if( i_partition == D_16x8 )
1688             {
1689                 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1690                 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1691                 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1692             }
1693             else if( i_partition == D_8x16 )
1694             {
1695                 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1696                 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1697                 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1698             }
1699             else if( i_partition == D_8x8 )
1700             {
1701                 int i8x8;
1702                 i_cost = 0;
1703                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1704                 {
1705                     switch( h->mb.i_sub_partition[i8x8] )
1706                     {
1707                         case D_L0_8x8:
1708                             x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1709                             i_cost += analysis.l0.me8x8[i8x8].cost;
1710                             break;
1711                         case D_L0_8x4:
1712                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1713                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1714                             i_cost += analysis.l0.me8x4[i8x8][0].cost +
1715                                       analysis.l0.me8x4[i8x8][1].cost;
1716                             break;
1717                         case D_L0_4x8:
1718                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1719                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1720                             i_cost += analysis.l0.me4x8[i8x8][0].cost +
1721                                       analysis.l0.me4x8[i8x8][1].cost;
1722                             break;
1723
1724                         case D_L0_4x4:
1725                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1726                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1727                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1728                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1729                             i_cost += analysis.l0.me4x4[i8x8][0].cost +
1730                                       analysis.l0.me4x4[i8x8][1].cost +
1731                                       analysis.l0.me4x4[i8x8][2].cost +
1732                                       analysis.l0.me4x4[i8x8][3].cost;
1733                             break;
1734                         default:
1735                             x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
1736                             break;
1737                     }
1738                 }
1739             }
1740
1741             x264_mb_analyse_intra( h, &analysis, i_cost );
1742             if( h->mb.b_chroma_me && !analysis.b_mbrd &&
1743                 ( analysis.i_sad_i16x16 < i_cost
1744                || analysis.i_sad_i8x8 < i_cost
1745                || analysis.i_sad_i4x4 < i_cost ))
1746             {
1747                 x264_mb_analyse_intra_chroma( h, &analysis );
1748                 analysis.i_sad_i16x16 += analysis.i_sad_i8x8chroma;
1749                 analysis.i_sad_i8x8 += analysis.i_sad_i8x8chroma;
1750                 analysis.i_sad_i4x4 += analysis.i_sad_i8x8chroma;
1751             }
1752
1753             i_intra_type = I_16x16;
1754             i_intra_cost = analysis.i_sad_i16x16;
1755
1756             if( analysis.i_sad_i8x8 < i_intra_cost )
1757             {
1758                 i_intra_type = I_8x8;
1759                 i_intra_cost = analysis.i_sad_i8x8;
1760             }
1761             if( analysis.i_sad_i4x4 < i_intra_cost )
1762             {
1763                 i_intra_type = I_4x4;
1764                 i_intra_cost = analysis.i_sad_i4x4;
1765             }
1766
1767             if( i_intra_cost < i_cost )
1768             {
1769                 i_type = i_intra_type;
1770                 i_cost = i_intra_cost;
1771             }
1772
1773             h->mb.i_type = i_type;
1774             h->stat.frame.i_intra_cost += i_intra_cost;
1775             h->stat.frame.i_inter_cost += i_cost;
1776         }
1777     }
1778     else if( h->sh.i_type == SLICE_TYPE_B )
1779     {
1780         int b_skip = 0;
1781
1782         analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1783         if( analysis.b_direct_available )
1784         {
1785             h->mb.i_type = B_SKIP;
1786             x264_mb_mc( h );
1787
1788             /* Conditioning the probe on neighboring block types
1789              * doesn't seem to help speed or quality. */
1790             b_skip = !h->mb.b_lossless && x264_macroblock_probe_bskip( h );
1791         }
1792
1793         if( !b_skip )
1794         {
1795             const unsigned int flags = h->param.analyse.inter;
1796             int i_partition;
1797             int i_cost;
1798
1799             x264_mb_analyse_load_costs( h, &analysis );
1800
1801             /* select best inter mode */
1802             /* direct must be first */
1803             if( analysis.b_direct_available )
1804                 x264_mb_analyse_inter_direct( h, &analysis );
1805
1806             x264_mb_analyse_inter_b16x16( h, &analysis );
1807
1808             h->mb.i_type = B_L0_L0;
1809             i_partition = D_16x16;
1810             i_cost = analysis.l0.me16x16.cost;
1811             if( analysis.l1.me16x16.cost < i_cost )
1812             {
1813                 h->mb.i_type = B_L1_L1;
1814                 i_cost = analysis.l1.me16x16.cost;
1815             }
1816             if( analysis.i_cost16x16bi < i_cost )
1817             {
1818                 h->mb.i_type = B_BI_BI;
1819                 i_cost = analysis.i_cost16x16bi;
1820             }
1821             if( analysis.i_cost16x16direct < i_cost )
1822             {
1823                 h->mb.i_type = B_DIRECT;
1824                 i_cost = analysis.i_cost16x16direct;
1825             }
1826
1827             if( flags & X264_ANALYSE_BSUB16x16 )
1828             {
1829                 x264_mb_analyse_inter_b8x8( h, &analysis );
1830                 if( analysis.i_cost8x8bi < i_cost )
1831                 {
1832                     h->mb.i_type = B_8x8;
1833                     i_partition = D_8x8;
1834                     i_cost = analysis.i_cost8x8bi;
1835
1836                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1837                         h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1838                     {
1839                         x264_mb_analyse_inter_b16x8( h, &analysis );
1840                         if( analysis.i_cost16x8bi < i_cost )
1841                         {
1842                             i_partition = D_16x8;
1843                             i_cost = analysis.i_cost16x8bi;
1844                             h->mb.i_type = analysis.i_mb_type16x8;
1845                         }
1846                     }
1847                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1848                         h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1849                     {
1850                         x264_mb_analyse_inter_b8x16( h, &analysis );
1851                         if( analysis.i_cost8x16bi < i_cost )
1852                         {
1853                             i_partition = D_8x16;
1854                             i_cost = analysis.i_cost8x16bi;
1855                             h->mb.i_type = analysis.i_mb_type8x16;
1856                         }
1857                     }
1858                 }
1859             }
1860
1861             h->mb.i_partition = i_partition;
1862
1863             /* refine qpel */
1864             if( i_partition == D_16x16 )
1865             {
1866                 analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1867                 analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1868                 if( h->mb.i_type == B_L0_L0 )
1869                 {
1870                     x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1871                     i_cost = analysis.l0.me16x16.cost
1872                            + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1873                 }
1874                 else if( h->mb.i_type == B_L1_L1 )
1875                 {
1876                     x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1877                     i_cost = analysis.l1.me16x16.cost
1878                            + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1879                 }
1880                 else if( h->mb.i_type == B_BI_BI )
1881                 {
1882                     x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1883                     x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1884                 }
1885             }
1886             else if( i_partition == D_16x8 )
1887             {
1888                 for( i=0; i<2; i++ )
1889                 {
1890                     if( analysis.i_mb_partition16x8[i] != D_L1_8x8 )
1891                         x264_me_refine_qpel( h, &analysis.l0.me16x8[i] );
1892                     if( analysis.i_mb_partition16x8[i] != D_L0_8x8 )
1893                         x264_me_refine_qpel( h, &analysis.l1.me16x8[i] );
1894                 }
1895             }
1896             else if( i_partition == D_8x16 )
1897             {
1898                 for( i=0; i<2; i++ )
1899                 {
1900                     if( analysis.i_mb_partition8x16[i] != D_L1_8x8 )
1901                         x264_me_refine_qpel( h, &analysis.l0.me8x16[i] );
1902                     if( analysis.i_mb_partition8x16[i] != D_L0_8x8 )
1903                         x264_me_refine_qpel( h, &analysis.l1.me8x16[i] );
1904                 }
1905             }
1906             else if( i_partition == D_8x8 )
1907             {
1908                 for( i=0; i<4; i++ )
1909                 {
1910                     x264_me_t *m;
1911                     int i_part_cost_old;
1912                     int i_type_cost;
1913                     int i_part_type = h->mb.i_sub_partition[i];
1914                     int b_bidir = (i_part_type == D_BI_8x8);
1915
1916                     if( i_part_type == D_DIRECT_8x8 )
1917                         continue;
1918                     if( x264_mb_partition_listX_table[0][i_part_type] )
1919                     {
1920                         m = &analysis.l0.me8x8[i];
1921                         i_part_cost_old = m->cost;
1922                         i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
1923                         m->cost -= i_type_cost;
1924                         x264_me_refine_qpel( h, m );
1925                         if( !b_bidir )
1926                             analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
1927                     }
1928                     if( x264_mb_partition_listX_table[1][i_part_type] )
1929                     {
1930                         m = &analysis.l1.me8x8[i];
1931                         i_part_cost_old = m->cost;
1932                         i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
1933                         m->cost -= i_type_cost;
1934                         x264_me_refine_qpel( h, m );
1935                         if( !b_bidir )
1936                             analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
1937                     }
1938                     /* TODO: update mvp? */
1939                 }
1940             }
1941
1942             /* best intra mode */
1943             x264_mb_analyse_intra( h, &analysis, i_cost );
1944
1945             if( analysis.i_sad_i16x16 < i_cost )
1946             {
1947                 h->mb.i_type = I_16x16;
1948                 i_cost = analysis.i_sad_i16x16;
1949             }
1950             if( analysis.i_sad_i8x8 < i_cost )
1951             {
1952                 h->mb.i_type = I_8x8;
1953                 i_cost = analysis.i_sad_i8x8;
1954             }
1955             if( analysis.i_sad_i4x4 < i_cost )
1956             {
1957                 h->mb.i_type = I_4x4;
1958                 i_cost = analysis.i_sad_i4x4;
1959             }
1960         }
1961     }
1962
1963     x264_analyse_update_cache( h, &analysis );
1964
1965     if( !analysis.b_mbrd )
1966         x264_mb_analyse_transform( h );
1967 }
1968
1969 /*-------------------- Update MB from the analysis ----------------------*/
1970 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
1971 {
1972     int i;
1973
1974     switch( h->mb.i_type )
1975     {
1976         case I_4x4:
1977             for( i = 0; i < 16; i++ )
1978             {
1979                 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1980                     a->i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1981             }
1982
1983             x264_mb_analyse_intra_chroma( h, a );
1984             break;
1985         case I_8x8:
1986             for( i = 0; i < 4; i++ )
1987                 x264_macroblock_cache_intra8x8_pred( h, 2*(i&1), 2*(i>>1),
1988                     a->i_predict8x8[i&1][i>>1] );
1989
1990             x264_mb_analyse_intra_chroma( h, a );
1991             break;
1992         case I_16x16:
1993             h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
1994             x264_mb_analyse_intra_chroma( h, a );
1995             break;
1996
1997         case P_L0:
1998             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
1999             switch( h->mb.i_partition )
2000             {
2001                 case D_16x16:
2002                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
2003                     break;
2004
2005                 case D_16x8:
2006                     x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv[0], a->l0.me16x8[0].mv[1] );
2007                     x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv[0], a->l0.me16x8[1].mv[1] );
2008                     break;
2009
2010                 case D_8x16:
2011                     x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv[0], a->l0.me8x16[0].mv[1] );
2012                     x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv[0], a->l0.me8x16[1].mv[1] );
2013                     break;
2014
2015                 default:
2016                     x264_log( h, X264_LOG_ERROR, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
2017                     break;
2018             }
2019             break;
2020
2021         case P_8x8:
2022             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
2023             for( i = 0; i < 4; i++ )
2024                 x264_mb_cache_mv_p8x8( h, a, i );
2025             break;
2026
2027         case P_SKIP:
2028         {
2029             int mvp[2];
2030             x264_mb_predict_mv_pskip( h, mvp );
2031             /* */
2032             h->mb.i_partition = D_16x16;
2033             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
2034             x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
2035             break;
2036         }
2037
2038         case B_SKIP:
2039             /* nothing has changed since x264_macroblock_probe_bskip */
2040             break;
2041         case B_DIRECT:
2042             x264_mb_load_mv_direct8x8( h, 0 );
2043             x264_mb_load_mv_direct8x8( h, 1 );
2044             x264_mb_load_mv_direct8x8( h, 2 );
2045             x264_mb_load_mv_direct8x8( h, 3 );
2046             break;
2047
2048         case B_8x8:
2049             /* optimize: cache might not need to be rewritten */
2050             for( i = 0; i < 4; i++ )
2051                 x264_mb_cache_mv_b8x8( h, a, i, 1 );
2052             break;
2053
2054         default: /* the rest of the B types */
2055             switch( h->mb.i_partition )
2056             {
2057             case D_16x16:
2058                 switch( h->mb.i_type )
2059                 {
2060                 case B_L0_L0:
2061                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
2062                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
2063
2064                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
2065                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1,  0, 0 );
2066                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1,  0, 0 );
2067                     break;
2068                 case B_L1_L1:
2069                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
2070                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0,  0, 0 );
2071                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0,  0, 0 );
2072
2073                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
2074                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
2075                     break;
2076                 case B_BI_BI:
2077                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
2078                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
2079
2080                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
2081                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
2082                     break;
2083                 }
2084                 break;
2085             case D_16x8:
2086                 x264_mb_cache_mv_b16x8( h, a, 0, 1 );
2087                 x264_mb_cache_mv_b16x8( h, a, 1, 1 );
2088                 break;
2089             case D_8x16:
2090                 x264_mb_cache_mv_b8x16( h, a, 0, 1 );
2091                 x264_mb_cache_mv_b8x16( h, a, 1, 1 );
2092                 break;
2093             default:
2094                 x264_log( h, X264_LOG_ERROR, "internal error (invalid MB type)\n" );
2095                 break;
2096             }
2097     }
2098 }
2099
2100 #include "slicetype_decision.c"
2101