git.sesse.net Git - x264/blob - encoder/analyse.c

   1 /*****************************************************************************
   2  * analyse.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27 #include <math.h>
  28 #include <limits.h>
  29
  30 #include "../common/common.h"
  31 #include "../common/macroblock.h"
  32 #include "macroblock.h"
  33 #include "me.h"
  34 #include "ratecontrol.h"
  35
  36 typedef struct
  37 {
  38     /* 16x16 */
  39     int i_ref;
  40     x264_me_t me16x16;
  41
  42     /* 8x8 */
  43     int       i_cost8x8;
  44     x264_me_t me8x8[4];
  45
  46     /* Sub 4x4 */
  47     int       i_cost4x4[4]; /* cost per 8x8 partition */
  48     x264_me_t me4x4[4][4];
  49
  50     /* Sub 8x4 */
  51     int       i_cost8x4[4]; /* cost per 8x8 partition */
  52     x264_me_t me8x4[4][2];
  53
  54     /* Sub 4x8 */
  55     int       i_cost4x8[4]; /* cost per 8x8 partition */
  56     x264_me_t me4x8[4][4];
  57
  58     /* 16x8 */
  59     int       i_cost16x8;
  60     x264_me_t me16x8[2];
  61
  62     /* 8x16 */
  63     int       i_cost8x16;
  64     x264_me_t me8x16[2];
  65
  66 } x264_mb_analysis_list_t;
  67
  68 typedef struct
  69 {
  70     /* conduct the analysis using this lamda and QP */
  71     int i_lambda;
  72     int i_qp;
  73
  74
  75     /* I: Intra part */
  76     /* Luma part 16x16 and 4x4 modes stats */
  77     int i_sad_i16x16;
  78     int i_predict16x16;
  79
  80     int i_sad_i4x4;
  81     int i_predict4x4[4][4];
  82
  83     /* Chroma part */
  84     int i_sad_i8x8;
  85     int i_predict8x8;
  86
  87     /* II: Inter part P/B frame */
  88     int i_mv_range;
  89
  90     x264_mb_analysis_list_t l0;
  91     x264_mb_analysis_list_t l1;
  92
  93     int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
  94     int i_cost16x16direct;
  95     int i_cost8x8bi;
  96     int i_cost8x8direct[4];
  97     int i_cost16x8bi;
  98     int i_cost8x16bi;
  99
 100     int i_mb_partition16x8[2]; /* mb_partition_e */
 101     int i_mb_partition8x16[2];
 102     int i_mb_type16x8; /* mb_class_e */
 103     int i_mb_type8x16;
 104
 105     int b_direct_available;
 106
 107 } x264_mb_analysis_t;
 108
 109 static const int i_qp0_cost_table[52] = {
 110    1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
 111    1, 1, 1, 1,              /*  8-11 */
 112    1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
 113    3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
 114    6, 7, 8, 9,10,11,13,14,  /* 28-35 */
 115   16,18,20,23,25,29,32,36,  /* 36-43 */
 116   40,45,51,57,64,72,81,91   /* 44-51 */
 117 };
 118
 119 static const uint8_t block_idx_x[16] = {
 120     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
 121 };
 122 static const uint8_t block_idx_y[16] = {
 123     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
 124 };
 125
 126 /* TODO: calculate CABAC costs */
 127 static const int i_mb_b_cost_table[18] = {
 128     9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
 129 };
 130 static const int i_mb_b16x8_cost_table[16] = {
 131     0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
 132 };
 133 static const int i_sub_mb_b_cost_table[13] = {
 134     7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
 135 };
 136 static const int i_sub_mb_p_cost_table[4] = {
 137     5, 3, 3, 1
 138 };
 139
 140 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 141 {
 142     memset( a, 0, sizeof( x264_mb_analysis_t ) );
 143
 144     /* conduct the analysis using this lamda and QP */
 145     a->i_qp = i_qp;
 146     a->i_lambda = i_qp0_cost_table[i_qp];
 147
 148     /* I: Intra part */
 149     a->i_sad_i16x16 = -1;
 150     a->i_sad_i4x4   = -1;
 151     a->i_sad_i8x8   = -1;
 152
 153     /* II: Inter part P/B frame */
 154     if( h->sh.i_type != SLICE_TYPE_I )
 155     {
 156         int dmb;
 157         int i;
 158
 159         /* Calculate max start MV range */
 160         dmb = h->mb.i_mb_x;
 161         if( h->mb.i_mb_y < dmb )
 162             dmb = h->mb.i_mb_y;
 163         if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
 164             dmb = h->sps->i_mb_width - h->mb.i_mb_x;
 165         if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
 166             dmb = h->sps->i_mb_height - h->mb.i_mb_y;
 167
 168         a->i_mv_range = 16*dmb + 8;
 169
 170         a->l0.me16x16.cost = -1;
 171         a->l0.i_cost8x8    = -1;
 172
 173         for( i = 0; i < 4; i++ )
 174         {
 175             a->l0.i_cost4x4[i] = -1;
 176             a->l0.i_cost8x4[i] = -1;
 177             a->l0.i_cost4x8[i] = -1;
 178         }
 179
 180         a->l0.i_cost16x8   = -1;
 181         a->l0.i_cost8x16   = -1;
 182         if( h->sh.i_type == SLICE_TYPE_B )
 183         {
 184             a->l1.me16x16.cost = -1;
 185             a->l1.i_cost8x8    = -1;
 186
 187             for( i = 0; i < 4; i++ )
 188             {
 189                 a->l1.i_cost4x4[i] = -1;
 190                 a->l1.i_cost8x4[i] = -1;
 191                 a->l1.i_cost4x8[i] = -1;
 192                 a->i_cost8x8direct[i] = -1;
 193             }
 194
 195             a->l1.i_cost16x8   = -1;
 196             a->l1.i_cost8x16   = -1;
 197
 198             a->i_cost16x16bi   = -1;
 199             a->i_cost16x16direct = -1;
 200             a->i_cost8x8bi     = -1;
 201             a->i_cost16x8bi    = -1;
 202             a->i_cost8x16bi    = -1;
 203         }
 204     }
 205 }
 206
 207
 208
 209 /*
 210  * Handle intra mb
 211  */
 212 /* Max = 4 */
 213 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 214 {
 215     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 216     {
 217         /* top and left avaible */
 218         *mode++ = I_PRED_16x16_V;
 219         *mode++ = I_PRED_16x16_H;
 220         *mode++ = I_PRED_16x16_DC;
 221         *mode++ = I_PRED_16x16_P;
 222         *pi_count = 4;
 223     }
 224     else if( ( i_neighbour & MB_LEFT ) )
 225     {
 226         /* left available*/
 227         *mode++ = I_PRED_16x16_DC_LEFT;
 228         *mode++ = I_PRED_16x16_H;
 229         *pi_count = 2;
 230     }
 231     else if( ( i_neighbour & MB_TOP ) )
 232     {
 233         /* top available*/
 234         *mode++ = I_PRED_16x16_DC_TOP;
 235         *mode++ = I_PRED_16x16_V;
 236         *pi_count = 2;
 237     }
 238     else
 239     {
 240         /* none avaible */
 241         *mode = I_PRED_16x16_DC_128;
 242         *pi_count = 1;
 243     }
 244 }
 245
 246 /* Max = 4 */
 247 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 248 {
 249     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 250     {
 251         /* top and left avaible */
 252         *mode++ = I_PRED_CHROMA_V;
 253         *mode++ = I_PRED_CHROMA_H;
 254         *mode++ = I_PRED_CHROMA_DC;
 255         *mode++ = I_PRED_CHROMA_P;
 256         *pi_count = 4;
 257     }
 258     else if( ( i_neighbour & MB_LEFT ) )
 259     {
 260         /* left available*/
 261         *mode++ = I_PRED_CHROMA_DC_LEFT;
 262         *mode++ = I_PRED_CHROMA_H;
 263         *pi_count = 2;
 264     }
 265     else if( ( i_neighbour & MB_TOP ) )
 266     {
 267         /* top available*/
 268         *mode++ = I_PRED_CHROMA_DC_TOP;
 269         *mode++ = I_PRED_CHROMA_V;
 270         *pi_count = 2;
 271     }
 272     else
 273     {
 274         /* none avaible */
 275         *mode = I_PRED_CHROMA_DC_128;
 276         *pi_count = 1;
 277     }
 278 }
 279
 280 /* MAX = 8 */
 281 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
 282 {
 283     int b_a, b_b, b_c;
 284     static const unsigned int needmb[16] =
 285     {
 286         MB_LEFT|MB_TOP, MB_TOP,
 287         MB_LEFT,        MB_PRIVATE,
 288         MB_TOP,         MB_TOP|MB_TOPRIGHT,
 289         0,              MB_PRIVATE,
 290         MB_LEFT,        0,
 291         MB_LEFT,        MB_PRIVATE,
 292         0,              MB_PRIVATE,
 293         0,              MB_PRIVATE
 294     };
 295
 296     /* FIXME even when b_c == 0 there is some case where missing pixels
 297      * are emulated and thus more mode are available TODO
 298      * analysis and encode should be fixed too */
 299     b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
 300     b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
 301     b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
 302
 303     if( b_a && b_b )
 304     {
 305         *mode++ = I_PRED_4x4_DC;
 306         *mode++ = I_PRED_4x4_H;
 307         *mode++ = I_PRED_4x4_V;
 308         *mode++ = I_PRED_4x4_DDR;
 309         *mode++ = I_PRED_4x4_VR;
 310         *mode++ = I_PRED_4x4_HD;
 311         *mode++ = I_PRED_4x4_HU;
 312
 313         *pi_count = 7;
 314
 315         if( b_c )
 316         {
 317             *mode++ = I_PRED_4x4_DDL;
 318             *mode++ = I_PRED_4x4_VL;
 319             (*pi_count) += 2;
 320         }
 321     }
 322     else if( b_a && !b_b )
 323     {
 324         *mode++ = I_PRED_4x4_DC_LEFT;
 325         *mode++ = I_PRED_4x4_H;
 326         *mode++ = I_PRED_4x4_HU;
 327         *pi_count = 3;
 328     }
 329     else if( !b_a && b_b )
 330     {
 331         *mode++ = I_PRED_4x4_DC_TOP;
 332         *mode++ = I_PRED_4x4_V;
 333         *pi_count = 2;
 334     }
 335     else
 336     {
 337         *mode++ = I_PRED_4x4_DC_128;
 338         *pi_count = 1;
 339     }
 340 }
 341
 342 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
 343 {
 344     const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
 345     const int i_stride = h->mb.pic.i_stride[0];
 346     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 347     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 348
 349     int i, idx;
 350
 351     int i_max;
 352     int predict_mode[9];
 353
 354     /*---------------- Try all mode and calculate their score ---------------*/
 355
 356     /* 16x16 prediction selection */
 357     predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 358     for( i = 0; i < i_max; i++ )
 359     {
 360         int i_sad;
 361         int i_mode;
 362
 363         i_mode = predict_mode[i];
 364
 365         /* we do the prediction */
 366         h->predict_16x16[i_mode]( p_dst, i_stride );
 367
 368         /* we calculate the diff and get the square sum of the diff */
 369         i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
 370                 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
 371         /* if i_score is lower it is better */
 372         if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
 373         {
 374             res->i_predict16x16 = i_mode;
 375             res->i_sad_i16x16     = i_sad;
 376         }
 377     }
 378
 379     /* 4x4 prediction selection */
 380     if( flags & X264_ANALYSE_I4x4 )
 381     {
 382         res->i_sad_i4x4 = 0;
 383         for( idx = 0; idx < 16; idx++ )
 384         {
 385             uint8_t *p_src_by;
 386             uint8_t *p_dst_by;
 387             int     i_best;
 388             int x, y;
 389             int i_pred_mode;
 390
 391             i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
 392             x = block_idx_x[idx];
 393             y = block_idx_y[idx];
 394
 395             p_src_by = p_src + 4 * x + 4 * y * i_stride;
 396             p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
 397
 398             i_best = -1;
 399             predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
 400             for( i = 0; i < i_max; i++ )
 401             {
 402                 int i_sad;
 403                 int i_mode;
 404
 405                 i_mode = predict_mode[i];
 406
 407                 /* we do the prediction */
 408                 h->predict_4x4[i_mode]( p_dst_by, i_stride );
 409
 410                 /* we calculate diff and get the square sum of the diff */
 411                 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
 412                                                  p_src_by, i_stride );
 413
 414                 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
 415
 416                 /* if i_score is lower it is better */
 417                 if( i_best == -1 || i_best > i_sad )
 418                 {
 419                     res->i_predict4x4[x][y] = i_mode;
 420                     i_best = i_sad;
 421                 }
 422             }
 423             res->i_sad_i4x4 += i_best;
 424
 425             /* we need to encode this mb now (for next ones) */
 426             h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
 427             x264_mb_encode_i4x4( h, idx, res->i_qp );
 428
 429             /* we need to store the 'fixed' version */
 430             h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
 431                 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
 432         }
 433         res->i_sad_i4x4 += res->i_lambda * 24;    /* from JVT (SATD0) */
 434     }
 435 }
 436
 437 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
 438 {
 439     int i;
 440
 441     int i_max;
 442     int predict_mode[9];
 443
 444     uint8_t *p_dstc[2], *p_srcc[2];
 445     int      i_stride[2];
 446
 447     /* 8x8 prediction selection for chroma */
 448     p_dstc[0] = h->mb.pic.p_fdec[1];
 449     p_dstc[1] = h->mb.pic.p_fdec[2];
 450     p_srcc[0] = h->mb.pic.p_fenc[1];
 451     p_srcc[1] = h->mb.pic.p_fenc[2];
 452
 453     i_stride[0] = h->mb.pic.i_stride[1];
 454     i_stride[1] = h->mb.pic.i_stride[2];
 455
 456     predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 457     res->i_sad_i8x8 = -1;
 458     for( i = 0; i < i_max; i++ )
 459     {
 460         int i_sad;
 461         int i_mode;
 462
 463         i_mode = predict_mode[i];
 464
 465         /* we do the prediction */
 466         h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
 467         h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
 468
 469         /* we calculate the cost */
 470         i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
 471                                          p_srcc[0], i_stride[0] ) +
 472                 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
 473                                          p_srcc[1], i_stride[1] ) +
 474                 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
 475
 476         /* if i_score is lower it is better */
 477         if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
 478         {
 479             res->i_predict8x8 = i_mode;
 480             res->i_sad_i8x8     = i_sad;
 481         }
 482     }
 483 }
 484
 485 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 486 {
 487     x264_me_t m;
 488     int i_ref;
 489     int mvc[4][2], i_mvc;
 490     int i_fullpel_thresh = INT_MAX;
 491     int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
 492
 493     /* 16x16 Search on all ref frame */
 494     m.i_pixel = PIXEL_16x16;
 495     m.lm      = a->i_lambda;
 496     m.p_fenc  = h->mb.pic.p_fenc[0];
 497     m.i_stride= h->mb.pic.i_stride[0];
 498     m.i_mv_range = a->i_mv_range;
 499
 500     a->l0.me16x16.cost = INT_MAX;
 501     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
 502     {
 503         const int i_ref_cost = m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 504         i_fullpel_thresh -= i_ref_cost;
 505
 506         /* search with ref */
 507         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 508         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 509         x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
 510         x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
 511
 512         m.cost += i_ref_cost;
 513         i_fullpel_thresh += i_ref_cost;
 514
 515         if( m.cost < a->l0.me16x16.cost )
 516         {
 517             a->l0.i_ref = i_ref;
 518             a->l0.me16x16 = m;
 519         }
 520
 521         /* save mv for predicting neighbors */
 522         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
 523         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
 524     }
 525
 526     /* subtract ref cost, so we don't have to add it for the other P types */
 527     a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
 528
 529     /* Set global ref, needed for all others modes */
 530     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 531 }
 532
 533 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
 534 {
 535     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 536     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 537     int mvc[5][2], i_mvc;
 538     int i;
 539
 540     /* XXX Needed for x264_mb_predict_mv */
 541     h->mb.i_partition = D_8x8;
 542
 543     i_mvc = 1;
 544     mvc[0][0] = a->l0.me16x16.mv[0];
 545     mvc[0][1] = a->l0.me16x16.mv[1];
 546
 547     for( i = 0; i < 4; i++ )
 548     {
 549         x264_me_t *m = &a->l0.me8x8[i];
 550         const int x8 = i%2;
 551         const int y8 = i/2;
 552
 553         m->i_pixel = PIXEL_8x8;
 554         m->lm      = a->i_lambda;
 555
 556         m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
 557         m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
 558         m->i_stride= h->mb.pic.i_stride[0];
 559         m->i_mv_range = a->i_mv_range;
 560
 561         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 562         x264_me_search( h, m, mvc, i_mvc );
 563
 564         x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
 565
 566         mvc[i_mvc][0] = m->mv[0];
 567         mvc[i_mvc][1] = m->mv[1];
 568         i_mvc++;
 569
 570         /* mb type cost */
 571         m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
 572     }
 573
 574     a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
 575                    a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
 576 }
 577
 578 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 579 {
 580     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 581     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 582     int mvc[2][2];
 583     int i;
 584
 585     /* XXX Needed for x264_mb_predict_mv */
 586     h->mb.i_partition = D_16x8;
 587
 588     for( i = 0; i < 2; i++ )
 589     {
 590         x264_me_t *m = &a->l0.me16x8[i];
 591
 592         m->i_pixel = PIXEL_16x8;
 593         m->lm      = a->i_lambda;
 594
 595         m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
 596         m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
 597         m->i_stride= h->mb.pic.i_stride[0];
 598         m->i_mv_range = a->i_mv_range;
 599
 600         mvc[0][0] = a->l0.me8x8[2*i].mv[0];
 601         mvc[0][1] = a->l0.me8x8[2*i].mv[1];
 602         mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
 603         mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
 604
 605         x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
 606         x264_me_search( h, m, mvc, 2 );
 607
 608         x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
 609     }
 610
 611     a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
 612 }
 613
 614 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 615 {
 616     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 617     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 618     int mvc[2][2];
 619     int i;
 620
 621     /* XXX Needed for x264_mb_predict_mv */
 622     h->mb.i_partition = D_8x16;
 623
 624     for( i = 0; i < 2; i++ )
 625     {
 626         x264_me_t *m = &a->l0.me8x16[i];
 627
 628         m->i_pixel = PIXEL_8x16;
 629         m->lm      = a->i_lambda;
 630
 631         m->p_fenc  = &p_fenc[8*i];
 632         m->p_fref  = &p_fref[8*i];
 633         m->i_stride= h->mb.pic.i_stride[0];
 634         m->i_mv_range = a->i_mv_range;
 635
 636         mvc[0][0] = a->l0.me8x8[i].mv[0];
 637         mvc[0][1] = a->l0.me8x8[i].mv[1];
 638         mvc[1][0] = a->l0.me8x8[i+2].mv[0];
 639         mvc[1][1] = a->l0.me8x8[i+2].mv[1];
 640
 641         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 642         x264_me_search( h, m, mvc, 2 );
 643
 644         x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
 645     }
 646
 647     a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
 648 }
 649
 650 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 651 {
 652     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 653     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 654
 655     int i4x4;
 656
 657     /* XXX Needed for x264_mb_predict_mv */
 658     h->mb.i_partition = D_8x8;
 659
 660     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 661     {
 662         const int idx = 4*i8x8 + i4x4;
 663         const int x4 = block_idx_x[idx];
 664         const int y4 = block_idx_y[idx];
 665         const int i_mvc = (i4x4 == 0);
 666
 667         x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
 668
 669         m->i_pixel = PIXEL_4x4;
 670         m->lm      = a->i_lambda;
 671
 672         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 673         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 674         m->i_stride= h->mb.pic.i_stride[0];
 675         m->i_mv_range = a->i_mv_range;
 676
 677         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 678         x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
 679
 680         x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
 681     }
 682
 683     a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
 684                          a->l0.me4x4[i8x8][1].cost +
 685                          a->l0.me4x4[i8x8][2].cost +
 686                          a->l0.me4x4[i8x8][3].cost +
 687                          a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
 688 }
 689
 690 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 691 {
 692     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 693     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 694
 695     int i8x4;
 696
 697     /* XXX Needed for x264_mb_predict_mv */
 698     h->mb.i_partition = D_8x8;
 699
 700     for( i8x4 = 0; i8x4 < 2; i8x4++ )
 701     {
 702         const int idx = 4*i8x8 + 2*i8x4;
 703         const int x4 = block_idx_x[idx];
 704         const int y4 = block_idx_y[idx];
 705         const int i_mvc = (i8x4 == 0);
 706
 707         x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
 708
 709         m->i_pixel = PIXEL_8x4;
 710         m->lm      = a->i_lambda;
 711
 712         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 713         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 714         m->i_stride= h->mb.pic.i_stride[0];
 715         m->i_mv_range = a->i_mv_range;
 716
 717         x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
 718         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
 719
 720         x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
 721     }
 722
 723     a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
 724                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
 725 }
 726
 727 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 728 {
 729     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 730     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 731
 732     int i4x8;
 733
 734     /* XXX Needed for x264_mb_predict_mv */
 735     h->mb.i_partition = D_8x8;
 736
 737     for( i4x8 = 0; i4x8 < 2; i4x8++ )
 738     {
 739         const int idx = 4*i8x8 + i4x8;
 740         const int x4 = block_idx_x[idx];
 741         const int y4 = block_idx_y[idx];
 742         const int i_mvc = (i4x8 == 0);
 743
 744         x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
 745
 746         m->i_pixel = PIXEL_4x8;
 747         m->lm      = a->i_lambda;
 748
 749         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 750         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 751         m->i_stride= h->mb.pic.i_stride[0];
 752         m->i_mv_range = a->i_mv_range;
 753
 754         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 755         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
 756
 757         x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
 758     }
 759
 760     a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
 761                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
 762 }
 763
 764 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 765 {
 766     /* Assumes that fdec still contains the results of
 767      * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
 768
 769     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
 770     uint8_t *p_fdec = h->mb.pic.p_fdec[0];
 771     int i_stride= h->mb.pic.i_stride[0];
 772     int i;
 773
 774     a->i_cost16x16direct = 0;
 775     for( i = 0; i < 4; i++ )
 776     {
 777         const int x8 = i%2;
 778         const int y8 = i/2;
 779         const int off = 8 * x8 + 8 * i_stride * y8;
 780         a->i_cost16x16direct +=
 781         a->i_cost8x8direct[i] =
 782             h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
 783
 784         /* mb type cost */
 785         a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
 786     }
 787
 788     a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
 789 }
 790
 791 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 792 {
 793     uint8_t pix1[16*16], pix2[16*16];
 794
 795     x264_me_t m;
 796     int i_ref;
 797
 798     /* 16x16 Search on all ref frame */
 799     m.i_pixel = PIXEL_16x16;
 800     m.lm      = a->i_lambda;
 801     m.p_fenc  = h->mb.pic.p_fenc[0];
 802     m.i_stride= h->mb.pic.i_stride[0];
 803     m.i_mv_range = a->i_mv_range;
 804
 805     /* ME for List 0 */
 806     a->l0.me16x16.cost = INT_MAX;
 807     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
 808     {
 809         /* search with ref */
 810         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 811         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 812         x264_me_search( h, &m, NULL, 0 );
 813
 814         /* add ref cost */
 815         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 816
 817         if( m.cost < a->l0.me16x16.cost )
 818         {
 819             a->l0.i_ref = i_ref;
 820             a->l0.me16x16 = m;
 821         }
 822     }
 823     /* subtract ref cost, so we don't have to add it for the other MB types */
 824     a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
 825
 826     /* ME for list 1 */
 827     a->l1.me16x16.cost = INT_MAX;
 828     for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
 829     {
 830         /* search with ref */
 831         m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
 832         x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
 833         x264_me_search( h, &m, NULL, 0 );
 834
 835         /* add ref cost */
 836         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
 837
 838         if( m.cost < a->l1.me16x16.cost )
 839         {
 840             a->l1.i_ref = i_ref;
 841             a->l1.me16x16 = m;
 842         }
 843     }
 844     /* subtract ref cost, so we don't have to add it for the other MB types */
 845     a->l1.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );
 846
 847     /* Set global ref, needed for other modes? */
 848     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 849     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
 850
 851     /* get cost of BI mode */
 852     h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
 853                     pix1, 16,
 854                     a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
 855                     16, 16 );
 856     h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
 857                     pix2, 16,
 858                     a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
 859                     16, 16 );
 860     h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
 861
 862     a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
 863                        a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
 864                                        bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
 865                                        bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
 866                                        bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
 867                                        bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
 868                                        bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
 869
 870     /* mb type cost */
 871     a->i_cost16x16bi   += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
 872     a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
 873     a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
 874 }
 875
 876 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
 877     if( x264_mb_partition_listX_table[0][part] ) \
 878     { \
 879         x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
 880         x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
 881     } \
 882     else \
 883     { \
 884         x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
 885         x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, 0, 0 ); \
 886         if( b_mvd ) \
 887             x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
 888     } \
 889     if( x264_mb_partition_listX_table[1][part] ) \
 890     { \
 891         x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
 892         x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
 893     } \
 894     else \
 895     { \
 896         x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
 897         x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, 0, 0 ); \
 898         if( b_mvd ) \
 899             x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
 900     }
 901
 902 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 903 {
 904     int x = (i%2)*2;
 905     int y = (i/2)*2;
 906     if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
 907     {
 908         x264_mb_load_mv_direct8x8( h, i );
 909         if( b_mvd )
 910         {
 911             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0, 0 );
 912             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 1, 0, 0 );
 913             x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
 914         }
 915     }
 916     else
 917     {
 918         CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
 919     }
 920 }
 921 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 922 {
 923     CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
 924 }
 925 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 926 {
 927     CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
 928 }
 929 #undef CACHE_MV_BI
 930
 931 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
 932 {
 933     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
 934                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
 935     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
 936     uint8_t pix[2][8*8];
 937     int i, l;
 938
 939     /* XXX Needed for x264_mb_predict_mv */
 940     h->mb.i_partition = D_8x8;
 941
 942     a->i_cost8x8bi = 0;
 943
 944     for( i = 0; i < 4; i++ )
 945     {
 946         const int x8 = i%2;
 947         const int y8 = i/2;
 948         uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
 949         int i_part_cost;
 950         int i_part_cost_bi = 0;
 951
 952         for( l = 0; l < 2; l++ )
 953         {
 954             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
 955             x264_me_t *m = &lX->me8x8[i];
 956
 957             m->i_pixel = PIXEL_8x8;
 958             m->lm      = a->i_lambda;
 959
 960             m->p_fenc = p_fenc_i;
 961             m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
 962             m->i_stride = h->mb.pic.i_stride[0];
 963             m->i_mv_range = a->i_mv_range;
 964
 965             x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
 966             x264_me_search( h, m, &lX->me16x16.mv, 1 );
 967
 968             x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
 969             lX->i_cost8x8 += m->cost;
 970
 971             /* BI mode */
 972             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
 973                             m->mv[0], m->mv[1], 8, 8 );
 974             /* FIXME: ref cost */
 975             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
 976                                               bs_size_se( m->mv[1] - m->mvp[1] ) +
 977                                               i_sub_mb_b_cost_table[D_L0_8x8] );
 978         }
 979
 980         h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
 981         i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 )
 982                         + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
 983
 984         i_part_cost = a->l0.me8x8[i].cost;
 985         h->mb.i_sub_partition[i] = D_L0_8x8;
 986         if( a->l1.me8x8[i].cost < i_part_cost )
 987         {
 988             i_part_cost = a->l1.me8x8[i].cost;
 989             h->mb.i_sub_partition[i] = D_L1_8x8;
 990         }
 991         if( i_part_cost_bi < i_part_cost )
 992         {
 993             i_part_cost = i_part_cost_bi;
 994             h->mb.i_sub_partition[i] = D_BI_8x8;
 995         }
 996         if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
 997         {
 998             i_part_cost = a->i_cost8x8direct[i];
 999             h->mb.i_sub_partition[i] = D_DIRECT_8x8;
1000         }
1001         a->i_cost8x8bi += i_part_cost;
1002
1003         /* XXX Needed for x264_mb_predict_mv */
1004         x264_mb_cache_mv_b8x8( h, a, i, 0 );
1005     }
1006
1007     /* mb type cost */
1008     a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
1009 }
1010
1011 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
1012 {
1013     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1014                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1015     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1016     uint8_t pix[2][8*8];
1017     int i_ref_stride = h->mb.pic.i_stride[0];
1018     int mvc[2][2];
1019     int i, l;
1020
1021     h->mb.i_partition = D_16x8;
1022     a->i_cost16x8bi = 0;
1023
1024     for( i = 0; i < 2; i++ )
1025     {
1026         uint8_t *p_fenc_i = &p_fenc[8*i*i_ref_stride];
1027         int i_part_cost;
1028         int i_part_cost_bi = 0;
1029
1030         /* TODO: check only the list(s) that were used in b8x8? */
1031         for( l = 0; l < 2; l++ )
1032         {
1033             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1034             x264_me_t *m = &lX->me16x8[i];
1035
1036             m->i_pixel = PIXEL_16x8;
1037             m->lm      = a->i_lambda;
1038
1039             m->p_fenc  = p_fenc_i;
1040             m->i_stride= i_ref_stride;
1041             m->p_fref  = &p_fref[l][8*i*i_ref_stride];
1042             m->i_mv_range = a->i_mv_range;
1043
1044             mvc[0][0] = lX->me8x8[2*i].mv[0];
1045             mvc[0][1] = lX->me8x8[2*i].mv[1];
1046             mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1047             mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1048
1049             x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1050             x264_me_search( h, m, mvc, 2 );
1051
1052             /* BI mode */
1053             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1054                             m->mv[0], m->mv[1], 8, 8 );
1055             /* FIXME: ref cost */
1056             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1057                                               bs_size_se( m->mv[1] - m->mvp[1] ) );
1058         }
1059
1060         h->pixf.avg[PIXEL_16x8]( pix[0], 8, pix[1], 8 );
1061         i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
1062
1063         i_part_cost = a->l0.me16x8[i].cost;
1064         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1065         if( a->l1.me16x8[i].cost < i_part_cost )
1066         {
1067             i_part_cost = a->l1.me16x8[i].cost;
1068             a->i_mb_partition16x8[i] = D_L1_8x8;
1069         }
1070         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1071         {
1072             i_part_cost = i_part_cost_bi;
1073             a->i_mb_partition16x8[i] = D_BI_8x8;
1074         }
1075         a->i_cost16x8bi += i_part_cost;
1076
1077         if( i == 0 )
1078             x264_mb_cache_mv_b16x8( h, a, i, 0 );
1079     }
1080
1081     /* mb type cost */
1082     a->i_mb_type16x8 = B_L0_L0
1083         + (a->i_mb_partition16x8[0]>>2) * 3
1084         + (a->i_mb_partition16x8[1]>>2);
1085     a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
1086 }
1087 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1088 {
1089     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1090                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1091     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1092     uint8_t pix[2][8*8];
1093     int i_ref_stride = h->mb.pic.i_stride[0];
1094     int mvc[2][2];
1095     int i, l;
1096
1097     h->mb.i_partition = D_8x16;
1098     a->i_cost8x16bi = 0;
1099
1100     for( i = 0; i < 2; i++ )
1101     {
1102         uint8_t *p_fenc_i = &p_fenc[8*i];
1103         int i_part_cost;
1104         int i_part_cost_bi = 0;
1105
1106         for( l = 0; l < 2; l++ )
1107         {
1108             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1109             x264_me_t *m = &lX->me8x16[i];
1110
1111             m->i_pixel = PIXEL_8x16;
1112             m->lm      = a->i_lambda;
1113
1114             m->p_fenc  = p_fenc_i;
1115             m->p_fref  = &p_fref[l][8*i];
1116             m->i_stride= i_ref_stride;
1117             m->i_mv_range = a->i_mv_range;
1118
1119             mvc[0][0] = lX->me8x8[i].mv[0];
1120             mvc[0][1] = lX->me8x8[i].mv[1];
1121             mvc[1][0] = lX->me8x8[i+2].mv[0];
1122             mvc[1][1] = lX->me8x8[i+2].mv[1];
1123
1124             x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1125             x264_me_search( h, m, mvc, 2 );
1126
1127             /* BI mode */
1128             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1129                             m->mv[0], m->mv[1], 8, 8 );
1130             /* FIXME: ref cost */
1131             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1132                                               bs_size_se( m->mv[1] - m->mvp[1] ) );
1133         }
1134
1135         h->pixf.avg[PIXEL_8x16]( pix[0], 8, pix[1], 8 );
1136         i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
1137
1138         i_part_cost = a->l0.me8x16[i].cost;
1139         a->i_mb_partition8x16[i] = D_L0_8x8;
1140         if( a->l1.me8x16[i].cost < i_part_cost )
1141         {
1142             i_part_cost = a->l1.me8x16[i].cost;
1143             a->i_mb_partition8x16[i] = D_L1_8x8;
1144         }
1145         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1146         {
1147             i_part_cost = i_part_cost_bi;
1148             a->i_mb_partition8x16[i] = D_BI_8x8;
1149         }
1150         a->i_cost8x16bi += i_part_cost;
1151
1152         if( i == 0 )
1153             x264_mb_cache_mv_b8x16( h, a, i, 0 );
1154     }
1155
1156     /* mb type cost */
1157     a->i_mb_type8x16 = B_L0_L0
1158         + (a->i_mb_partition8x16[0]>>2) * 3
1159         + (a->i_mb_partition8x16[1]>>2);
1160     a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1161 }
1162
1163 /*****************************************************************************
1164  * x264_macroblock_analyse:
1165  *****************************************************************************/
1166 void x264_macroblock_analyse( x264_t *h )
1167 {
1168     x264_mb_analysis_t analysis;
1169     int i;
1170
1171     h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
1172
1173     /* FIXME check if it's 12 */
1174     if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
1175         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
1176     else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
1177         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
1178
1179     /* init analysis */
1180     x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1181
1182     /*--------------------------- Do the analysis ---------------------------*/
1183     if( h->sh.i_type == SLICE_TYPE_I )
1184     {
1185         x264_mb_analyse_intra( h, &analysis );
1186
1187         if( analysis.i_sad_i4x4 >= 0 &&  analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
1188             h->mb.i_type = I_4x4;
1189         else
1190             h->mb.i_type = I_16x16;
1191     }
1192     else if( h->sh.i_type == SLICE_TYPE_P )
1193     {
1194         const unsigned int i_neighbour = h->mb.i_neighbour;
1195
1196         int b_skip = 0;
1197         int i_cost;
1198         int i_intra_cost, i_intra_type;
1199
1200         /* Fast P_SKIP detection */
1201         if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
1202             ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
1203             ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
1204             ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
1205         {
1206             b_skip = x264_macroblock_probe_pskip( h );
1207         }
1208
1209         if( b_skip )
1210         {
1211             h->mb.i_type = P_SKIP;
1212             h->mb.i_partition = D_16x16;
1213         }
1214         else
1215         {
1216             const unsigned int flags = h->param.analyse.inter;
1217             int i_type;
1218             int i_partition;
1219
1220             x264_mb_analyse_inter_p16x16( h, &analysis );
1221             if( flags & X264_ANALYSE_PSUB16x16 )
1222                 x264_mb_analyse_inter_p8x8( h, &analysis );
1223
1224             /* Select best inter mode */
1225             i_type = P_L0;
1226             i_partition = D_16x16;
1227             i_cost = analysis.l0.me16x16.cost;
1228
1229             if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1230                 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1231             {
1232                 int i;
1233
1234                 i_type = P_8x8;
1235                 i_partition = D_8x8;
1236                 h->mb.i_sub_partition[0] = D_L0_8x8;
1237                 h->mb.i_sub_partition[1] = D_L0_8x8;
1238                 h->mb.i_sub_partition[2] = D_L0_8x8;
1239                 h->mb.i_sub_partition[3] = D_L0_8x8;
1240
1241                 i_cost = analysis.l0.i_cost8x8;
1242
1243                 /* Do sub 8x8 */
1244                 if( flags & X264_ANALYSE_PSUB8x8 )
1245                 {
1246                     for( i = 0; i < 4; i++ )
1247                     {
1248                         x264_mb_analyse_inter_p4x4( h, &analysis, i );
1249                         if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1250                         {
1251                             int i_cost8x8;
1252
1253                             h->mb.i_sub_partition[i] = D_L0_4x4;
1254                             i_cost8x8 = analysis.l0.i_cost4x4[i];
1255
1256                             x264_mb_analyse_inter_p8x4( h, &analysis, i );
1257                             if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
1258                             {
1259                                 h->mb.i_sub_partition[i] = D_L0_8x4;
1260                                 i_cost8x8 = analysis.l0.i_cost8x4[i];
1261                             }
1262
1263                             x264_mb_analyse_inter_p4x8( h, &analysis, i );
1264                             if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
1265                             {
1266                                 h->mb.i_sub_partition[i] = D_L0_4x8;
1267                                 i_cost8x8 = analysis.l0.i_cost4x8[i];
1268                             }
1269
1270                             i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1271                         }
1272                     }
1273                 }
1274
1275                 /* Now do sub 16x8/8x16 */
1276                 x264_mb_analyse_inter_p16x8( h, &analysis );
1277                 if( analysis.l0.i_cost16x8 < i_cost )
1278                 {
1279                     i_type = P_L0;
1280                     i_partition = D_16x8;
1281                     i_cost = analysis.l0.i_cost16x8;
1282                 }
1283
1284                 x264_mb_analyse_inter_p8x16( h, &analysis );
1285                 if( analysis.l0.i_cost8x16 < i_cost )
1286                 {
1287                     i_type = P_L0;
1288                     i_partition = D_8x16;
1289                     i_cost = analysis.l0.i_cost8x16;
1290                 }
1291             }
1292
1293             h->mb.i_type = i_type;
1294             h->mb.i_partition = i_partition;
1295
1296             /* refine qpel */
1297             if( h->mb.i_partition == D_16x16 )
1298             {
1299                 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1300                 i_cost = analysis.l0.me16x16.cost;
1301             }
1302             else if( h->mb.i_partition == D_16x8 )
1303             {
1304                 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1305                 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1306                 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1307             }
1308             else if( h->mb.i_partition == D_8x16 )
1309             {
1310                 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1311                 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1312                 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1313             }
1314             else if( h->mb.i_partition == D_8x8 )
1315             {
1316                 int i8x8;
1317                 i_cost = 0;
1318                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1319                 {
1320                     switch( h->mb.i_sub_partition[i8x8] )
1321                     {
1322                         case D_L0_8x8:
1323                             x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1324                             i_cost += analysis.l0.me8x8[i8x8].cost;
1325                             break;
1326                         case D_L0_8x4:
1327                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1328                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1329                             i_cost += analysis.l0.me8x4[i8x8][0].cost +
1330                                       analysis.l0.me8x4[i8x8][1].cost;
1331                             break;
1332                         case D_L0_4x8:
1333                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1334                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1335                             i_cost += analysis.l0.me4x8[i8x8][0].cost +
1336                                       analysis.l0.me4x8[i8x8][1].cost;
1337                             break;
1338
1339                         case D_L0_4x4:
1340                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1341                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1342                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1343                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1344                             i_cost += analysis.l0.me4x4[i8x8][0].cost +
1345                                       analysis.l0.me4x4[i8x8][1].cost +
1346                                       analysis.l0.me4x4[i8x8][2].cost +
1347                                       analysis.l0.me4x4[i8x8][3].cost;
1348                             break;
1349                         default:
1350                             fprintf( stderr, "internal error (!8x8 && !4x4)" );
1351                             break;
1352                     }
1353                 }
1354             }
1355
1356             x264_mb_analyse_intra( h, &analysis );
1357             i_intra_type = I_16x16;
1358             i_intra_cost = analysis.i_sad_i16x16;
1359
1360             if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_intra_cost )
1361             {
1362                 i_intra_type = I_4x4;
1363                 i_intra_cost = analysis.i_sad_i4x4;
1364             }
1365
1366             if( i_intra_cost >= 0 && i_intra_cost < i_cost )
1367             {
1368                 h->mb.i_type = i_intra_type;
1369                 i_cost = i_intra_cost;
1370             }
1371
1372             h->stat.frame.i_intra_cost += i_intra_cost;
1373             h->stat.frame.i_inter_cost += i_cost;
1374         }
1375     }
1376     else if( h->sh.i_type == SLICE_TYPE_B )
1377     {
1378         int b_skip = 0;
1379
1380         analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1381         if( analysis.b_direct_available )
1382         {
1383             h->mb.i_type = B_SKIP;
1384             x264_mb_mc( h );
1385
1386             /* Conditioning the probe on neighboring block types
1387              * doesn't seem to help speed or quality. */
1388             b_skip = x264_macroblock_probe_bskip( h );
1389         }
1390
1391         if( !b_skip )
1392         {
1393             const unsigned int flags = h->param.analyse.inter;
1394             int i_partition;
1395             int i_cost;
1396
1397             /* select best inter mode */
1398             /* direct must be first */
1399             if( analysis.b_direct_available )
1400                 x264_mb_analyse_inter_direct( h, &analysis );
1401
1402             x264_mb_analyse_inter_b16x16( h, &analysis );
1403
1404             h->mb.i_type = B_L0_L0;
1405             i_partition = D_16x16;
1406             i_cost = analysis.l0.me16x16.cost;
1407             if( analysis.l1.me16x16.cost < i_cost )
1408             {
1409                 h->mb.i_type = B_L1_L1;
1410                 i_cost = analysis.l1.me16x16.cost;
1411             }
1412             if( analysis.i_cost16x16bi < i_cost )
1413             {
1414                 h->mb.i_type = B_BI_BI;
1415                 i_cost = analysis.i_cost16x16bi;
1416             }
1417             if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 )
1418             {
1419                 h->mb.i_type = B_DIRECT;
1420                 i_cost = analysis.i_cost16x16direct;
1421             }
1422
1423             if( flags & X264_ANALYSE_BSUB16x16 )
1424             {
1425                 x264_mb_analyse_inter_b8x8( h, &analysis );
1426                 if( analysis.i_cost8x8bi < i_cost )
1427                 {
1428                     h->mb.i_type = B_8x8;
1429                     i_partition = D_8x8;
1430                     i_cost = analysis.i_cost8x8bi;
1431
1432                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1433                         h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1434                     {
1435                         x264_mb_analyse_inter_b16x8( h, &analysis );
1436                         if( analysis.i_cost16x8bi < i_cost )
1437                         {
1438                             i_partition = D_16x8;
1439                             i_cost = analysis.i_cost16x8bi;
1440                             h->mb.i_type = analysis.i_mb_type16x8;
1441                         }
1442                     }
1443                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1444                         h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1445                     {
1446                         x264_mb_analyse_inter_b8x16( h, &analysis );
1447                         if( analysis.i_cost8x16bi < i_cost )
1448                         {
1449                             i_partition = D_8x16;
1450                             i_cost = analysis.i_cost8x16bi;
1451                             h->mb.i_type = analysis.i_mb_type8x16;
1452                         }
1453                     }
1454                 }
1455             }
1456
1457             h->mb.i_partition = i_partition;
1458
1459             /* refine qpel */
1460             if( i_partition == D_16x16 )
1461             {
1462                 if( h->mb.i_type == B_L0_L0 )
1463                 {
1464                     analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1465                     x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1466                     analysis.l0.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1467                     i_cost = analysis.l0.me16x16.cost;
1468                 }
1469                 else if( h->mb.i_type == B_L1_L1 )
1470                 {
1471                     analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1472                     x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1473                     analysis.l1.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1474                     i_cost = analysis.l1.me16x16.cost;
1475                 }
1476             }
1477             /* TODO: refine bidir, 8x8 */
1478
1479             /* best intra mode */
1480             x264_mb_analyse_intra( h, &analysis );
1481             /* mb type cost */
1482             analysis.i_sad_i16x16 += analysis.i_lambda * i_mb_b_cost_table[I_16x16];
1483             analysis.i_sad_i4x4   += analysis.i_lambda * i_mb_b_cost_table[I_4x4];
1484
1485             if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1486             {
1487                 h->mb.i_type = I_16x16;
1488                 i_cost = analysis.i_sad_i16x16;
1489             }
1490             if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1491             {
1492                 h->mb.i_type = I_4x4;
1493                 i_cost = analysis.i_sad_i4x4;
1494             }
1495         }
1496     }
1497
1498     /*-------------------- Update MB from the analysis ----------------------*/
1499     h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1500     switch( h->mb.i_type )
1501     {
1502         case I_4x4:
1503             for( i = 0; i < 16; i++ )
1504             {
1505                 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1506                     analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1507             }
1508
1509             x264_mb_analyse_intra_chroma( h, &analysis );
1510             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1511             break;
1512         case I_16x16:
1513             h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1514
1515             x264_mb_analyse_intra_chroma( h, &analysis );
1516             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1517             break;
1518
1519         case P_L0:
1520             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1521             switch( h->mb.i_partition )
1522             {
1523                 case D_16x16:
1524                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1525                     break;
1526
1527                 case D_16x8:
1528                     x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1529                     x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1530                     break;
1531
1532                 case D_8x16:
1533                     x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1534                     x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1535                     break;
1536
1537                 default:
1538                     fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
1539                     break;
1540             }
1541             break;
1542
1543         case P_8x8:
1544             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1545             for( i = 0; i < 4; i++ )
1546             {
1547                 const int x = 2*(i%2);
1548                 const int y = 2*(i/2);
1549
1550                 switch( h->mb.i_sub_partition[i] )
1551                 {
1552                     case D_L0_8x8:
1553                         x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1554                         break;
1555                     case D_L0_8x4:
1556                         x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1557                         x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1558                         break;
1559                     case D_L0_4x8:
1560                         x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1561                         x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1562                         break;
1563                     case D_L0_4x4:
1564                         x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1565                         x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1566                         x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1567                         x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1568                         break;
1569                     default:
1570                         fprintf( stderr, "internal error\n" );
1571                         break;
1572                 }
1573             }
1574             break;
1575
1576         case P_SKIP:
1577         {
1578             int mvp[2];
1579             x264_mb_predict_mv_pskip( h, mvp );
1580             /* */
1581             h->mb.i_partition = D_16x16;
1582             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1583             x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1584             break;
1585         }
1586
1587         case B_SKIP:
1588             /* nothing has changed since x264_macroblock_probe_bskip */
1589             break;
1590         case B_DIRECT:
1591             x264_mb_load_mv_direct8x8( h, 0 );
1592             x264_mb_load_mv_direct8x8( h, 1 );
1593             x264_mb_load_mv_direct8x8( h, 2 );
1594             x264_mb_load_mv_direct8x8( h, 3 );
1595             break;
1596
1597         case B_8x8:
1598             /* optimize: cache might not need to be rewritten */
1599             for( i = 0; i < 4; i++ )
1600                 x264_mb_cache_mv_b8x8( h, &analysis, i, 1 );
1601             break;
1602
1603         default: /* the rest of the B types */
1604             switch( h->mb.i_partition )
1605             {
1606             case D_16x16:
1607                 switch( h->mb.i_type )
1608                 {
1609                 case B_L0_L0:
1610                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1611                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1612
1613                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1614                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1,  0, 0 );
1615                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1,  0, 0 );
1616                     break;
1617                 case B_L1_L1:
1618                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1619                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0,  0, 0 );
1620                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0,  0, 0 );
1621
1622                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1623                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1624                     break;
1625                 case B_BI_BI:
1626                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1627                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1628
1629                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1630                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1631                     break;
1632                 }
1633                 break;
1634             case D_16x8:
1635                 x264_mb_cache_mv_b16x8( h, &analysis, 0, 1 );
1636                 x264_mb_cache_mv_b16x8( h, &analysis, 1, 1 );
1637                 break;
1638             case D_8x16:
1639                 x264_mb_cache_mv_b8x16( h, &analysis, 0, 1 );
1640                 x264_mb_cache_mv_b8x16( h, &analysis, 1, 1 );
1641                 break;
1642             default:
1643                 fprintf( stderr, "internal error (invalid MB type)\n" );
1644                 break;
1645             }
1646     }
1647 }
1648