git.sesse.net Git - x264/blob - encoder/analyse.c

   1 /*****************************************************************************
   2  * analyse.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27 #include <math.h>
  28 #include <limits.h>
  29
  30 #include "../common/common.h"
  31 #include "../common/macroblock.h"
  32 #include "macroblock.h"
  33 #include "me.h"
  34 #include "ratecontrol.h"
  35
  36 typedef struct
  37 {
  38     /* 16x16 */
  39     int i_ref;
  40     x264_me_t me16x16;
  41
  42     /* 8x8 */
  43     int       i_cost8x8;
  44     x264_me_t me8x8[4];
  45
  46     /* Sub 4x4 */
  47     int       i_cost4x4[4]; /* cost per 8x8 partition */
  48     x264_me_t me4x4[4][4];
  49
  50     /* Sub 8x4 */
  51     int       i_cost8x4[4]; /* cost per 8x8 partition */
  52     x264_me_t me8x4[4][2];
  53
  54     /* Sub 4x8 */
  55     int       i_cost4x8[4]; /* cost per 8x8 partition */
  56     x264_me_t me4x8[4][4];
  57
  58     /* 16x8 */
  59     int       i_cost16x8;
  60     x264_me_t me16x8[2];
  61
  62     /* 8x16 */
  63     int       i_cost8x16;
  64     x264_me_t me8x16[2];
  65
  66 } x264_mb_analysis_list_t;
  67
/* Working state of the mode decision for one macroblock.
 * x264_mb_analyse_init() zeroes the whole structure and then sets the cost
 * fields that apply to the current slice type to -1 ("not evaluated"). */
typedef struct
{
    /* conduct the analysis using this lambda and QP
     * (i_lambda is looked up from i_qp0_cost_table[i_qp]) */
    int i_lambda;
    int i_qp;


    /* I: Intra part */
    /* Luma part 16x16 and 4x4 modes stats.
     * The i_sad_* fields hold the best SATD plus the lambda-weighted mode
     * cost; the i_predict* fields hold the prediction mode that reached it. */
    int i_sad_i16x16;
    int i_predict16x16;

    int i_sad_i4x4;
    int i_predict4x4[4][4]; /* indexed [x][y] in 4x4-block units */

    /* Chroma part */
    int i_sad_i8x8;
    int i_predict8x8;

    /* II: Inter part P/B frame */
    int i_mv_range; /* MV range handed to every motion search */

    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4]; /* one entry per 8x8 block */
    int i_cost16x8bi;
    int i_cost8x16bi;

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;

    int b_direct_available;

} x264_mb_analysis_t;
 108
 109 static const int i_qp0_cost_table[52] = {
 110    1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
 111    1, 1, 1, 1,              /*  8-11 */
 112    1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
 113    3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
 114    6, 7, 8, 9,10,11,13,14,  /* 28-35 */
 115   16,18,20,23,25,29,32,36,  /* 36-43 */
 116   40,45,51,57,64,72,81,91   /* 44-51 */
 117 };
 118
 119 static const uint8_t block_idx_x[16] = {
 120     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
 121 };
 122 static const uint8_t block_idx_y[16] = {
 123     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
 124 };
 125
 126 /* TODO: calculate CABAC costs */
 127 static const int i_mb_b_cost_table[18] = {
 128     9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
 129 };
 130 static const int i_mb_b16x8_cost_table[16] = {
 131     0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
 132 };
 133 static const int i_sub_mb_b_cost_table[13] = {
 134     7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
 135 };
 136
 137 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 138 {
 139     memset( a, 0, sizeof( x264_mb_analysis_t ) );
 140
 141     /* conduct the analysis using this lamda and QP */
 142     a->i_qp = i_qp;
 143     a->i_lambda = i_qp0_cost_table[i_qp];
 144
 145     /* I: Intra part */
 146     a->i_sad_i16x16 = -1;
 147     a->i_sad_i4x4   = -1;
 148     a->i_sad_i8x8   = -1;
 149
 150     /* II: Inter part P/B frame */
 151     if( h->sh.i_type != SLICE_TYPE_I )
 152     {
 153         int dmb;
 154         int i;
 155
 156         /* Calculate max start MV range */
 157         dmb = h->mb.i_mb_x;
 158         if( h->mb.i_mb_y < dmb )
 159             dmb = h->mb.i_mb_y;
 160         if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
 161             dmb = h->sps->i_mb_width - h->mb.i_mb_x;
 162         if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
 163             dmb = h->sps->i_mb_height - h->mb.i_mb_y;
 164
 165         a->i_mv_range = 16*dmb + 8;
 166
 167         a->l0.me16x16.cost = -1;
 168         a->l0.i_cost8x8    = -1;
 169
 170         for( i = 0; i < 4; i++ )
 171         {
 172             a->l0.i_cost4x4[i] = -1;
 173             a->l0.i_cost8x4[i] = -1;
 174             a->l0.i_cost4x8[i] = -1;
 175         }
 176
 177         a->l0.i_cost16x8   = -1;
 178         a->l0.i_cost8x16   = -1;
 179         if( h->sh.i_type == SLICE_TYPE_B )
 180         {
 181             a->l1.me16x16.cost = -1;
 182             a->l1.i_cost8x8    = -1;
 183
 184             for( i = 0; i < 4; i++ )
 185             {
 186                 a->l1.i_cost4x4[i] = -1;
 187                 a->l1.i_cost8x4[i] = -1;
 188                 a->l1.i_cost4x8[i] = -1;
 189                 a->i_cost8x8direct[i] = -1;
 190             }
 191
 192             a->l1.i_cost16x8   = -1;
 193             a->l1.i_cost8x16   = -1;
 194
 195             a->i_cost16x16bi   = -1;
 196             a->i_cost16x16direct = -1;
 197             a->i_cost8x8bi     = -1;
 198             a->i_cost16x8bi    = -1;
 199             a->i_cost8x16bi    = -1;
 200         }
 201     }
 202 }
 203
 204
 205
 206 /*
 207  * Handle intra mb
 208  */
 209 /* Max = 4 */
 210 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 211 {
 212     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 213     {
 214         /* top and left avaible */
 215         *mode++ = I_PRED_16x16_V;
 216         *mode++ = I_PRED_16x16_H;
 217         *mode++ = I_PRED_16x16_DC;
 218         *mode++ = I_PRED_16x16_P;
 219         *pi_count = 4;
 220     }
 221     else if( ( i_neighbour & MB_LEFT ) )
 222     {
 223         /* left available*/
 224         *mode++ = I_PRED_16x16_DC_LEFT;
 225         *mode++ = I_PRED_16x16_H;
 226         *pi_count = 2;
 227     }
 228     else if( ( i_neighbour & MB_TOP ) )
 229     {
 230         /* top available*/
 231         *mode++ = I_PRED_16x16_DC_TOP;
 232         *mode++ = I_PRED_16x16_V;
 233         *pi_count = 2;
 234     }
 235     else
 236     {
 237         /* none avaible */
 238         *mode = I_PRED_16x16_DC_128;
 239         *pi_count = 1;
 240     }
 241 }
 242
 243 /* Max = 4 */
 244 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 245 {
 246     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 247     {
 248         /* top and left avaible */
 249         *mode++ = I_PRED_CHROMA_V;
 250         *mode++ = I_PRED_CHROMA_H;
 251         *mode++ = I_PRED_CHROMA_DC;
 252         *mode++ = I_PRED_CHROMA_P;
 253         *pi_count = 4;
 254     }
 255     else if( ( i_neighbour & MB_LEFT ) )
 256     {
 257         /* left available*/
 258         *mode++ = I_PRED_CHROMA_DC_LEFT;
 259         *mode++ = I_PRED_CHROMA_H;
 260         *pi_count = 2;
 261     }
 262     else if( ( i_neighbour & MB_TOP ) )
 263     {
 264         /* top available*/
 265         *mode++ = I_PRED_CHROMA_DC_TOP;
 266         *mode++ = I_PRED_CHROMA_V;
 267         *pi_count = 2;
 268     }
 269     else
 270     {
 271         /* none avaible */
 272         *mode = I_PRED_CHROMA_DC_128;
 273         *pi_count = 1;
 274     }
 275 }
 276
 277 /* MAX = 8 */
 278 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
 279 {
 280     int b_a, b_b, b_c;
 281     static const unsigned int needmb[16] =
 282     {
 283         MB_LEFT|MB_TOP, MB_TOP,
 284         MB_LEFT,        MB_PRIVATE,
 285         MB_TOP,         MB_TOP|MB_TOPRIGHT,
 286         0,              MB_PRIVATE,
 287         MB_LEFT,        0,
 288         MB_LEFT,        MB_PRIVATE,
 289         0,              MB_PRIVATE,
 290         0,              MB_PRIVATE
 291     };
 292
 293     /* FIXME even when b_c == 0 there is some case where missing pixels
 294      * are emulated and thus more mode are available TODO
 295      * analysis and encode should be fixed too */
 296     b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
 297     b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
 298     b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
 299
 300     if( b_a && b_b )
 301     {
 302         *mode++ = I_PRED_4x4_DC;
 303         *mode++ = I_PRED_4x4_H;
 304         *mode++ = I_PRED_4x4_V;
 305         *mode++ = I_PRED_4x4_DDR;
 306         *mode++ = I_PRED_4x4_VR;
 307         *mode++ = I_PRED_4x4_HD;
 308         *mode++ = I_PRED_4x4_HU;
 309
 310         *pi_count = 7;
 311
 312         if( b_c )
 313         {
 314             *mode++ = I_PRED_4x4_DDL;
 315             *mode++ = I_PRED_4x4_VL;
 316             (*pi_count) += 2;
 317         }
 318     }
 319     else if( b_a && !b_b )
 320     {
 321         *mode++ = I_PRED_4x4_DC_LEFT;
 322         *mode++ = I_PRED_4x4_H;
 323         *mode++ = I_PRED_4x4_HU;
 324         *pi_count = 3;
 325     }
 326     else if( !b_a && b_b )
 327     {
 328         *mode++ = I_PRED_4x4_DC_TOP;
 329         *mode++ = I_PRED_4x4_V;
 330         *pi_count = 2;
 331     }
 332     else
 333     {
 334         *mode++ = I_PRED_4x4_DC_128;
 335         *pi_count = 1;
 336     }
 337 }
 338
 339 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
 340 {
 341     const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
 342     const int i_stride = h->mb.pic.i_stride[0];
 343     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 344     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 345
 346     int i, idx;
 347
 348     int i_max;
 349     int predict_mode[9];
 350
 351     /*---------------- Try all mode and calculate their score ---------------*/
 352
 353     /* 16x16 prediction selection */
 354     predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 355     for( i = 0; i < i_max; i++ )
 356     {
 357         int i_sad;
 358         int i_mode;
 359
 360         i_mode = predict_mode[i];
 361
 362         /* we do the prediction */
 363         h->predict_16x16[i_mode]( p_dst, i_stride );
 364
 365         /* we calculate the diff and get the square sum of the diff */
 366         i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
 367                 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
 368         /* if i_score is lower it is better */
 369         if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
 370         {
 371             res->i_predict16x16 = i_mode;
 372             res->i_sad_i16x16     = i_sad;
 373         }
 374     }
 375
 376     /* 4x4 prediction selection */
 377     if( flags & X264_ANALYSE_I4x4 )
 378     {
 379         res->i_sad_i4x4 = 0;
 380         for( idx = 0; idx < 16; idx++ )
 381         {
 382             uint8_t *p_src_by;
 383             uint8_t *p_dst_by;
 384             int     i_best;
 385             int x, y;
 386             int i_pred_mode;
 387
 388             i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
 389             x = block_idx_x[idx];
 390             y = block_idx_y[idx];
 391
 392             p_src_by = p_src + 4 * x + 4 * y * i_stride;
 393             p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
 394
 395             i_best = -1;
 396             predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
 397             for( i = 0; i < i_max; i++ )
 398             {
 399                 int i_sad;
 400                 int i_mode;
 401
 402                 i_mode = predict_mode[i];
 403
 404                 /* we do the prediction */
 405                 h->predict_4x4[i_mode]( p_dst_by, i_stride );
 406
 407                 /* we calculate diff and get the square sum of the diff */
 408                 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
 409                                                  p_src_by, i_stride );
 410
 411                 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
 412
 413                 /* if i_score is lower it is better */
 414                 if( i_best == -1 || i_best > i_sad )
 415                 {
 416                     res->i_predict4x4[x][y] = i_mode;
 417                     i_best = i_sad;
 418                 }
 419             }
 420             res->i_sad_i4x4 += i_best;
 421
 422             /* we need to encode this mb now (for next ones) */
 423             h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
 424             x264_mb_encode_i4x4( h, idx, res->i_qp );
 425
 426             /* we need to store the 'fixed' version */
 427             h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
 428                 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
 429         }
 430         res->i_sad_i4x4 += res->i_lambda * 24;    /* from JVT (SATD0) */
 431     }
 432 }
 433
 434 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
 435 {
 436     int i;
 437
 438     int i_max;
 439     int predict_mode[9];
 440
 441     uint8_t *p_dstc[2], *p_srcc[2];
 442     int      i_stride[2];
 443
 444     /* 8x8 prediction selection for chroma */
 445     p_dstc[0] = h->mb.pic.p_fdec[1];
 446     p_dstc[1] = h->mb.pic.p_fdec[2];
 447     p_srcc[0] = h->mb.pic.p_fenc[1];
 448     p_srcc[1] = h->mb.pic.p_fenc[2];
 449
 450     i_stride[0] = h->mb.pic.i_stride[1];
 451     i_stride[1] = h->mb.pic.i_stride[2];
 452
 453     predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 454     res->i_sad_i8x8 = -1;
 455     for( i = 0; i < i_max; i++ )
 456     {
 457         int i_sad;
 458         int i_mode;
 459
 460         i_mode = predict_mode[i];
 461
 462         /* we do the prediction */
 463         h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
 464         h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
 465
 466         /* we calculate the cost */
 467         i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
 468                                          p_srcc[0], i_stride[0] ) +
 469                 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
 470                                          p_srcc[1], i_stride[1] ) +
 471                 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
 472
 473         /* if i_score is lower it is better */
 474         if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
 475         {
 476             res->i_predict8x8 = i_mode;
 477             res->i_sad_i8x8     = i_sad;
 478         }
 479     }
 480 }
 481
 482 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 483 {
 484     x264_me_t m;
 485     int i_ref;
 486     int mvc[4][2], i_mvc;
 487
 488     /* 16x16 Search on all ref frame */
 489     m.i_pixel = PIXEL_16x16;
 490     m.lm      = a->i_lambda;
 491     m.p_fenc  = h->mb.pic.p_fenc[0];
 492     m.i_stride= h->mb.pic.i_stride[0];
 493     m.i_mv_range = a->i_mv_range;
 494
 495     a->l0.me16x16.cost = INT_MAX;
 496     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
 497     {
 498         /* search with ref */
 499         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 500         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 501         x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
 502         x264_me_search( h, &m, mvc, i_mvc );
 503
 504         /* add ref cost */
 505         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 506
 507         if( m.cost < a->l0.me16x16.cost )
 508         {
 509             a->l0.i_ref = i_ref;
 510             a->l0.me16x16 = m;
 511         }
 512
 513         /* save mv for predicting neighbors */
 514         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
 515         h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
 516     }
 517
 518     /* subtract ref cost, so we don't have to add it for the other P types */
 519     a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
 520
 521     /* Set global ref, needed for all others modes */
 522     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 523 }
 524
 525 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
 526 {
 527     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 528     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 529     int mvc[5][2], i_mvc;
 530     int i;
 531
 532     /* XXX Needed for x264_mb_predict_mv */
 533     h->mb.i_partition = D_8x8;
 534
 535     i_mvc = 1;
 536     mvc[0][0] = a->l0.me16x16.mv[0];
 537     mvc[0][1] = a->l0.me16x16.mv[1];
 538
 539     for( i = 0; i < 4; i++ )
 540     {
 541         x264_me_t *m = &a->l0.me8x8[i];
 542         const int x8 = i%2;
 543         const int y8 = i/2;
 544
 545         m->i_pixel = PIXEL_8x8;
 546         m->lm      = a->i_lambda;
 547
 548         m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
 549         m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
 550         m->i_stride= h->mb.pic.i_stride[0];
 551         m->i_mv_range = a->i_mv_range;
 552
 553         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 554         x264_me_search( h, m, mvc, i_mvc );
 555
 556         x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
 557
 558         mvc[i_mvc][0] = m->mv[0];
 559         mvc[i_mvc][1] = m->mv[1];
 560         i_mvc++;
 561     }
 562
 563     a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
 564                    a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
 565 }
 566
 567 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 568 {
 569     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 570     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 571     int mvc[2][2];
 572     int i;
 573
 574     /* XXX Needed for x264_mb_predict_mv */
 575     h->mb.i_partition = D_16x8;
 576
 577     for( i = 0; i < 2; i++ )
 578     {
 579         x264_me_t *m = &a->l0.me16x8[i];
 580
 581         m->i_pixel = PIXEL_16x8;
 582         m->lm      = a->i_lambda;
 583
 584         m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
 585         m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
 586         m->i_stride= h->mb.pic.i_stride[0];
 587         m->i_mv_range = a->i_mv_range;
 588
 589         mvc[0][0] = a->l0.me8x8[2*i].mv[0];
 590         mvc[0][1] = a->l0.me8x8[2*i].mv[1];
 591         mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
 592         mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
 593
 594         x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
 595         x264_me_search( h, m, mvc, 2 );
 596
 597         x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
 598     }
 599
 600     a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
 601 }
 602
 603 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 604 {
 605     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 606     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 607     int mvc[2][2];
 608     int i;
 609
 610     /* XXX Needed for x264_mb_predict_mv */
 611     h->mb.i_partition = D_8x16;
 612
 613     for( i = 0; i < 2; i++ )
 614     {
 615         x264_me_t *m = &a->l0.me8x16[i];
 616
 617         m->i_pixel = PIXEL_8x16;
 618         m->lm      = a->i_lambda;
 619
 620         m->p_fenc  = &p_fenc[8*i];
 621         m->p_fref  = &p_fref[8*i];
 622         m->i_stride= h->mb.pic.i_stride[0];
 623         m->i_mv_range = a->i_mv_range;
 624
 625         mvc[0][0] = a->l0.me8x8[i].mv[0];
 626         mvc[0][1] = a->l0.me8x8[i].mv[1];
 627         mvc[1][0] = a->l0.me8x8[i+2].mv[0];
 628         mvc[1][1] = a->l0.me8x8[i+2].mv[1];
 629
 630         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 631         x264_me_search( h, m, mvc, 2 );
 632
 633         x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
 634     }
 635
 636     a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
 637 }
 638
 639 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 640 {
 641     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 642     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 643
 644     int i4x4;
 645
 646     /* XXX Needed for x264_mb_predict_mv */
 647     h->mb.i_partition = D_8x8;
 648
 649     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 650     {
 651         const int idx = 4*i8x8 + i4x4;
 652         const int x4 = block_idx_x[idx];
 653         const int y4 = block_idx_y[idx];
 654         const int i_mvc = (i4x4 == 0);
 655
 656         x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
 657
 658         m->i_pixel = PIXEL_4x4;
 659         m->lm      = a->i_lambda;
 660
 661         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 662         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 663         m->i_stride= h->mb.pic.i_stride[0];
 664         m->i_mv_range = a->i_mv_range;
 665
 666         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 667         x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
 668
 669         x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
 670     }
 671
 672     a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
 673                          a->l0.me4x4[i8x8][1].cost +
 674                          a->l0.me4x4[i8x8][2].cost +
 675                          a->l0.me4x4[i8x8][3].cost;
 676 }
 677
 678 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 679 {
 680     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 681     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 682
 683     int i8x4;
 684
 685     /* XXX Needed for x264_mb_predict_mv */
 686     h->mb.i_partition = D_8x8;
 687
 688     for( i8x4 = 0; i8x4 < 2; i8x4++ )
 689     {
 690         const int idx = 4*i8x8 + 2*i8x4;
 691         const int x4 = block_idx_x[idx];
 692         const int y4 = block_idx_y[idx];
 693         const int i_mvc = (i8x4 == 0);
 694
 695         x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
 696
 697         m->i_pixel = PIXEL_8x4;
 698         m->lm      = a->i_lambda;
 699
 700         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 701         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 702         m->i_stride= h->mb.pic.i_stride[0];
 703         m->i_mv_range = a->i_mv_range;
 704
 705         x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
 706         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
 707
 708         x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
 709     }
 710
 711     a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost;
 712 }
 713
 714 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 715 {
 716     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 717     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 718
 719     int i4x8;
 720
 721     /* XXX Needed for x264_mb_predict_mv */
 722     h->mb.i_partition = D_8x8;
 723
 724     for( i4x8 = 0; i4x8 < 2; i4x8++ )
 725     {
 726         const int idx = 4*i8x8 + i4x8;
 727         const int x4 = block_idx_x[idx];
 728         const int y4 = block_idx_y[idx];
 729         const int i_mvc = (i4x8 == 0);
 730
 731         x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
 732
 733         m->i_pixel = PIXEL_4x8;
 734         m->lm      = a->i_lambda;
 735
 736         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 737         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 738         m->i_stride= h->mb.pic.i_stride[0];
 739         m->i_mv_range = a->i_mv_range;
 740
 741         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 742         x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
 743
 744         x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
 745     }
 746
 747     a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
 748 }
 749
 750 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 751 {
 752     /* Assumes that fdec still contains the results of
 753      * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
 754
 755     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
 756     uint8_t *p_fdec = h->mb.pic.p_fdec[0];
 757     int i_stride= h->mb.pic.i_stride[0];
 758     int i;
 759
 760     a->i_cost16x16direct = 0;
 761     for( i = 0; i < 4; i++ )
 762     {
 763         const int x8 = i%2;
 764         const int y8 = i/2;
 765         const int off = 8 * x8 + 8 * i_stride * y8;
 766         a->i_cost16x16direct +=
 767         a->i_cost8x8direct[i] =
 768             h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
 769
 770         /* mb type cost */
 771         a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
 772     }
 773
 774     a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
 775 }
 776
 777 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 778 {
 779     uint8_t pix1[16*16], pix2[16*16];
 780
 781     x264_me_t m;
 782     int i_ref;
 783
 784     /* 16x16 Search on all ref frame */
 785     m.i_pixel = PIXEL_16x16;
 786     m.lm      = a->i_lambda;
 787     m.p_fenc  = h->mb.pic.p_fenc[0];
 788     m.i_stride= h->mb.pic.i_stride[0];
 789     m.i_mv_range = a->i_mv_range;
 790
 791     /* ME for List 0 */
 792     a->l0.me16x16.cost = INT_MAX;
 793     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
 794     {
 795         /* search with ref */
 796         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 797         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 798         x264_me_search( h, &m, NULL, 0 );
 799
 800         /* add ref cost */
 801         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 802
 803         if( m.cost < a->l0.me16x16.cost )
 804         {
 805             a->l0.i_ref = i_ref;
 806             a->l0.me16x16 = m;
 807         }
 808     }
 809     /* subtract ref cost, so we don't have to add it for the other MB types */
 810     a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
 811
 812     /* ME for list 1 */
 813     a->l1.me16x16.cost = INT_MAX;
 814     for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
 815     {
 816         /* search with ref */
 817         m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
 818         x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
 819         x264_me_search( h, &m, NULL, 0 );
 820
 821         /* add ref cost */
 822         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
 823
 824         if( m.cost < a->l1.me16x16.cost )
 825         {
 826             a->l1.i_ref = i_ref;
 827             a->l1.me16x16 = m;
 828         }
 829     }
 830     /* subtract ref cost, so we don't have to add it for the other MB types */
 831     a->l1.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref );
 832
 833     /* Set global ref, needed for other modes? */
 834     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 835     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
 836
 837     /* get cost of BI mode */
 838     h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
 839                     pix1, 16,
 840                     a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
 841                     16, 16 );
 842     h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
 843                     pix2, 16,
 844                     a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
 845                     16, 16 );
 846     h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
 847
 848     a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
 849                        a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
 850                                        bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
 851                                        bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
 852                                        bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
 853                                        bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
 854                                        bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
 855
 856     /* mb type cost */
 857     a->i_cost16x16bi   += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
 858     a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
 859     a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
 860 }
 861
 862 #define CACHE_MV_BI(x,y,dx,dy,me0,me1,part) \
 863     if( x264_mb_partition_listX_table[0][part] ) \
 864     { \
 865         x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
 866         x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
 867     } \
 868     else \
 869     { \
 870         x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
 871         x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, 0, 0 ); \
 872         if( b_mvd ) \
 873             x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
 874     } \
 875     if( x264_mb_partition_listX_table[1][part] ) \
 876     { \
 877         x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
 878         x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
 879     } \
 880     else \
 881     { \
 882         x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
 883         x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, 0, 0 ); \
 884         if( b_mvd ) \
 885             x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
 886     }
 887
 888 static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 889 {
 890     int x = (i%2)*2;
 891     int y = (i/2)*2;
 892     if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
 893     {
 894         x264_mb_load_mv_direct8x8( h, i );
 895         if( b_mvd )
 896         {
 897             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0, 0 );
 898             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 1, 0, 0 );
 899             x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
 900         }
 901     }
 902     else
 903     {
 904         CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
 905     }
 906 }
 907 static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 908 {
 909     CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
 910 }
 911 static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 912 {
 913     CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
 914 }
 915 #undef CACHE_MV_BI
 916
 917 static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
 918 {
 919     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
 920                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
 921     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
 922     uint8_t pix[2][8*8];
 923     int i, l;
 924
 925     /* XXX Needed for x264_mb_predict_mv */
 926     h->mb.i_partition = D_8x8;
 927
 928     a->i_cost8x8bi = 0;
 929
 930     for( i = 0; i < 4; i++ )
 931     {
 932         const int x8 = i%2;
 933         const int y8 = i/2;
 934         uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
 935         int i_part_cost;
 936         int i_part_cost_bi = 0;
 937
 938         for( l = 0; l < 2; l++ )
 939         {
 940             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
 941             x264_me_t *m = &lX->me8x8[i];
 942
 943             m->i_pixel = PIXEL_8x8;
 944             m->lm      = a->i_lambda;
 945
 946             m->p_fenc = p_fenc_i;
 947             m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
 948             m->i_stride = h->mb.pic.i_stride[0];
 949             m->i_mv_range = a->i_mv_range;
 950
 951             x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
 952             x264_me_search( h, m, &lX->me16x16.mv, 1 );
 953
 954             x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
 955             lX->i_cost8x8 += m->cost;
 956
 957             /* BI mode */
 958             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
 959                             m->mv[0], m->mv[1], 8, 8 );
 960             /* FIXME: ref cost */
 961             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
 962                                               bs_size_se( m->mv[1] - m->mvp[1] ) +
 963                                               i_sub_mb_b_cost_table[D_L0_8x8] );
 964         }
 965
 966         h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
 967         i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 )
 968                         + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
 969
 970         i_part_cost = a->l0.me8x8[i].cost;
 971         h->mb.i_sub_partition[i] = D_L0_8x8;
 972         if( a->l1.me8x8[i].cost < i_part_cost )
 973         {
 974             i_part_cost = a->l1.me8x8[i].cost;
 975             h->mb.i_sub_partition[i] = D_L1_8x8;
 976         }
 977         if( i_part_cost_bi < i_part_cost )
 978         {
 979             i_part_cost = i_part_cost_bi;
 980             h->mb.i_sub_partition[i] = D_BI_8x8;
 981         }
 982         if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
 983         {
 984             i_part_cost = a->i_cost8x8direct[i];
 985             h->mb.i_sub_partition[i] = D_DIRECT_8x8;
 986         }
 987         a->i_cost8x8bi += i_part_cost;
 988
 989         /* XXX Needed for x264_mb_predict_mv */
 990         x264_mb_cache_mv_b8x8( h, a, i, 0 );
 991     }
 992
 993     /* mb type cost */
 994     a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
 995 }
 996
 997 static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
 998 {
 999     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1000                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1001     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1002     uint8_t pix[2][8*8];
1003     int i_ref_stride = h->mb.pic.i_stride[0];
1004     int mvc[2][2];
1005     int i, l;
1006
1007     h->mb.i_partition = D_16x8;
1008     a->i_cost16x8bi = 0;
1009
1010     for( i = 0; i < 2; i++ )
1011     {
1012         uint8_t *p_fenc_i = &p_fenc[8*i*i_ref_stride];
1013         int i_part_cost;
1014         int i_part_cost_bi = 0;
1015
1016         /* TODO: check only the list(s) that were used in b8x8? */
1017         for( l = 0; l < 2; l++ )
1018         {
1019             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1020             x264_me_t *m = &lX->me16x8[i];
1021
1022             m->i_pixel = PIXEL_16x8;
1023             m->lm      = a->i_lambda;
1024
1025             m->p_fenc  = p_fenc_i;
1026             m->i_stride= i_ref_stride;
1027             m->p_fref  = &p_fref[l][8*i*i_ref_stride];
1028             m->i_mv_range = a->i_mv_range;
1029
1030             mvc[0][0] = lX->me8x8[2*i].mv[0];
1031             mvc[0][1] = lX->me8x8[2*i].mv[1];
1032             mvc[1][0] = lX->me8x8[2*i+1].mv[0];
1033             mvc[1][1] = lX->me8x8[2*i+1].mv[1];
1034
1035             x264_mb_predict_mv( h, 0, 8*i, 2, m->mvp );
1036             x264_me_search( h, m, mvc, 2 );
1037
1038             /* BI mode */
1039             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1040                             m->mv[0], m->mv[1], 8, 8 );
1041             /* FIXME: ref cost */
1042             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1043                                               bs_size_se( m->mv[1] - m->mvp[1] ) );
1044         }
1045
1046         h->pixf.avg[PIXEL_16x8]( pix[0], 8, pix[1], 8 );
1047         i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
1048
1049         i_part_cost = a->l0.me16x8[i].cost;
1050         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
1051         if( a->l1.me16x8[i].cost < i_part_cost )
1052         {
1053             i_part_cost = a->l1.me16x8[i].cost;
1054             a->i_mb_partition16x8[i] = D_L1_8x8;
1055         }
1056         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1057         {
1058             i_part_cost = i_part_cost_bi;
1059             a->i_mb_partition16x8[i] = D_BI_8x8;
1060         }
1061         a->i_cost16x8bi += i_part_cost;
1062
1063         if( i == 0 )
1064             x264_mb_cache_mv_b16x8( h, a, i, 0 );
1065     }
1066
1067     /* mb type cost */
1068     a->i_mb_type16x8 = B_L0_L0
1069         + (a->i_mb_partition16x8[0]>>2) * 3
1070         + (a->i_mb_partition16x8[1]>>2);
1071     a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
1072 }
1073 static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
1074 {
1075     uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
1076                            h->mb.pic.p_fref[1][a->l1.i_ref][0] };
1077     uint8_t *p_fenc = h->mb.pic.p_fenc[0];
1078     uint8_t pix[2][8*8];
1079     int i_ref_stride = h->mb.pic.i_stride[0];
1080     int mvc[2][2];
1081     int i, l;
1082
1083     h->mb.i_partition = D_8x16;
1084     a->i_cost8x16bi = 0;
1085
1086     for( i = 0; i < 2; i++ )
1087     {
1088         uint8_t *p_fenc_i = &p_fenc[8*i];
1089         int i_part_cost;
1090         int i_part_cost_bi = 0;
1091
1092         for( l = 0; l < 2; l++ )
1093         {
1094             x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
1095             x264_me_t *m = &lX->me8x16[i];
1096
1097             m->i_pixel = PIXEL_8x16;
1098             m->lm      = a->i_lambda;
1099
1100             m->p_fenc  = p_fenc_i;
1101             m->p_fref  = &p_fref[l][8*i];
1102             m->i_stride= i_ref_stride;
1103             m->i_mv_range = a->i_mv_range;
1104
1105             mvc[0][0] = lX->me8x8[i].mv[0];
1106             mvc[0][1] = lX->me8x8[i].mv[1];
1107             mvc[1][0] = lX->me8x8[i+2].mv[0];
1108             mvc[1][1] = lX->me8x8[i+2].mv[1];
1109
1110             x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
1111             x264_me_search( h, m, mvc, 2 );
1112
1113             /* BI mode */
1114             h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
1115                             m->mv[0], m->mv[1], 8, 8 );
1116             /* FIXME: ref cost */
1117             i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
1118                                               bs_size_se( m->mv[1] - m->mvp[1] ) );
1119         }
1120
1121         h->pixf.avg[PIXEL_8x16]( pix[0], 8, pix[1], 8 );
1122         i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
1123
1124         i_part_cost = a->l0.me8x16[i].cost;
1125         a->i_mb_partition8x16[i] = D_L0_8x8;
1126         if( a->l1.me8x16[i].cost < i_part_cost )
1127         {
1128             i_part_cost = a->l1.me8x16[i].cost;
1129             a->i_mb_partition8x16[i] = D_L1_8x8;
1130         }
1131         if( i_part_cost_bi + a->i_lambda * 1 < i_part_cost )
1132         {
1133             i_part_cost = i_part_cost_bi;
1134             a->i_mb_partition8x16[i] = D_BI_8x8;
1135         }
1136         a->i_cost8x16bi += i_part_cost;
1137
1138         if( i == 0 )
1139             x264_mb_cache_mv_b8x16( h, a, i, 0 );
1140     }
1141
1142     /* mb type cost */
1143     a->i_mb_type8x16 = B_L0_L0
1144         + (a->i_mb_partition8x16[0]>>2) * 3
1145         + (a->i_mb_partition8x16[1]>>2);
1146     a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
1147 }
1148
1149 /*****************************************************************************
1150  * x264_macroblock_analyse:
1151  *****************************************************************************/
1152 void x264_macroblock_analyse( x264_t *h )
1153 {
1154     x264_mb_analysis_t analysis;
1155     int i;
1156
1157     h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
1158
1159     /* FIXME check if it's 12 */
1160     if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
1161         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
1162     else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
1163         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
1164
1165     /* init analysis */
1166     x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
1167
1168     /*--------------------------- Do the analysis ---------------------------*/
1169     if( h->sh.i_type == SLICE_TYPE_I )
1170     {
1171         x264_mb_analyse_intra( h, &analysis );
1172
1173         if( analysis.i_sad_i4x4 >= 0 &&  analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
1174             h->mb.i_type = I_4x4;
1175         else
1176             h->mb.i_type = I_16x16;
1177     }
1178     else if( h->sh.i_type == SLICE_TYPE_P )
1179     {
1180         const unsigned int i_neighbour = h->mb.i_neighbour;
1181
1182         int b_skip = 0;
1183         int i_cost;
1184         int i_intra_cost, i_intra_type;
1185
1186         /* Fast P_SKIP detection */
1187         if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
1188             ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
1189             ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
1190             ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
1191         {
1192             b_skip = x264_macroblock_probe_pskip( h );
1193         }
1194
1195         if( b_skip )
1196         {
1197             h->mb.i_type = P_SKIP;
1198             h->mb.i_partition = D_16x16;
1199         }
1200         else
1201         {
1202             const unsigned int flags = h->param.analyse.inter;
1203             int i_type;
1204             int i_partition;
1205
1206             x264_mb_analyse_inter_p16x16( h, &analysis );
1207             if( flags & X264_ANALYSE_PSUB16x16 )
1208                 x264_mb_analyse_inter_p8x8( h, &analysis );
1209
1210             /* Select best inter mode */
1211             i_type = P_L0;
1212             i_partition = D_16x16;
1213             i_cost = analysis.l0.me16x16.cost;
1214
1215             if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
1216                 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
1217             {
1218                 int i;
1219
1220                 i_type = P_8x8;
1221                 i_partition = D_8x8;
1222                 h->mb.i_sub_partition[0] = D_L0_8x8;
1223                 h->mb.i_sub_partition[1] = D_L0_8x8;
1224                 h->mb.i_sub_partition[2] = D_L0_8x8;
1225                 h->mb.i_sub_partition[3] = D_L0_8x8;
1226
1227                 i_cost = analysis.l0.i_cost8x8;
1228
1229                 /* Do sub 8x8 */
1230                 if( flags & X264_ANALYSE_PSUB8x8 )
1231                 {
1232                     for( i = 0; i < 4; i++ )
1233                     {
1234                         x264_mb_analyse_inter_p4x4( h, &analysis, i );
1235                         if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
1236                         {
1237                             int i_cost8x8;
1238
1239                             h->mb.i_sub_partition[i] = D_L0_4x4;
1240                             i_cost8x8 = analysis.l0.i_cost4x4[i];
1241
1242                             x264_mb_analyse_inter_p8x4( h, &analysis, i );
1243                             if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
1244                             {
1245                                 h->mb.i_sub_partition[i] = D_L0_8x4;
1246                                 i_cost8x8 = analysis.l0.i_cost8x4[i];
1247                             }
1248
1249                             x264_mb_analyse_inter_p4x8( h, &analysis, i );
1250                             if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
1251                             {
1252                                 h->mb.i_sub_partition[i] = D_L0_4x8;
1253                                 i_cost8x8 = analysis.l0.i_cost4x8[i];
1254                             }
1255
1256                             i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
1257                         }
1258                     }
1259                 }
1260
1261                 /* Now do sub 16x8/8x16 */
1262                 x264_mb_analyse_inter_p16x8( h, &analysis );
1263                 if( analysis.l0.i_cost16x8 < i_cost )
1264                 {
1265                     i_type = P_L0;
1266                     i_partition = D_16x8;
1267                     i_cost = analysis.l0.i_cost16x8;
1268                 }
1269
1270                 x264_mb_analyse_inter_p8x16( h, &analysis );
1271                 if( analysis.l0.i_cost8x16 < i_cost )
1272                 {
1273                     i_type = P_L0;
1274                     i_partition = D_8x16;
1275                     i_cost = analysis.l0.i_cost8x16;
1276                 }
1277             }
1278
1279             h->mb.i_type = i_type;
1280             h->mb.i_partition = i_partition;
1281
1282             /* refine qpel */
1283             if( h->mb.i_partition == D_16x16 )
1284             {
1285                 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1286                 i_cost = analysis.l0.me16x16.cost;
1287             }
1288             else if( h->mb.i_partition == D_16x8 )
1289             {
1290                 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
1291                 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
1292                 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
1293             }
1294             else if( h->mb.i_partition == D_8x16 )
1295             {
1296                 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
1297                 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
1298                 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
1299             }
1300             else if( h->mb.i_partition == D_8x8 )
1301             {
1302                 int i8x8;
1303                 i_cost = 0;
1304                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
1305                 {
1306                     switch( h->mb.i_sub_partition[i8x8] )
1307                     {
1308                         case D_L0_8x8:
1309                             x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1310                             i_cost += analysis.l0.me8x8[i8x8].cost;
1311                             break;
1312                         case D_L0_8x4:
1313                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1314                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1315                             i_cost += analysis.l0.me8x4[i8x8][0].cost +
1316                                       analysis.l0.me8x4[i8x8][1].cost;
1317                             break;
1318                         case D_L0_4x8:
1319                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1320                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1321                             i_cost += analysis.l0.me4x8[i8x8][0].cost +
1322                                       analysis.l0.me4x8[i8x8][1].cost;
1323                             break;
1324
1325                         case D_L0_4x4:
1326                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1327                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1328                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1329                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1330                             i_cost += analysis.l0.me4x4[i8x8][0].cost +
1331                                       analysis.l0.me4x4[i8x8][1].cost +
1332                                       analysis.l0.me4x4[i8x8][2].cost +
1333                                       analysis.l0.me4x4[i8x8][3].cost;
1334                             break;
1335                         default:
1336                             fprintf( stderr, "internal error (!8x8 && !4x4)" );
1337                             break;
1338                     }
1339                 }
1340             }
1341
1342             x264_mb_analyse_intra( h, &analysis );
1343             i_intra_type = I_16x16;
1344             i_intra_cost = analysis.i_sad_i16x16;
1345
1346             if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_intra_cost )
1347             {
1348                 i_intra_type = I_4x4;
1349                 i_intra_cost = analysis.i_sad_i4x4;
1350             }
1351
1352             if( i_intra_cost >= 0 && i_intra_cost < i_cost )
1353             {
1354                 h->mb.i_type = i_intra_type;
1355                 i_cost = i_intra_cost;
1356             }
1357
1358             h->stat.frame.i_intra_cost += i_intra_cost;
1359             h->stat.frame.i_inter_cost += i_cost;
1360         }
1361     }
1362     else if( h->sh.i_type == SLICE_TYPE_B )
1363     {
1364         int b_skip = 0;
1365
1366         analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
1367         if( analysis.b_direct_available )
1368         {
1369             h->mb.i_type = B_SKIP;
1370             x264_mb_mc( h );
1371
1372             /* Conditioning the probe on neighboring block types
1373              * doesn't seem to help speed or quality. */
1374             b_skip = x264_macroblock_probe_bskip( h );
1375         }
1376
1377         if( !b_skip )
1378         {
1379             const unsigned int flags = h->param.analyse.inter;
1380             int i_partition;
1381             int i_cost;
1382
1383             /* select best inter mode */
1384             /* direct must be first */
1385             if( analysis.b_direct_available )
1386                 x264_mb_analyse_inter_direct( h, &analysis );
1387
1388             x264_mb_analyse_inter_b16x16( h, &analysis );
1389
1390             h->mb.i_type = B_L0_L0;
1391             i_partition = D_16x16;
1392             i_cost = analysis.l0.me16x16.cost;
1393             if( analysis.l1.me16x16.cost < i_cost )
1394             {
1395                 h->mb.i_type = B_L1_L1;
1396                 i_cost = analysis.l1.me16x16.cost;
1397             }
1398             if( analysis.i_cost16x16bi < i_cost )
1399             {
1400                 h->mb.i_type = B_BI_BI;
1401                 i_cost = analysis.i_cost16x16bi;
1402             }
1403             if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 )
1404             {
1405                 h->mb.i_type = B_DIRECT;
1406                 i_cost = analysis.i_cost16x16direct;
1407             }
1408
1409             if( flags & X264_ANALYSE_BSUB16x16 )
1410             {
1411                 x264_mb_analyse_inter_b8x8( h, &analysis );
1412                 if( analysis.i_cost8x8bi < i_cost )
1413                 {
1414                     h->mb.i_type = B_8x8;
1415                     i_partition = D_8x8;
1416                     i_cost = analysis.i_cost8x8bi;
1417
1418                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[1] ||
1419                         h->mb.i_sub_partition[2] == h->mb.i_sub_partition[3] )
1420                     {
1421                         x264_mb_analyse_inter_b16x8( h, &analysis );
1422                         if( analysis.i_cost16x8bi < i_cost )
1423                         {
1424                             i_partition = D_16x8;
1425                             i_cost = analysis.i_cost16x8bi;
1426                             h->mb.i_type = analysis.i_mb_type16x8;
1427                         }
1428                     }
1429                     if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
1430                         h->mb.i_sub_partition[1] == h->mb.i_sub_partition[3] )
1431                     {
1432                         x264_mb_analyse_inter_b8x16( h, &analysis );
1433                         if( analysis.i_cost8x16bi < i_cost )
1434                         {
1435                             i_partition = D_8x16;
1436                             i_cost = analysis.i_cost8x16bi;
1437                             h->mb.i_type = analysis.i_mb_type8x16;
1438                         }
1439                     }
1440                 }
1441             }
1442
1443             h->mb.i_partition = i_partition;
1444
1445             /* refine qpel */
1446             if( i_partition == D_16x16 )
1447             {
1448                 if( h->mb.i_type == B_L0_L0 )
1449                 {
1450                     analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1451                     x264_me_refine_qpel( h, &analysis.l0.me16x16 );
1452                     analysis.l0.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
1453                     i_cost = analysis.l0.me16x16.cost;
1454                 }
1455                 else if( h->mb.i_type == B_L1_L1 )
1456                 {
1457                     analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1458                     x264_me_refine_qpel( h, &analysis.l1.me16x16 );
1459                     analysis.l1.me16x16.cost += analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
1460                     i_cost = analysis.l1.me16x16.cost;
1461                 }
1462             }
1463             /* TODO: refine bidir, 8x8 */
1464
1465             /* best intra mode */
1466             x264_mb_analyse_intra( h, &analysis );
1467             /* mb type cost */
1468             analysis.i_sad_i16x16 += analysis.i_lambda * i_mb_b_cost_table[I_16x16];
1469             analysis.i_sad_i4x4   += analysis.i_lambda * i_mb_b_cost_table[I_4x4];
1470
1471             if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1472             {
1473                 h->mb.i_type = I_16x16;
1474                 i_cost = analysis.i_sad_i16x16;
1475             }
1476             if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1477             {
1478                 h->mb.i_type = I_4x4;
1479                 i_cost = analysis.i_sad_i4x4;
1480             }
1481         }
1482     }
1483
1484     /*-------------------- Update MB from the analysis ----------------------*/
1485     h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1486     switch( h->mb.i_type )
1487     {
1488         case I_4x4:
1489             for( i = 0; i < 16; i++ )
1490             {
1491                 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1492                     analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1493             }
1494
1495             x264_mb_analyse_intra_chroma( h, &analysis );
1496             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1497             break;
1498         case I_16x16:
1499             h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1500
1501             x264_mb_analyse_intra_chroma( h, &analysis );
1502             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1503             break;
1504
1505         case P_L0:
1506             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1507             switch( h->mb.i_partition )
1508             {
1509                 case D_16x16:
1510                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1511                     break;
1512
1513                 case D_16x8:
1514                     x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1515                     x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1516                     break;
1517
1518                 case D_8x16:
1519                     x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1520                     x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1521                     break;
1522
1523                 default:
1524                     fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
1525                     break;
1526             }
1527             break;
1528
1529         case P_8x8:
1530             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1531             for( i = 0; i < 4; i++ )
1532             {
1533                 const int x = 2*(i%2);
1534                 const int y = 2*(i/2);
1535
1536                 switch( h->mb.i_sub_partition[i] )
1537                 {
1538                     case D_L0_8x8:
1539                         x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1540                         break;
1541                     case D_L0_8x4:
1542                         x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1543                         x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1544                         break;
1545                     case D_L0_4x8:
1546                         x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1547                         x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1548                         break;
1549                     case D_L0_4x4:
1550                         x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1551                         x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1552                         x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1553                         x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1554                         break;
1555                     default:
1556                         fprintf( stderr, "internal error\n" );
1557                         break;
1558                 }
1559             }
1560             break;
1561
1562         case P_SKIP:
1563         {
1564             int mvp[2];
1565             x264_mb_predict_mv_pskip( h, mvp );
1566             /* */
1567             h->mb.i_partition = D_16x16;
1568             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1569             x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1570             break;
1571         }
1572
1573         case B_SKIP:
1574             /* nothing has changed since x264_macroblock_probe_bskip */
1575             break;
1576         case B_DIRECT:
1577             x264_mb_load_mv_direct8x8( h, 0 );
1578             x264_mb_load_mv_direct8x8( h, 1 );
1579             x264_mb_load_mv_direct8x8( h, 2 );
1580             x264_mb_load_mv_direct8x8( h, 3 );
1581             break;
1582
1583         case B_8x8:
1584             /* optimize: cache might not need to be rewritten */
1585             for( i = 0; i < 4; i++ )
1586                 x264_mb_cache_mv_b8x8( h, &analysis, i, 1 );
1587             break;
1588
1589         default: /* the rest of the B types */
1590             switch( h->mb.i_partition )
1591             {
1592             case D_16x16:
1593                 switch( h->mb.i_type )
1594                 {
1595                 case B_L0_L0:
1596                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1597                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1598
1599                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1600                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1,  0, 0 );
1601                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1,  0, 0 );
1602                     break;
1603                 case B_L1_L1:
1604                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1605                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0,  0, 0 );
1606                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0,  0, 0 );
1607
1608                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1609                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1610                     break;
1611                 case B_BI_BI:
1612                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1613                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1614
1615                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1616                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1617                     break;
1618                 }
1619                 break;
1620             case D_16x8:
1621                 x264_mb_cache_mv_b16x8( h, &analysis, 0, 1 );
1622                 x264_mb_cache_mv_b16x8( h, &analysis, 1, 1 );
1623                 break;
1624             case D_8x16:
1625                 x264_mb_cache_mv_b8x16( h, &analysis, 0, 1 );
1626                 x264_mb_cache_mv_b8x16( h, &analysis, 1, 1 );
1627                 break;
1628             default:
1629                 fprintf( stderr, "internal error (invalid MB type)\n" );
1630                 break;
1631             }
1632     }
1633 }
1634