git.sesse.net Git - x264/blob - encoder/analyse.c

   1 /*****************************************************************************
   2  * analyse.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27 #include <math.h>
  28
  29 #include "../core/common.h"
  30 #include "../core/macroblock.h"
  31 #include "macroblock.h"
  32 #include "me.h"
  33 #include "ratecontrol.h"
  34
  35 typedef struct
  36 {
  37     /* 16x16 */
  38     int i_ref;
  39     x264_me_t me16x16;
  40
  41     /* 8x8 */
  42     int       i_cost8x8;
  43     x264_me_t me8x8[4];
  44
  45     /* Sub 4x4 */
  46     int       i_cost4x4[4]; /* cost per 8x8 partition */
  47     x264_me_t me4x4[4][4];
  48
  49     /* Sub 8x4 */
  50     int       i_cost8x4[4]; /* cost per 8x8 partition */
  51     x264_me_t me8x4[4][2];
  52
  53     /* Sub 4x8 */
  54     int       i_cost4x8[4]; /* cost per 8x8 partition */
  55     x264_me_t me4x8[4][4];
  56
  57     /* 16x8 */
  58     int       i_cost16x8;
  59     x264_me_t me16x8[2];
  60
  61     /* 8x16 */
  62     int       i_cost8x16;
  63     x264_me_t me8x16[2];
  64
  65 } x264_mb_analysis_list_t;
  66
  67 typedef struct
  68 {
  69     /* conduct the analysis using this lamda and QP */
  70     int i_lambda;
  71     int i_qp;
  72
  73
  74     /* I: Intra part */
  75     /* Luma part 16x16 and 4x4 modes stats */
  76     int i_sad_i16x16;
  77     int i_predict16x16;
  78
  79     int i_sad_i4x4;
  80     int i_predict4x4[4][4];
  81
  82     /* Chroma part */
  83     int i_sad_i8x8;
  84     int i_predict8x8;
  85
  86     /* II: Inter part P/B frame */
  87     int i_mv_range;
  88
  89     x264_mb_analysis_list_t l0;
  90     x264_mb_analysis_list_t l1;
  91
  92     int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
  93
  94 } x264_mb_analysis_t;
  95
  96 static const int i_qp0_cost_table[52] = {
  97    1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
  98    1, 1, 1, 1,              /*  8-11 */
  99    1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
 100    3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
 101    6, 7, 8, 9,10,11,13,14,  /* 28-35 */
 102   16,18,20,23,25,29,32,36,  /* 36-43 */
 103   40,45,51,57,64,72,81,91   /* 44-51 */
 104 };
 105
 106 static const uint8_t block_idx_x[16] = {
 107     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
 108 };
 109 static const uint8_t block_idx_y[16] = {
 110     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
 111 };
 112
 113 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 114 {
 115     memset( a, 0, sizeof( x264_mb_analysis_t ) );
 116
 117     /* conduct the analysis using this lamda and QP */
 118     a->i_qp = i_qp;
 119     a->i_lambda = i_qp0_cost_table[i_qp];
 120
 121     /* I: Intra part */
 122     a->i_sad_i16x16 = -1;
 123     a->i_sad_i4x4   = -1;
 124     a->i_sad_i8x8   = -1;
 125
 126     /* II: Inter part P/B frame */
 127     if( h->sh.i_type != SLICE_TYPE_I )
 128     {
 129         int dmb;
 130         int i;
 131
 132         /* Calculate max start MV range */
 133         dmb = h->mb.i_mb_x;
 134         if( h->mb.i_mb_y < dmb )
 135             dmb = h->mb.i_mb_y;
 136         if( h->sps->i_mb_width - h->mb.i_mb_x < dmb )
 137             dmb = h->sps->i_mb_width - h->mb.i_mb_x;
 138         if( h->sps->i_mb_height - h->mb.i_mb_y < dmb )
 139             dmb = h->sps->i_mb_height - h->mb.i_mb_y;
 140
 141         a->i_mv_range = 16*dmb + 8;
 142
 143         a->l0.me16x16.cost = -1;
 144         a->l0.i_cost8x8    = -1;
 145
 146         for( i = 0; i < 4; i++ )
 147         {
 148             a->l0.i_cost4x4[i] = -1;
 149             a->l0.i_cost8x4[i] = -1;
 150             a->l0.i_cost4x8[i] = -1;
 151         }
 152
 153         a->l0.i_cost16x8   = -1;
 154         a->l0.i_cost8x16   = -1;
 155         if( h->sh.i_type == SLICE_TYPE_B )
 156         {
 157             a->l1.me16x16.cost = -1;
 158             a->l1.i_cost8x8    = -1;
 159
 160             for( i = 0; i < 4; i++ )
 161             {
 162                 a->l1.i_cost4x4[i] = -1;
 163                 a->l1.i_cost8x4[i] = -1;
 164                 a->l1.i_cost4x8[i] = -1;
 165             }
 166
 167             a->l1.i_cost16x8   = -1;
 168             a->l1.i_cost8x16   = -1;
 169
 170             a->i_cost16x16bi   = -1;
 171         }
 172     }
 173 }
 174
 175
 176
 177 /*
 178  * Handle intra mb
 179  */
 180 /* Max = 4 */
 181 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 182 {
 183     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 184     {
 185         /* top and left avaible */
 186         *mode++ = I_PRED_16x16_V;
 187         *mode++ = I_PRED_16x16_H;
 188         *mode++ = I_PRED_16x16_DC;
 189         *mode++ = I_PRED_16x16_P;
 190         *pi_count = 4;
 191     }
 192     else if( ( i_neighbour & MB_LEFT ) )
 193     {
 194         /* left available*/
 195         *mode++ = I_PRED_16x16_DC_LEFT;
 196         *mode++ = I_PRED_16x16_H;
 197         *pi_count = 2;
 198     }
 199     else if( ( i_neighbour & MB_TOP ) )
 200     {
 201         /* top available*/
 202         *mode++ = I_PRED_16x16_DC_TOP;
 203         *mode++ = I_PRED_16x16_V;
 204         *pi_count = 2;
 205     }
 206     else
 207     {
 208         /* none avaible */
 209         *mode = I_PRED_16x16_DC_128;
 210         *pi_count = 1;
 211     }
 212 }
 213
 214 /* Max = 4 */
 215 static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 216 {
 217     if( ( i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 218     {
 219         /* top and left avaible */
 220         *mode++ = I_PRED_CHROMA_V;
 221         *mode++ = I_PRED_CHROMA_H;
 222         *mode++ = I_PRED_CHROMA_DC;
 223         *mode++ = I_PRED_CHROMA_P;
 224         *pi_count = 4;
 225     }
 226     else if( ( i_neighbour & MB_LEFT ) )
 227     {
 228         /* left available*/
 229         *mode++ = I_PRED_CHROMA_DC_LEFT;
 230         *mode++ = I_PRED_CHROMA_H;
 231         *pi_count = 2;
 232     }
 233     else if( ( i_neighbour & MB_TOP ) )
 234     {
 235         /* top available*/
 236         *mode++ = I_PRED_CHROMA_DC_TOP;
 237         *mode++ = I_PRED_CHROMA_V;
 238         *pi_count = 2;
 239     }
 240     else
 241     {
 242         /* none avaible */
 243         *mode = I_PRED_CHROMA_DC_128;
 244         *pi_count = 1;
 245     }
 246 }
 247
 248 /* MAX = 8 */
 249 static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
 250 {
 251     int b_a, b_b, b_c;
 252     static const unsigned int needmb[16] =
 253     {
 254         MB_LEFT|MB_TOP, MB_TOP,
 255         MB_LEFT,        MB_PRIVATE,
 256         MB_TOP,         MB_TOP|MB_TOPRIGHT,
 257         0,              MB_PRIVATE,
 258         MB_LEFT,        0,
 259         MB_LEFT,        MB_PRIVATE,
 260         0,              MB_PRIVATE,
 261         0,              MB_PRIVATE
 262     };
 263
 264     /* FIXME even when b_c == 0 there is some case where missing pixels
 265      * are emulated and thus more mode are available TODO
 266      * analysis and encode should be fixed too */
 267     b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
 268     b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
 269     b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
 270
 271     if( b_a && b_b )
 272     {
 273         *mode++ = I_PRED_4x4_DC;
 274         *mode++ = I_PRED_4x4_H;
 275         *mode++ = I_PRED_4x4_V;
 276         *mode++ = I_PRED_4x4_DDR;
 277         *mode++ = I_PRED_4x4_VR;
 278         *mode++ = I_PRED_4x4_HD;
 279         *mode++ = I_PRED_4x4_HU;
 280
 281         *pi_count = 7;
 282
 283         if( b_c )
 284         {
 285             *mode++ = I_PRED_4x4_DDL;
 286             *mode++ = I_PRED_4x4_VL;
 287             (*pi_count) += 2;
 288         }
 289     }
 290     else if( b_a && !b_b )
 291     {
 292         *mode++ = I_PRED_4x4_DC_LEFT;
 293         *mode++ = I_PRED_4x4_H;
 294         *pi_count = 2;
 295     }
 296     else if( !b_a && b_b )
 297     {
 298         *mode++ = I_PRED_4x4_DC_TOP;
 299         *mode++ = I_PRED_4x4_V;
 300         *pi_count = 2;
 301     }
 302     else
 303     {
 304         *mode++ = I_PRED_4x4_DC_128;
 305         *pi_count = 1;
 306     }
 307 }
 308
 309 static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res )
 310 {
 311     const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
 312     const int i_stride = h->mb.pic.i_stride[0];
 313     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 314     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 315
 316     int i, idx;
 317
 318     int i_max;
 319     int predict_mode[9];
 320
 321     /*---------------- Try all mode and calculate their score ---------------*/
 322
 323     /* 16x16 prediction selection */
 324     predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 325     for( i = 0; i < i_max; i++ )
 326     {
 327         int i_sad;
 328         int i_mode;
 329
 330         i_mode = predict_mode[i];
 331
 332         /* we do the prediction */
 333         h->predict_16x16[i_mode]( p_dst, i_stride );
 334
 335         /* we calculate the diff and get the square sum of the diff */
 336         i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
 337                 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
 338         /* if i_score is lower it is better */
 339         if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
 340         {
 341             res->i_predict16x16 = i_mode;
 342             res->i_sad_i16x16     = i_sad;
 343         }
 344     }
 345
 346     /* 4x4 prediction selection */
 347     if( flags & X264_ANALYSE_I4x4 )
 348     {
 349         res->i_sad_i4x4 = 0;
 350         for( idx = 0; idx < 16; idx++ )
 351         {
 352             uint8_t *p_src_by;
 353             uint8_t *p_dst_by;
 354             int     i_best;
 355             int x, y;
 356             int i_pred_mode;
 357
 358             i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
 359             x = block_idx_x[idx];
 360             y = block_idx_y[idx];
 361
 362             p_src_by = p_src + 4 * x + 4 * y * i_stride;
 363             p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
 364
 365             i_best = -1;
 366             predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
 367             for( i = 0; i < i_max; i++ )
 368             {
 369                 int i_sad;
 370                 int i_mode;
 371
 372                 i_mode = predict_mode[i];
 373
 374                 /* we do the prediction */
 375                 h->predict_4x4[i_mode]( p_dst_by, i_stride );
 376
 377                 /* we calculate diff and get the square sum of the diff */
 378                 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
 379                                                  p_src_by, i_stride );
 380
 381                 i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
 382
 383                 /* if i_score is lower it is better */
 384                 if( i_best == -1 || i_best > i_sad )
 385                 {
 386                     res->i_predict4x4[x][y] = i_mode;
 387                     i_best = i_sad;
 388                 }
 389             }
 390             res->i_sad_i4x4 += i_best;
 391
 392             /* we need to encode this mb now (for next ones) */
 393             h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
 394             x264_mb_encode_i4x4( h, idx, res->i_qp );
 395
 396             /* we need to store the 'fixed' version */
 397             h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
 398                 x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
 399         }
 400         res->i_sad_i4x4 += res->i_lambda * 24;    /* from JVT (SATD0) */
 401     }
 402 }
 403
 404 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
 405 {
 406     int i;
 407
 408     int i_max;
 409     int predict_mode[9];
 410
 411     uint8_t *p_dstc[2], *p_srcc[2];
 412     int      i_stride[2];
 413
 414     /* 8x8 prediction selection for chroma */
 415     p_dstc[0] = h->mb.pic.p_fdec[1];
 416     p_dstc[1] = h->mb.pic.p_fdec[2];
 417     p_srcc[0] = h->mb.pic.p_fenc[1];
 418     p_srcc[1] = h->mb.pic.p_fenc[2];
 419
 420     i_stride[0] = h->mb.pic.i_stride[1];
 421     i_stride[1] = h->mb.pic.i_stride[2];
 422
 423     predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
 424     res->i_sad_i8x8 = -1;
 425     for( i = 0; i < i_max; i++ )
 426     {
 427         int i_sad;
 428         int i_mode;
 429
 430         i_mode = predict_mode[i];
 431
 432         /* we do the prediction */
 433         h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
 434         h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
 435
 436         /* we calculate the cost */
 437         i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
 438                                          p_srcc[0], i_stride[0] ) +
 439                 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
 440                                          p_srcc[1], i_stride[1] ) +
 441                 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
 442
 443         /* if i_score is lower it is better */
 444         if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
 445         {
 446             res->i_predict8x8 = i_mode;
 447             res->i_sad_i8x8     = i_sad;
 448         }
 449     }
 450 }
 451
 452 static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 453 {
 454     x264_me_t m;
 455     int i_ref;
 456
 457     /* 16x16 Search on all ref frame */
 458     m.i_pixel = PIXEL_16x16;
 459     m.lm      = a->i_lambda;
 460     m.p_fenc  = h->mb.pic.p_fenc[0];
 461     m.i_stride= h->mb.pic.i_stride[0];
 462     m.i_mv_range = a->i_mv_range;
 463     m.b_mvc   = 0;
 464 //    m.mvc[0]  = 0;
 465 //    m.mvc[1]  = 0;
 466
 467     /* ME for ref 0 */
 468     m.p_fref = h->mb.pic.p_fref[0][0][0];
 469     x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
 470     x264_me_search( h, &m );
 471
 472     a->l0.i_ref = 0;
 473     a->l0.me16x16 = m;
 474
 475     for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
 476     {
 477         /* search with ref */
 478         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 479         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 480         x264_me_search( h, &m );
 481
 482         /* add ref cost */
 483         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 484
 485         if( m.cost < a->l0.me16x16.cost )
 486         {
 487             a->l0.i_ref = i_ref;
 488             a->l0.me16x16 = m;
 489         }
 490     }
 491
 492     /* Set global ref, needed for all others modes */
 493     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 494 }
 495
 496 static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
 497 {
 498     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 499     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 500
 501     int i;
 502
 503     /* XXX Needed for x264_mb_predict_mv */
 504     h->mb.i_partition = D_8x8;
 505
 506     for( i = 0; i < 4; i++ )
 507     {
 508         x264_me_t *m = &a->l0.me8x8[i];
 509         const int x8 = i%2;
 510         const int y8 = i/2;
 511
 512         m->i_pixel = PIXEL_8x8;
 513         m->lm      = a->i_lambda;
 514
 515         m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
 516         m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
 517         m->i_stride= h->mb.pic.i_stride[0];
 518         m->i_mv_range = a->i_mv_range;
 519
 520         if( i == 0 )
 521         {
 522             m->b_mvc   = 1;
 523             m->mvc[0] = a->l0.me16x16.mv[0];
 524             m->mvc[1] = a->l0.me16x16.mv[1];
 525         }
 526         else
 527         {
 528             m->b_mvc   = 0;
 529         }
 530
 531         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 532         x264_me_search( h, m );
 533
 534         x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
 535     }
 536
 537     a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
 538                    a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
 539 }
 540
 541 static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 542 {
 543     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 544     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 545
 546     int i;
 547
 548     /* XXX Needed for x264_mb_predict_mv */
 549     h->mb.i_partition = D_16x8;
 550
 551     for( i = 0; i < 2; i++ )
 552     {
 553         x264_me_t *m = &a->l0.me16x8[i];
 554
 555         m->i_pixel = PIXEL_16x8;
 556         m->lm      = a->i_lambda;
 557
 558         m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
 559         m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
 560         m->i_stride= h->mb.pic.i_stride[0];
 561         m->i_mv_range = a->i_mv_range;
 562
 563         m->b_mvc   = 1;
 564         m->mvc[0] = a->l0.me8x8[2*i].mv[0];
 565         m->mvc[1] = a->l0.me8x8[2*i].mv[1];
 566
 567         x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
 568         x264_me_search( h, m );
 569
 570         x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
 571     }
 572
 573     a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
 574 }
 575
 576 static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 577 {
 578     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 579     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 580
 581     int i;
 582
 583     /* XXX Needed for x264_mb_predict_mv */
 584     h->mb.i_partition = D_8x16;
 585
 586     for( i = 0; i < 2; i++ )
 587     {
 588         x264_me_t *m = &a->l0.me8x16[i];
 589
 590         m->i_pixel = PIXEL_8x16;
 591         m->lm      = a->i_lambda;
 592
 593         m->p_fenc  = &p_fenc[8*i];
 594         m->p_fref  = &p_fref[8*i];
 595         m->i_stride= h->mb.pic.i_stride[0];
 596         m->i_mv_range = a->i_mv_range;
 597
 598         m->b_mvc   = 1;
 599         m->mvc[0] = a->l0.me8x8[i].mv[0];
 600         m->mvc[1] = a->l0.me8x8[i].mv[1];
 601
 602         x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
 603         x264_me_search( h, m );
 604
 605         x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
 606     }
 607
 608     a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
 609 }
 610
 611 static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 612 {
 613     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 614     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 615
 616     int i4x4;
 617
 618     /* XXX Needed for x264_mb_predict_mv */
 619     h->mb.i_partition = D_8x8;
 620
 621     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 622     {
 623         const int idx = 4*i8x8 + i4x4;
 624         const int x4 = block_idx_x[idx];
 625         const int y4 = block_idx_y[idx];
 626
 627         x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
 628
 629         m->i_pixel = PIXEL_4x4;
 630         m->lm      = a->i_lambda;
 631
 632         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 633         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 634         m->i_stride= h->mb.pic.i_stride[0];
 635         m->i_mv_range = a->i_mv_range;
 636
 637         if( i4x4 == 0 )
 638         {
 639             m->b_mvc   = 1;
 640             m->mvc[0] = a->l0.me8x8[i8x8].mv[0];
 641             m->mvc[1] = a->l0.me8x8[i8x8].mv[1];
 642         }
 643         else
 644         {
 645             m->b_mvc   = 0;
 646         }
 647
 648         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 649         x264_me_search( h, m );
 650
 651         x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
 652     }
 653
 654     a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
 655                          a->l0.me4x4[i8x8][1].cost +
 656                          a->l0.me4x4[i8x8][2].cost +
 657                          a->l0.me4x4[i8x8][3].cost;
 658 }
 659
 660 static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 661 {
 662     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 663     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 664
 665     int i8x4;
 666
 667     /* XXX Needed for x264_mb_predict_mv */
 668     h->mb.i_partition = D_8x8;
 669
 670     for( i8x4 = 0; i8x4 < 2; i8x4++ )
 671     {
 672         const int idx = 4*i8x8 + 2*i8x4;
 673         const int x4 = block_idx_x[idx];
 674         const int y4 = block_idx_y[idx];
 675
 676         x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
 677
 678         m->i_pixel = PIXEL_8x4;
 679         m->lm      = a->i_lambda;
 680
 681         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 682         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 683         m->i_stride= h->mb.pic.i_stride[0];
 684         m->i_mv_range = a->i_mv_range;
 685
 686         if( i8x4 == 0 )
 687         {
 688             m->b_mvc   = 1;
 689             m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
 690             m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
 691         }
 692         else
 693         {
 694             m->b_mvc   = 0;
 695         }
 696
 697         x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
 698         x264_me_search( h, m );
 699
 700         x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
 701     }
 702
 703     a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost;
 704 }
 705
 706 static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 707 {
 708     uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
 709     uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
 710
 711     int i4x8;
 712
 713     /* XXX Needed for x264_mb_predict_mv */
 714     h->mb.i_partition = D_8x8;
 715
 716     for( i4x8 = 0; i4x8 < 2; i4x8++ )
 717     {
 718         const int idx = 4*i8x8 + i4x8;
 719         const int x4 = block_idx_x[idx];
 720         const int y4 = block_idx_y[idx];
 721
 722         x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
 723
 724         m->i_pixel = PIXEL_4x8;
 725         m->lm      = a->i_lambda;
 726
 727         m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
 728         m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
 729         m->i_stride= h->mb.pic.i_stride[0];
 730         m->i_mv_range = a->i_mv_range;
 731
 732         if( i4x8 == 0 )
 733         {
 734             m->b_mvc   = 1;
 735             m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
 736             m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
 737         }
 738         else
 739         {
 740             m->b_mvc   = 0;
 741         }
 742
 743         x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
 744         x264_me_search( h, m );
 745
 746         x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
 747     }
 748
 749     a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
 750 }
 751
 752
 753 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 754 {
 755     uint8_t pix1[16*16], pix2[16*16];
 756
 757     x264_me_t m;
 758     int i_ref;
 759
 760     /* 16x16 Search on all ref frame */
 761     m.i_pixel = PIXEL_16x16;
 762     m.lm      = a->i_lambda;
 763     m.p_fenc  = h->mb.pic.p_fenc[0];
 764     m.i_stride= h->mb.pic.i_stride[0];
 765     m.b_mvc   = 0;
 766     m.i_mv_range = a->i_mv_range;
 767
 768     /* ME for List 0 ref 0 */
 769     m.p_fref = h->mb.pic.p_fref[0][0][0];
 770     x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
 771     x264_me_search( h, &m );
 772
 773     a->l0.i_ref = 0;
 774     a->l0.me16x16 = m;
 775
 776     for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
 777     {
 778         /* search with ref */
 779         m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
 780         x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
 781         x264_me_search( h, &m );
 782
 783         /* add ref cost */
 784         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
 785
 786         if( m.cost < a->l0.me16x16.cost )
 787         {
 788             a->l0.i_ref = i_ref;
 789             a->l0.me16x16 = m;
 790         }
 791     }
 792
 793     /* ME for list 1 ref 0 */
 794     m.p_fref = h->mb.pic.p_fref[1][0][0];
 795     x264_mb_predict_mv_16x16( h, 1, 0, m.mvp );
 796     x264_me_search( h, &m );
 797
 798     a->l1.i_ref = 0;
 799     a->l1.me16x16 = m;
 800
 801     for( i_ref = 1; i_ref < h->i_ref1; i_ref++ )
 802     {
 803         /* search with ref */
 804         m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
 805         x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
 806         x264_me_search( h, &m );
 807
 808         /* add ref cost */
 809         m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
 810
 811         if( m.cost < a->l1.me16x16.cost )
 812         {
 813             a->l1.i_ref = i_ref;
 814             a->l1.me16x16 = m;
 815         }
 816     }
 817
 818     /* Set global ref, needed for all others modes FIXME some work for mixed block mode */
 819     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
 820     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
 821
 822     /* get cost of BI mode */
 823     h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
 824                     pix1, 16,
 825                     a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
 826                     16, 16 );
 827     h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
 828                     pix2, 16,
 829                     a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
 830                     16, 16 );
 831     h->pixf.avg[PIXEL_16x16]( pix1, 16, pix2, 16 );
 832
 833     a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 ) +
 834                        a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref ) +
 835                                        bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, a->l1.i_ref ) +
 836                                        bs_size_se( a->l0.me16x16.mv[0] - a->l0.me16x16.mvp[0] ) +
 837                                        bs_size_se( a->l0.me16x16.mv[1] - a->l0.me16x16.mvp[1] ) +
 838                                        bs_size_se( a->l1.me16x16.mv[0] - a->l1.me16x16.mvp[0] ) +
 839                                        bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
 840 }
 841
 842 /*****************************************************************************
 843  * x264_macroblock_analyse:
 844  *****************************************************************************/
 845 void x264_macroblock_analyse( x264_t *h )
 846 {
 847     x264_mb_analysis_t analysis;
 848     int i;
 849
 850     h->mb.qp[h->mb.i_mb_xy] = x264_ratecontrol_qp(h);
 851
 852     /* FIXME check if it's 12 */
 853     if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 )
 854         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12;
 855     else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 )
 856         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12;
 857
 858     /* init analysis */
 859     x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] );
 860
 861     /*--------------------------- Do the analysis ---------------------------*/
 862     if( h->sh.i_type == SLICE_TYPE_I )
 863     {
 864         x264_mb_analyse_intra( h, &analysis );
 865
 866         if( analysis.i_sad_i4x4 >= 0 &&  analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
 867             h->mb.i_type = I_4x4;
 868         else
 869             h->mb.i_type = I_16x16;
 870     }
 871     else if( h->sh.i_type == SLICE_TYPE_P )
 872     {
 873         const unsigned int i_neighbour = h->mb.i_neighbour;
 874
 875         int b_skip = 0;
 876         int i_cost;
 877
 878         /* Fast P_SKIP detection */
 879         if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) ||
 880             ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) ||
 881             ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) ||
 882             ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) )
 883         {
 884             b_skip = x264_macroblock_probe_pskip( h );
 885         }
 886
 887         if( b_skip )
 888         {
 889             h->mb.i_type = P_SKIP;
 890             h->mb.i_partition = D_16x16;
 891         }
 892         else
 893         {
 894             const unsigned int flags = h->param.analyse.inter;
 895             int i_type;
 896             int i_partition;
 897
 898             x264_mb_analyse_inter_p16x16( h, &analysis );
 899             if( flags & X264_ANALYSE_PSUB16x16 )
 900                 x264_mb_analyse_inter_p8x8( h, &analysis );
 901
 902             /* Select best inter mode */
 903             i_type = P_L0;
 904             i_partition = D_16x16;
 905             i_cost = analysis.l0.me16x16.cost;
 906
 907             if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
 908                 analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
 909             {
 910                 int i;
 911
 912                 i_type = P_8x8;
 913                 i_partition = D_8x8;
 914                 h->mb.i_sub_partition[0] = D_L0_8x8;
 915                 h->mb.i_sub_partition[1] = D_L0_8x8;
 916                 h->mb.i_sub_partition[2] = D_L0_8x8;
 917                 h->mb.i_sub_partition[3] = D_L0_8x8;
 918
 919                 i_cost = analysis.l0.i_cost8x8;
 920
 921                 /* Do sub 8x8 */
 922                 if( flags & X264_ANALYSE_PSUB8x8 )
 923                 {
 924                     for( i = 0; i < 4; i++ )
 925                     {
 926                         x264_mb_analyse_inter_p4x4( h, &analysis, i );
 927                         if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
 928                         {
 929                             int i_cost8x8;
 930
 931                             h->mb.i_sub_partition[i] = D_L0_4x4;
 932                             i_cost8x8 = analysis.l0.i_cost4x4[i];
 933
 934                             x264_mb_analyse_inter_p8x4( h, &analysis, i );
 935                             if( analysis.l0.i_cost8x4[i] < analysis.l0.i_cost4x4[i] )
 936                             {
 937                                 h->mb.i_sub_partition[i] = D_L0_8x4;
 938                                 i_cost8x8 = analysis.l0.i_cost8x4[i];
 939                             }
 940
 941                             x264_mb_analyse_inter_p4x8( h, &analysis, i );
 942                             if( analysis.l0.i_cost4x8[i] < analysis.l0.i_cost4x4[i] )
 943                             {
 944                                 h->mb.i_sub_partition[i] = D_L0_4x8;
 945                                 i_cost8x8 = analysis.l0.i_cost4x8[i];
 946                             }
 947
 948                             i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
 949                         }
 950                     }
 951                 }
 952
 953                 /* Now do sub 16x8/8x16 */
 954                 x264_mb_analyse_inter_p16x8( h, &analysis );
 955                 if( analysis.l0.i_cost16x8 < i_cost )
 956                 {
 957                     i_type = P_L0;
 958                     i_partition = D_16x8;
 959                     i_cost = analysis.l0.i_cost16x8;
 960                 }
 961
 962                 x264_mb_analyse_inter_p8x16( h, &analysis );
 963                 if( analysis.l0.i_cost8x16 < i_cost )
 964                 {
 965                     i_type = P_L0;
 966                     i_partition = D_8x16;
 967                     i_cost = analysis.l0.i_cost8x16;
 968                 }
 969             }
 970
 971             h->mb.i_type = i_type;
 972             h->mb.i_partition = i_partition;
 973
 974             /* refine qpel */
 975             if( h->mb.i_partition == D_16x16 )
 976             {
 977                 x264_me_refine_qpel( h, &analysis.l0.me16x16 );
 978                 i_cost = analysis.l0.me16x16.cost;
 979             }
 980             else if( h->mb.i_partition == D_16x8 )
 981             {
 982                 x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
 983                 x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
 984                 i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
 985             }
 986             else if( h->mb.i_partition == D_8x16 )
 987             {
 988                 x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
 989                 x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
 990                 i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
 991             }
 992             else if( h->mb.i_partition == D_8x8 )
 993             {
 994                 int i8x8;
 995                 i_cost = 0;
 996                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
 997                 {
 998                     switch( h->mb.i_sub_partition[i8x8] )
 999                     {
1000                         case D_L0_8x8:
1001                             x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
1002                             i_cost += analysis.l0.me8x8[i8x8].cost;
1003                             break;
1004                         case D_L0_8x4:
1005                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
1006                             x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
1007                             i_cost += analysis.l0.me8x4[i8x8][0].cost +
1008                                       analysis.l0.me8x4[i8x8][1].cost;
1009                             break;
1010                         case D_L0_4x8:
1011                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
1012                             x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
1013                             i_cost += analysis.l0.me4x8[i8x8][0].cost +
1014                                       analysis.l0.me4x8[i8x8][1].cost;
1015                             break;
1016
1017                         case D_L0_4x4:
1018                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
1019                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
1020                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
1021                             x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
1022                             i_cost += analysis.l0.me4x4[i8x8][0].cost +
1023                                       analysis.l0.me4x4[i8x8][1].cost +
1024                                       analysis.l0.me4x4[i8x8][2].cost +
1025                                       analysis.l0.me4x4[i8x8][3].cost;
1026                             break;
1027                         default:
1028                             fprintf( stderr, "internal error (!8x8 && !4x4)" );
1029                             break;
1030                     }
1031                 }
1032             }
1033
1034             x264_mb_analyse_intra( h, &analysis );
1035             if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1036             {
1037                 h->mb.i_type = I_16x16;
1038                 i_cost = analysis.i_sad_i16x16;
1039             }
1040
1041             if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1042             {
1043                 h->mb.i_type = I_4x4;
1044                 i_cost = analysis.i_sad_i4x4;
1045             }
1046         }
1047     }
1048     else if( h->sh.i_type == SLICE_TYPE_B )
1049     {
1050         int i_cost;
1051
1052         /* best inter mode */
1053         x264_mb_analyse_inter_b16x16( h, &analysis );
1054         h->mb.i_type = B_L0_L0;
1055         h->mb.i_partition = D_16x16;
1056         i_cost = analysis.l0.me16x16.cost;
1057
1058         if( analysis.l1.me16x16.cost < i_cost )
1059         {
1060             h->mb.i_type = B_L1_L1;
1061             i_cost = analysis.l1.me16x16.cost;
1062         }
1063         if( analysis.i_cost16x16bi < i_cost )
1064         {
1065             h->mb.i_type = B_BI_BI;
1066             i_cost = analysis.i_cost16x16bi;
1067         }
1068
1069         /* best intra mode */
1070         x264_mb_analyse_intra( h, &analysis );
1071         if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
1072         {
1073             h->mb.i_type = I_16x16;
1074             i_cost = analysis.i_sad_i16x16;
1075         }
1076         if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
1077         {
1078             h->mb.i_type = I_4x4;
1079             i_cost = analysis.i_sad_i4x4;
1080         }
1081     }
1082 #undef BEST_TYPE
1083
1084     /*-------------------- Update MB from the analysis ----------------------*/
1085     h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
1086     switch( h->mb.i_type )
1087     {
1088         case I_4x4:
1089             for( i = 0; i < 16; i++ )
1090             {
1091                 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] =
1092                     analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1093             }
1094
1095             x264_mb_analyse_intra_chroma( h, &analysis );
1096             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1097             break;
1098         case I_16x16:
1099             h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
1100
1101             x264_mb_analyse_intra_chroma( h, &analysis );
1102             h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
1103             break;
1104
1105         case P_L0:
1106             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1107             switch( h->mb.i_partition )
1108             {
1109                 case D_16x16:
1110                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1111                     break;
1112
1113                 case D_16x8:
1114                     x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].mv[0], analysis.l0.me16x8[0].mv[1] );
1115                     x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].mv[0], analysis.l0.me16x8[1].mv[1] );
1116                     break;
1117
1118                 case D_8x16:
1119                     x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].mv[0], analysis.l0.me8x16[0].mv[1] );
1120                     x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].mv[0], analysis.l0.me8x16[1].mv[1] );
1121                     break;
1122
1123                 default:
1124                     fprintf( stderr, "internal error P_L0 and partition=%d\n", h->mb.i_partition );
1125                     break;
1126             }
1127             break;
1128
1129         case P_8x8:
1130             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1131             for( i = 0; i < 4; i++ )
1132             {
1133                 const int x = 2*(i%2);
1134                 const int y = 2*(i/2);
1135
1136                 switch( h->mb.i_sub_partition[i] )
1137                 {
1138                     case D_L0_8x8:
1139                         x264_macroblock_cache_mv( h, x, y, 2, 2, 0, analysis.l0.me8x8[i].mv[0], analysis.l0.me8x8[i].mv[1] );
1140                         break;
1141                     case D_L0_8x4:
1142                         x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, analysis.l0.me8x4[i][0].mv[0], analysis.l0.me8x4[i][0].mv[1] );
1143                         x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, analysis.l0.me8x4[i][1].mv[0], analysis.l0.me8x4[i][1].mv[1] );
1144                         break;
1145                     case D_L0_4x8:
1146                         x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, analysis.l0.me4x8[i][0].mv[0], analysis.l0.me4x8[i][0].mv[1] );
1147                         x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, analysis.l0.me4x8[i][1].mv[0], analysis.l0.me4x8[i][1].mv[1] );
1148                         break;
1149                     case D_L0_4x4:
1150                         x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, analysis.l0.me4x4[i][0].mv[0], analysis.l0.me4x4[i][0].mv[1] );
1151                         x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, analysis.l0.me4x4[i][1].mv[0], analysis.l0.me4x4[i][1].mv[1] );
1152                         x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, analysis.l0.me4x4[i][2].mv[0], analysis.l0.me4x4[i][2].mv[1] );
1153                         x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, analysis.l0.me4x4[i][3].mv[0], analysis.l0.me4x4[i][3].mv[1] );
1154                         break;
1155                     default:
1156                         fprintf( stderr, "internal error\n" );
1157                         break;
1158                 }
1159             }
1160             break;
1161
1162         case P_SKIP:
1163         {
1164             int mvp[2];
1165             x264_mb_predict_mv_pskip( h, mvp );
1166             /* */
1167             h->mb.i_partition = D_16x16;
1168             x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
1169             x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, mvp[0], mvp[1] );
1170             break;
1171         }
1172
1173         case B_L0_L0:
1174             switch( h->mb.i_partition )
1175             {
1176                 case D_16x16:
1177                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1178                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1179
1180                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
1181                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1,  0, 0 );
1182                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1,  0, 0 );
1183                     break;
1184                 default:
1185                     fprintf( stderr, "internal error\n" );
1186                     break;
1187             }
1188             break;
1189         case B_L1_L1:
1190             switch( h->mb.i_partition )
1191             {
1192                 case D_16x16:
1193                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
1194                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0,  0, 0 );
1195                     x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0,  0, 0 );
1196
1197                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1198                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1199                     break;
1200
1201                 default:
1202                     fprintf( stderr, "internal error\n" );
1203                     break;
1204             }
1205             break;
1206         case B_BI_BI:
1207             switch( h->mb.i_partition )
1208             {
1209                 case D_16x16:
1210                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.i_ref );
1211                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.mv[0], analysis.l0.me16x16.mv[1] );
1212
1213                     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, analysis.l1.i_ref );
1214                     x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, analysis.l1.me16x16.mv[0], analysis.l1.me16x16.mv[1] );
1215                     break;
1216
1217                 default:
1218                     fprintf( stderr, "internal error\n" );
1219                     break;
1220             }
1221             break;
1222
1223         default:
1224             fprintf( stderr, "internal error (invalid MB type)\n" );
1225             break;
1226     }
1227 }
1228