git.sesse.net Git - x264/blob - testing/edge-detec.c

   1 /*****************************************************************************
   2  * macroblock.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: edge-detec.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27 #include <stdint.h>
  28 #include <math.h>
  29
  30 #include "common.h"
  31 #include "me.h"
  32 #include "vlc.h"
  33
  34 static inline int x264_median( int a, int b, int c )
  35 {
  36     int min = a, max =a;
  37     if( b < min )
  38     {
  39         min = b;
  40     }
  41     else
  42     {
  43         max = b;    /* no need to do 'b > max' (more consuming than always doing affectation) */
  44     }
  45     if( c < min )
  46     {
  47         min = c;
  48     }
  49     else if( c > max )
  50     {
  51         max = c;
  52     }
  53
  54     return a + b + c - min - max;
  55 }
  56
  57 static const uint8_t intra4x4_cbp_to_golomb[48]=
  58 {
  59   3, 29, 30, 17, 31, 18, 37,  8, 32, 38, 19,  9, 20, 10, 11,  2,
  60  16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
  61  41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0
  62 };
  63 static const uint8_t inter_cbp_to_golomb[48]=
  64 {
  65   0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
  66   1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
  67   6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
  68 };
  69
  70 static const uint8_t block_idx_x[16] =
  71 {
  72     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
  73 };
  74 static const uint8_t block_idx_y[16] =
  75 {
  76     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
  77 };
  78 static const uint8_t block_idx_xy[4][4] =
  79 {
  80     { 0, 2, 8,  10},
  81     { 1, 3, 9,  11},
  82     { 4, 6, 12, 14},
  83     { 5, 7, 13, 15}
  84 };
  85
  86 static const int quant_mf[6][4][4] =
  87 {
  88     {  { 13107, 8066, 13107, 8066}, {  8066, 5243,  8066, 5243},
  89        { 13107, 8066, 13107, 8066}, {  8066, 5243,  8066, 5243}  },
  90     {  { 11916, 7490, 11916, 7490}, {  7490, 4660,  7490, 4660},
  91        { 11916, 7490, 11916, 7490}, {  7490, 4660,  7490, 4660}  },
  92     {  { 10082, 6554, 10082, 6554}, {  6554, 4194,  6554, 4194},
  93        { 10082, 6554, 10082, 6554}, {  6554, 4194,  6554, 4194}  },
  94     {  {  9362, 5825,  9362, 5825}, {  5825, 3647,  5825, 3647},
  95        {  9362, 5825,  9362, 5825}, {  5825, 3647,  5825, 3647}  },
  96     {  {  8192, 5243,  8192, 5243}, {  5243, 3355,  5243, 3355},
  97        {  8192, 5243,  8192, 5243}, {  5243, 3355,  5243, 3355}  },
  98     {  {  7282, 4559,  7282, 4559}, {  4559, 2893,  4559, 2893},
  99        {  7282, 4559,  7282, 4559}, {  4559, 2893,  4559, 2893}  }
 100 };
 101
 102 static const int dequant_mf[6][4][4] =
 103 {
 104     { {10, 13, 10, 13}, {13, 16, 13, 16}, {10, 13, 10, 13}, {13, 16, 13, 16} },
 105     { {11, 14, 11, 14}, {14, 18, 14, 18}, {11, 14, 11, 14}, {14, 18, 14, 18} },
 106     { {13, 16, 13, 16}, {16, 20, 16, 20}, {13, 16, 13, 16}, {16, 20, 16, 20} },
 107     { {14, 18, 14, 18}, {18, 23, 18, 23}, {14, 18, 14, 18}, {18, 23, 18, 23} },
 108     { {16, 20, 16, 20}, {20, 25, 20, 25}, {16, 20, 16, 20}, {20, 25, 20, 25} },
 109     { {18, 23, 18, 23}, {23, 29, 23, 29}, {18, 23, 18, 23}, {23, 29, 23, 29} }
 110 };
 111
 112
 113 static int predict_pred_intra4x4_mode( x264_t *h, x264_macroblock_t *mb, int idx )
 114 {
 115     x264_macroblock_t *mba = mb->context->block[idx].mba;
 116     x264_macroblock_t *mbb = mb->context->block[idx].mbb;
 117
 118     int i_mode_a = I_PRED_4x4_DC;
 119     int i_mode_b = I_PRED_4x4_DC;
 120
 121     if( !mba || !mbb )
 122     {
 123         return I_PRED_4x4_DC;
 124     }
 125
 126     if( mba->i_type == I_4x4 )
 127     {
 128         i_mode_a = mb->context->block[idx].bka->i_intra4x4_pred_mode;
 129     }
 130     if( mbb->i_type == I_4x4 )
 131     {
 132         i_mode_b = mb->context->block[idx].bkb->i_intra4x4_pred_mode;
 133     }
 134
 135     return X264_MIN( i_mode_a, i_mode_b );
 136 }
 137
 138 static int predict_non_zero_code( x264_t *h, x264_macroblock_t *mb, int idx )
 139 {
 140     x264_macroblock_t *mba = mb->context->block[idx].mba;
 141     x264_macroblock_t *mbb = mb->context->block[idx].mbb;
 142
 143     int i_z_a = 0x80, i_z_b = 0x80;
 144     int i_ret;
 145
 146     /* none avail -> 0, one avail -> this one, both -> (a+b+1)>>1 */
 147     if( mba )
 148     {
 149         i_z_a = mb->context->block[idx].bka->i_non_zero_count;
 150     }
 151     if( mbb )
 152     {
 153         i_z_b = mb->context->block[idx].bkb->i_non_zero_count;
 154     }
 155
 156     i_ret = i_z_a+i_z_b;
 157     if( i_ret < 0x80 )
 158     {
 159         i_ret = ( i_ret + 1 ) >> 1;
 160     }
 161     return i_ret & 0x7f;
 162 }
 163
 164
 165 /*
 166  * Handle intra mb
 167  */
 168 /* Max = 4 */
 169 static void predict_16x16_mode_available( x264_macroblock_t *mb, int *mode, int *pi_count )
 170 {
 171     if( ( mb->i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 172     {
 173         /* top and left avaible */
 174         *mode++ = I_PRED_16x16_DC;
 175         *mode++ = I_PRED_16x16_V;
 176         *mode++ = I_PRED_16x16_H;
 177         *mode++ = I_PRED_16x16_P;
 178         *pi_count = 4;
 179     }
 180     else if( ( mb->i_neighbour & MB_LEFT ) )
 181     {
 182         /* left available*/
 183         *mode++ = I_PRED_16x16_DC_LEFT;
 184         *mode++ = I_PRED_16x16_H;
 185         *pi_count = 2;
 186     }
 187     else if( ( mb->i_neighbour & MB_TOP ) )
 188     {
 189         /* top available*/
 190         *mode++ = I_PRED_16x16_DC_TOP;
 191         *mode++ = I_PRED_16x16_V;
 192         *pi_count = 2;
 193     }
 194     else
 195     {
 196         /* none avaible */
 197         *mode = I_PRED_16x16_DC_128;
 198         *pi_count = 1;
 199     }
 200 }
 201
 202 /* Max = 4 */
 203 static void predict_8x8_mode_available( x264_macroblock_t *mb, int *mode, int *pi_count )
 204 {
 205     if( ( mb->i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
 206     {
 207         /* top and left avaible */
 208         *mode++ = I_PRED_CHROMA_DC;
 209         *mode++ = I_PRED_CHROMA_V;
 210         *mode++ = I_PRED_CHROMA_H;
 211         *mode++ = I_PRED_CHROMA_P;
 212         *pi_count = 4;
 213     }
 214     else if( ( mb->i_neighbour & MB_LEFT ) )
 215     {
 216         /* left available*/
 217         *mode++ = I_PRED_CHROMA_DC_LEFT;
 218         *mode++ = I_PRED_CHROMA_H;
 219         *pi_count = 2;
 220     }
 221     else if( ( mb->i_neighbour & MB_TOP ) )
 222     {
 223         /* top available*/
 224         *mode++ = I_PRED_CHROMA_DC_TOP;
 225         *mode++ = I_PRED_CHROMA_V;
 226         *pi_count = 2;
 227     }
 228     else
 229     {
 230         /* none avaible */
 231         *mode = I_PRED_CHROMA_DC_128;
 232         *pi_count = 1;
 233     }
 234 }
 235
 236 /* MAX = 8 */
 237 static void predict_4x4_mode_available( x264_macroblock_t *mb, int idx, int *mode, int *pi_count )
 238 {
 239     int b_a, b_b, b_c;
 240     static const int needmb[16] =
 241     {
 242         MB_LEFT|MB_TOP, MB_TOP,
 243         MB_LEFT,        MB_PRIVATE,
 244         MB_TOP,         MB_TOP|MB_TOPRIGHT,
 245         0,              MB_PRIVATE,
 246         MB_LEFT,        0,
 247         MB_LEFT,        MB_PRIVATE,
 248         0,              MB_PRIVATE,
 249         0,              MB_PRIVATE
 250     };
 251
 252     /* FIXME even when b_c == 0 there is some case where missing pixels
 253      * are emulated and thus more mode are available TODO
 254      * analysis and encode should be fixed too */
 255     b_a = (needmb[idx]&mb->i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
 256     b_b = (needmb[idx]&mb->i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
 257     b_c = (needmb[idx]&mb->i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
 258
 259     if( b_a && b_b )
 260     {
 261         *mode++ = I_PRED_4x4_DC;
 262         *mode++ = I_PRED_4x4_H;
 263         *mode++ = I_PRED_4x4_V;
 264         *mode++ = I_PRED_4x4_DDR;
 265         *mode++ = I_PRED_4x4_VR;
 266         *mode++ = I_PRED_4x4_HD;
 267         *mode++ = I_PRED_4x4_HU;
 268
 269         *pi_count = 7;
 270
 271         if( b_c )
 272         {
 273             *mode++ = I_PRED_4x4_DDL;
 274             *mode++ = I_PRED_4x4_VL;
 275             (*pi_count) += 2;
 276         }
 277     }
 278     else if( b_a && !b_b )
 279     {
 280         *mode++ = I_PRED_4x4_DC_LEFT;
 281         *mode++ = I_PRED_4x4_H;
 282         *pi_count = 2;
 283     }
 284     else if( !b_a && b_b )
 285     {
 286         *mode++ = I_PRED_4x4_DC_TOP;
 287         *mode++ = I_PRED_4x4_V;
 288         *pi_count = 2;
 289     }
 290     else
 291     {
 292         *mode++ = I_PRED_4x4_DC_128;
 293         *pi_count = 1;
 294     }
 295 }
 296
 297 /****************************************************************************
 298  * Scan and Quant functions
 299  ****************************************************************************/
 300 static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
 301 static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
 302
 303 static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
 304 {
 305     int i;
 306
 307     for( i = 0; i < 16; i++ )
 308     {
 309         level[i] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
 310     }
 311 }
 312 static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
 313 {
 314     int i;
 315
 316     for( i = 1; i < 16; i++ )
 317     {
 318         level[i - 1] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
 319     }
 320 }
 321
 322 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
 323 {
 324     level[0] = dct[0][0];
 325     level[1] = dct[0][1];
 326     level[2] = dct[1][0];
 327     level[3] = dct[1][1];
 328 }
 329
 330
 331 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
 332 {
 333     int i_qbits = 15 + i_qscale / 6;
 334     int i_mf = i_qscale % 6;
 335     int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 336
 337     int x,y;
 338     for( y = 0; y < 4; y++ )
 339     {
 340         for( x = 0; x < 4; x++ )
 341         {
 342             if( dct[y][x] > 0 )
 343             {
 344                 dct[y][x] =( f + (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][y][x] ) >> i_qbits;
 345             }
 346             else
 347             {
 348                 dct[y][x] = - ( ( f - (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][y][x] ) >> i_qbits );
 349             }
 350         }
 351     }
 352 }
 353 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale, int b_intra )
 354 {
 355     int i_qbits = 15 + i_qscale / 6;
 356     int i_mf = i_qscale % 6;
 357     int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 358
 359     int x,y;
 360     for( y = 0; y < 4; y++ )
 361     {
 362         for( x = 0; x < 4; x++ )
 363         {
 364             if( dct[y][x] > 0 )
 365             {
 366                 dct[y][x] =( 2*f + (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][0][0] ) >> ( 1 + i_qbits );
 367             }
 368             else
 369             {
 370                 dct[y][x] = - ( ( 2*f - (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][0][0] ) >> (1 + i_qbits ) );
 371             }
 372         }
 373     }
 374 }
 375 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
 376 {
 377     int i_qbits = 15 + i_qscale / 6;
 378     int i_mf = i_qscale % 6;
 379     int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 380
 381     int x,y;
 382     for( y = 0; y < 2; y++ )
 383     {
 384         for( x = 0; x < 2; x++ )
 385         {
 386             /* XXX: is int64_t really needed ? */
 387             if( dct[y][x] > 0 )
 388             {
 389                 dct[y][x] =( 2*f + (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][0][0] ) >> ( 1 + i_qbits );
 390             }
 391             else
 392             {
 393                 dct[y][x] = - ( ( 2*f - (int64_t)dct[y][x]  * (int64_t)quant_mf[i_mf][0][0] ) >> (1 + i_qbits ) );
 394             }
 395         }
 396     }
 397 }
 398
 399 static void dequant_4x4_dc( int16_t dct[4][4], int i_qscale )
 400 {
 401     int i_mf = i_qscale%6;
 402     int i_qbits = i_qscale/6;
 403     int f;
 404     int x,y;
 405
 406     if( i_qbits <= 1 )
 407     {
 408         f = 1 << ( 1 - i_qbits );
 409     }
 410     else
 411     {
 412         f = 0;
 413     }
 414
 415     for( y = 0; y < 4; y++ )
 416     {
 417         for( x = 0; x < 4; x++ )
 418         {
 419             if( i_qbits >= 2 )
 420             {
 421                 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) << (i_qbits - 2);
 422             }
 423             else
 424             {
 425                 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] + f ) >> ( 2 -i_qbits );
 426             }
 427         }
 428     }
 429 }
 430
 431 static void dequant_2x2_dc( int16_t dct[2][2], int i_qscale )
 432 {
 433     int i_mf = i_qscale%6;
 434     int i_qbits = i_qscale/6;
 435     int x,y;
 436
 437     for( y = 0; y < 2; y++ )
 438     {
 439         for( x = 0; x < 2; x++ )
 440         {
 441             if( i_qbits >= 1 )
 442             {
 443                 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) << (i_qbits - 1);
 444             }
 445             else
 446             {
 447                 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) >> 1;
 448             }
 449         }
 450     }
 451 }
 452 static void dequant_4x4( int16_t dct[4][4], int i_qscale )
 453 {
 454     int i_mf = i_qscale%6;
 455     int i_qbits = i_qscale/6;
 456     int x,y;
 457
 458     for( y = 0; y < 4; y++ )
 459     {
 460         for( x = 0; x < 4; x++ )
 461         {
 462             dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][x][y] ) << i_qbits;
 463         }
 464     }
 465 }
 466
 467 static inline int array_non_zero_count( int *v, int i_count )
 468 {
 469     int i;
 470     int i_nz;
 471
 472     for( i = 0, i_nz = 0; i < i_count; i++ )
 473     {
 474         if( v[i] )
 475         {
 476             i_nz++;
 477         }
 478     }
 479     return i_nz;
 480 }
 481
 482 /* TODO : use a table instead */
 483 static int mb_partition_count( int i_partition )
 484 {
 485     switch( i_partition )
 486     {
 487         case D_8x8:
 488             return 4;
 489         case D_16x8:
 490         case D_8x16:
 491             return 2;
 492         case D_16x16:
 493             return 1;
 494         default:
 495             /* should never occur */
 496             return 0;
 497     }
 498 }
 499
 500 static int mb_sub_partition_count( int i_partition )
 501 {
 502     switch( i_partition )
 503     {
 504         case D_L0_4x4:
 505         case D_L1_4x4:
 506         case D_BI_4x4:
 507             return 4;
 508         case D_L0_4x8:
 509         case D_L1_4x8:
 510         case D_BI_4x8:
 511         case D_L0_8x4:
 512         case D_L1_8x4:
 513         case D_BI_8x4:
 514             return 2;
 515         case D_L0_8x8:
 516         case D_L1_8x8:
 517         case D_BI_8x8:
 518         case D_DIRECT_8x8:
 519             return 1;
 520         default:
 521             /* should never occur */
 522             return 0;
 523     }
 524 }
 525
 526 static inline void x264_macroblock_partition_getxy( x264_macroblock_t *mb, int i_part, int i_sub, int *x, int *y )
 527 {
 528     if( mb->i_partition == D_16x16 )
 529     {
 530         *x  = 0;
 531         *y  = 0;
 532     }
 533     else if( mb->i_partition == D_16x8 )
 534     {
 535         *x = 0;
 536         *y = 2*i_part;
 537     }
 538     else if( mb->i_partition == D_8x16 )
 539     {
 540         *x = 2*i_part;
 541         *y = 0;
 542     }
 543     else if( mb->i_partition == D_8x8 )
 544     {
 545         *x = 2 * (i_part%2);
 546         *y = 2 * (i_part/2);
 547
 548         if( IS_SUB4x4( mb->i_sub_partition[i_part] ) )
 549         {
 550             (*x) += i_sub%2;
 551             (*y) += i_sub/2;
 552         }
 553         else if( IS_SUB4x8( mb->i_sub_partition[i_part] ) )
 554         {
 555             (*x) += i_sub;
 556         }
 557         else if( IS_SUB8x4( mb->i_sub_partition[i_part] ) )
 558         {
 559             (*y) += i_sub;
 560         }
 561     }
 562 }
 563 static inline void x264_macroblock_partition_size( x264_macroblock_t *mb, int i_part, int i_sub, int *w, int *h )
 564 {
 565     if( mb->i_partition == D_16x16 )
 566     {
 567         *w  = 4;
 568         *h  = 4;
 569     }
 570     else if( mb->i_partition == D_16x8 )
 571     {
 572         *w = 4;
 573         *h = 2;
 574     }
 575     else if( mb->i_partition == D_8x16 )
 576     {
 577         *w = 2;
 578         *h = 4;
 579     }
 580     else if( mb->i_partition == D_8x8 )
 581     {
 582         if( IS_SUB4x4( mb->i_sub_partition[i_part] ) )
 583         {
 584             *w = 1;
 585             *h = 1;
 586         }
 587         else if( IS_SUB4x8( mb->i_sub_partition[i_part] ) )
 588         {
 589             *w = 1;
 590             *h = 2;
 591         }
 592         else if( IS_SUB8x4( mb->i_sub_partition[i_part] ) )
 593         {
 594             *w = 2;
 595             *h = 1;
 596         }
 597         else
 598         {
 599             *w = 2;
 600             *h = 2;
 601         }
 602     }
 603 }
 604
 605 void x264_macroblock_partition_set( x264_macroblock_t *mb, int i_list, int i_part, int i_sub, int i_ref, int mx, int my )
 606 {
 607     int x,  y;
 608     int w,  h;
 609     int dx, dy;
 610
 611     x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
 612     x264_macroblock_partition_size ( mb, i_part, i_sub, &w, &h );
 613
 614     for( dx = 0; dx < w; dx++ )
 615     {
 616         for( dy = 0; dy < h; dy++ )
 617         {
 618             mb->partition[x+dx][y+dy].i_ref[i_list] = i_ref;
 619             mb->partition[x+dx][y+dy].mv[i_list][0] = mx;
 620             mb->partition[x+dx][y+dy].mv[i_list][1] = my;
 621         }
 622     }
 623 }
 624
 625 void x264_macroblock_partition_get( x264_macroblock_t *mb, int i_list, int i_part, int i_sub, int *pi_ref, int *pi_mx, int *pi_my )
 626 {
 627     int x,y;
 628
 629     x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
 630
 631     if( pi_ref )
 632     {
 633         *pi_ref = mb->partition[x][y].i_ref[i_list];
 634     }
 635     if( pi_mx && pi_my )
 636     {
 637         *pi_mx  = mb->partition[x][y].mv[i_list][0];
 638         *pi_my  = mb->partition[x][y].mv[i_list][1];
 639     }
 640 }
 641
 642 /* ARrrrg so unbeautifull, and unoptimised for common case */
 643 void x264_macroblock_predict_mv( x264_macroblock_t *mb, int i_list, int i_part, int i_subpart, int *mvxp, int *mvyp )
 644 {
 645     int x, y, xn, yn;
 646     int w, h;
 647     int i_ref;
 648
 649     int i_refa = -1;
 650     int i_refb = -1;
 651     int i_refc = -1;
 652
 653     int mvxa = 0, mvxb = 0, mvxc = 0;
 654     int mvya = 0, mvyb = 0, mvyc = 0;
 655
 656     x264_macroblock_t *mbn;
 657
 658
 659     x264_macroblock_partition_getxy( mb, i_part, i_subpart, &x, &y );
 660     x264_macroblock_partition_size( mb, i_part, i_subpart, &w, &h );
 661     i_ref = mb->partition[x][y].i_ref[i_list];
 662
 663     /* Left  pixel (-1,0)*/
 664     xn = x - 1;
 665     mbn = mb;
 666     if( xn < 0 )
 667     {
 668         xn += 4;
 669         mbn = mb->mba;
 670     }
 671     if( mbn )
 672     {
 673         i_refa = -2;
 674         if( !IS_INTRA( mbn->i_type ) )
 675         {
 676             i_refa = mbn->partition[xn][y].i_ref[i_list];
 677             mvxa   = mbn->partition[xn][y].mv[i_list][0];
 678             mvya   = mbn->partition[xn][y].mv[i_list][1];
 679         }
 680     }
 681
 682     /* Up ( pixel(0,-1)*/
 683     yn = y - 1;
 684     mbn = mb;
 685     if( yn < 0 )
 686     {
 687         yn += 4;
 688         mbn = mb->mbb;
 689     }
 690     if( mbn )
 691     {
 692         i_refb = -2;
 693         if( !IS_INTRA( mbn->i_type ) )
 694         {
 695             i_refb = mbn->partition[x][yn].i_ref[i_list];
 696             mvxb   = mbn->partition[x][yn].mv[i_list][0];
 697             mvyb   = mbn->partition[x][yn].mv[i_list][1];
 698         }
 699     }
 700
 701     /* Up right pixel(width,-1)*/
 702     xn = x + w;
 703     yn = y - 1;
 704
 705     mbn = mb;
 706     if( yn < 0 && xn >= 4 )
 707     {
 708         if( mb->mbc )
 709         {
 710             xn -= 4;
 711             yn += 4;
 712             mbn = mb->mbc;
 713         }
 714         else
 715         {
 716             mbn = NULL;
 717         }
 718     }
 719     else if( yn < 0 )
 720     {
 721         yn += 4;
 722         mbn = mb->mbb;
 723     }
 724     else if( xn >= 4 || ( xn == 2 && ( yn == 0 || yn == 2 ) ) )
 725     {
 726         mbn = NULL; /* not yet decoded */
 727     }
 728
 729     if( mbn == NULL )
 730     {
 731         /* load top left pixel(-1,-1) */
 732         xn = x - 1;
 733         yn = y - 1;
 734
 735         mbn = mb;
 736         if( yn < 0 && xn < 0 )
 737         {
 738             if( mb->mba && mb->mbb )
 739             {
 740                 xn += 4;
 741                 yn += 4;
 742                 mbn = mb->mbb - 1;
 743             }
 744             else
 745             {
 746                 mbn = NULL;
 747             }
 748         }
 749         else if( yn < 0 )
 750         {
 751             yn += 4;
 752             mbn = mb->mbb;
 753         }
 754         else if( xn < 0 )
 755         {
 756             xn += 4;
 757             mbn = mb->mba;
 758         }
 759     }
 760
 761     if( mbn )
 762     {
 763         i_refc = -2;
 764         if( !IS_INTRA( mbn->i_type ) )
 765         {
 766             i_refc = mbn->partition[xn][yn].i_ref[i_list];
 767             mvxc   = mbn->partition[xn][yn].mv[i_list][0];
 768             mvyc   = mbn->partition[xn][yn].mv[i_list][1];
 769         }
 770     }
 771
 772     if( mb->i_partition == D_16x8 && i_part == 0 && i_refb == i_ref )
 773     {
 774         *mvxp = mvxb;
 775         *mvyp = mvyb;
 776     }
 777     else if( mb->i_partition == D_16x8 && i_part == 1 && i_refa == i_ref )
 778     {
 779         *mvxp = mvxa;
 780         *mvyp = mvya;
 781     }
 782     else if( mb->i_partition == D_8x16 && i_part == 0 && i_refa == i_ref )
 783     {
 784         *mvxp = mvxa;
 785         *mvyp = mvya;
 786     }
 787     else if( mb->i_partition == D_8x16 && i_part == 1 && i_refc == i_ref )
 788     {
 789         *mvxp = mvxc;
 790         *mvyp = mvyc;
 791     }
 792     else
 793     {
 794         int i_count;
 795
 796         i_count = 0;
 797         if( i_refa == i_ref ) i_count++;
 798         if( i_refb == i_ref ) i_count++;
 799         if( i_refc == i_ref ) i_count++;
 800
 801         if( i_count > 1 )
 802         {
 803             *mvxp = x264_median( mvxa, mvxb, mvxc );
 804             *mvyp = x264_median( mvya, mvyb, mvyc );
 805         }
 806         else if( i_count == 1 )
 807         {
 808             if( i_refa == i_ref )
 809             {
 810                 *mvxp = mvxa;
 811                 *mvyp = mvya;
 812             }
 813             else if( i_refb == i_ref )
 814             {
 815                 *mvxp = mvxb;
 816                 *mvyp = mvyb;
 817             }
 818             else
 819             {
 820                 *mvxp = mvxc;
 821                 *mvyp = mvyc;
 822             }
 823         }
 824         else if( i_refb == -1 && i_refc == -1 && i_refa != -1 )
 825         {
 826             *mvxp = mvxa;
 827             *mvyp = mvya;
 828         }
 829         else
 830         {
 831             *mvxp = x264_median( mvxa, mvxb, mvxc );
 832             *mvyp = x264_median( mvya, mvyb, mvyc );
 833         }
 834     }
 835 }
 836
 837 void x264_macroblock_predict_mv_pskip( x264_macroblock_t *mb, int *mvxp, int *mvyp )
 838 {
 839     int x, y, xn, yn;
 840
 841     int i_refa = -1;
 842     int i_refb = -1;
 843
 844     int mvxa = 0, mvxb = 0;
 845     int mvya = 0, mvyb = 0;
 846
 847     x264_macroblock_t *mbn;
 848
 849
 850     x264_macroblock_partition_getxy( mb, 0, 0, &x, &y );
 851
 852     /* Left  pixel (-1,0)*/
 853     xn = x - 1;
 854     mbn = mb;
 855     if( xn < 0 )
 856     {
 857         xn += 4;
 858         mbn = mb->mba;
 859     }
 860     if( mbn )
 861     {
 862         i_refa = -2;
 863         if( !IS_INTRA( mbn->i_type ) )
 864         {
 865             i_refa = mbn->partition[xn][y].i_ref[0];
 866             mvxa   = mbn->partition[xn][y].mv[0][0];
 867             mvya   = mbn->partition[xn][y].mv[0][1];
 868         }
 869     }
 870
 871     /* Up ( pixel(0,-1)*/
 872     yn = y - 1;
 873     mbn = mb;
 874     if( yn < 0 )
 875     {
 876         yn += 4;
 877         mbn = mb->mbb;
 878     }
 879     if( mbn )
 880     {
 881         i_refb = -2;
 882         if( !IS_INTRA( mbn->i_type ) )
 883         {
 884             i_refb = mbn->partition[x][yn].i_ref[0];
 885             mvxb   = mbn->partition[x][yn].mv[0][0];
 886             mvyb   = mbn->partition[x][yn].mv[0][1];
 887         }
 888     }
 889
 890     if( i_refa == -1 || i_refb == -1 ||
 891         ( i_refa == 0 && mvxa == 0 && mvya == 0 ) ||
 892         ( i_refb == 0 && mvxb == 0 && mvyb == 0 ) )
 893     {
 894         *mvxp = 0;
 895         *mvyp = 0;
 896     }
 897     else
 898     {
 899         x264_macroblock_predict_mv( mb, 0, 0, 0, mvxp, mvyp );
 900     }
 901 }
 902
 903 static const int i_chroma_qp_table[52] =
 904 {
 905      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
 906     10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
 907     20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
 908     29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
 909     36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
 910     39, 39
 911 };
 912
 913 static void x264_macroblock_mc( x264_t *h, x264_macroblock_t *mb, int b_luma )
 914 {
 915     x264_mb_context_t *ctx = mb->context;
 916
 917     int ch;
 918     int i_ref;
 919     int mx, my;
 920
 921     if( mb->i_type == P_L0 )
 922     {
 923         int i_part;
 924
 925         for( i_part = 0; i_part < mb_partition_count( mb->i_partition ); i_part++ )
 926         {
 927             int i_width, i_height;
 928             int x, y;
 929
 930             x264_macroblock_partition_get( mb, 0, i_part, 0, &i_ref, &mx, &my );
 931             x264_macroblock_partition_getxy( mb, i_part, 0, &x, &y );
 932             x264_macroblock_partition_size(  mb, i_part, 0, &i_width, &i_height );
 933
 934             if( b_luma )
 935             {
 936                 int     i_src = ctx->i_fref0[i_ref][0];
 937                 uint8_t *p_src= ctx->p_fref0[i_ref][0];
 938                 int     i_dst = ctx->i_fdec[0];
 939                 uint8_t *p_dst= ctx->p_fdec[0];
 940
 941                 h->mc[MC_LUMA]( &p_src[4*(x+y*i_src)], i_src,
 942                                 &p_dst[4*(x+y*i_dst)], i_dst,
 943                                 mx, my, 4*i_width, 4*i_height );
 944             }
 945             else
 946             {
 947                 int     i_src,  i_dst;
 948                 uint8_t *p_src, *p_dst;
 949
 950                 for( ch = 0; ch < 2; ch++ )
 951                 {
 952                     i_src = ctx->i_fref0[i_ref][1+ch];
 953                     p_src = ctx->p_fref0[i_ref][1+ch];
 954                     i_dst = ctx->i_fdec[1+ch];
 955                     p_dst = ctx->p_fdec[1+ch];
 956
 957                     h->mc[MC_CHROMA]( &p_src[2*(x+y*i_src)], i_src,
 958                                       &p_dst[2*(x+y*i_dst)], i_dst,
 959                                       mx, my, 2*i_width, 2*i_height );
 960                 }
 961             }
 962         }
 963     }
 964     else if( mb->i_type == P_8x8 )
 965     {
 966         int i_part;
 967
 968         for( i_part = 0; i_part < 4; i_part++ )
 969         {
 970             int i_sub;
 971
 972             for( i_sub = 0; i_sub < mb_sub_partition_count( mb->i_sub_partition[i_part] ); i_sub++ )
 973             {
 974                 int i_width, i_height;
 975                 int x, y;
 976
 977                 x264_macroblock_partition_get(   mb, 0, i_part, i_sub, &i_ref, &mx, &my );
 978                 x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
 979                 x264_macroblock_partition_size(  mb, i_part, i_sub, &i_width, &i_height );
 980
 981                 if( b_luma )
 982                 {
 983                     int     i_src = ctx->i_fref0[i_ref][0];
 984                     uint8_t *p_src= ctx->p_fref0[i_ref][0];
 985                     int     i_dst = ctx->i_fdec[0];
 986                     uint8_t *p_dst= ctx->p_fdec[0];
 987
 988                     h->mc[MC_LUMA]( &p_src[4*(x+y*i_src)], i_src,
 989                                     &p_dst[4*(x+y*i_dst)], i_dst,
 990                                     mx, my, 4*i_width, 4*i_height );
 991                 }
 992                 else
 993                 {
 994                     int     i_src,  i_dst;
 995                     uint8_t *p_src, *p_dst;
 996
 997                     for( ch = 0; ch < 2; ch++ )
 998                     {
 999                         i_src = ctx->i_fref0[i_ref][1+ch];
1000                         p_src = ctx->p_fref0[i_ref][1+ch];
1001                         i_dst = ctx->i_fdec[1+ch];
1002                         p_dst = ctx->p_fdec[1+ch];
1003
1004                         h->mc[MC_CHROMA]( &p_src[2*(x+y*i_src)], i_src,
1005                                           &p_dst[2*(x+y*i_dst)], i_dst,
1006                                           mx, my, 2*i_width, 2*i_height );
1007                     }
1008                 }
1009             }
1010         }
1011     }
1012 }
1013
/*****************************************************************************
 * x264_macroblock_context_load:
 *****************************************************************************/
1017 void x264_macroblock_context_load( x264_t *h, x264_macroblock_t *mb, x264_mb_context_t *context )
1018 {
1019     int i;
1020     int x, y;
1021     x264_macroblock_t *a = NULL;
1022     x264_macroblock_t *b = NULL;
1023
1024     if( mb->i_neighbour&MB_LEFT )
1025     {
1026         a = mb - 1;
1027     }
1028     if( mb->i_neighbour&MB_TOP )
1029     {
1030         b = mb - h->sps.i_mb_width;
1031     }
1032 #define LOAD_PTR( dst, src ) \
1033     context->p_##dst[0] = (src)->plane[0] + 16 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[0] ); \
1034     context->p_##dst[1] = (src)->plane[1] +  8 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[1] ); \
1035     context->p_##dst[2] = (src)->plane[2] +  8 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[2] ); \
1036     context->i_##dst[0] = (src)->i_stride[0]; \
1037     context->i_##dst[1] = (src)->i_stride[1]; \
1038     context->i_##dst[2] = (src)->i_stride[2]
1039
1040     LOAD_PTR( img,  h->picture );
1041     LOAD_PTR( fdec, h->fdec );
1042     for( i = 0; i < h->i_ref0; i++ )
1043     {
1044         LOAD_PTR( fref0[i], h->fref0[i] );
1045     }
1046     for( i = 0; i < h->i_ref1; i++ )
1047     {
1048         LOAD_PTR( fref1[i], h->fref1[i] );
1049     }
1050 #undef LOAD_PTR
1051
1052     for( y = 0; y < 4; y++ )
1053     {
1054         for( x = 0; x < 4; x++ )
1055         {
1056             int idx;
1057             int xa, yb;
1058             x264_macroblock_t *mba;
1059             x264_macroblock_t *mbb;
1060
1061             idx = block_idx_xy[x][y];
1062             mba = mb;
1063             mbb = mb;
1064
1065             xa = x - 1;
1066             if (xa < 0 )
1067             {
1068                 xa += 4;
1069                 mba = a;
1070             }
1071             /* up */
1072             yb = y - 1;
1073             if (yb < 0 )
1074             {
1075                 yb += 4;
1076                 mbb = b;
1077             }
1078
1079             context->block[idx].mba = mba;
1080             context->block[idx].mbb = mbb;
1081             context->block[idx].bka = mba ? &mba->block[block_idx_xy[xa][y]] : NULL;
1082             context->block[idx].bkb = mbb ? &mbb->block[block_idx_xy[x][yb]] : NULL;
1083
1084             if( x < 2 && y < 2 )
1085             {
1086                 int ch;
1087                 if( xa > 1 ) xa -= 2;   /* we have wrap but here step is 2 not 4 */
1088                 if( yb > 1 ) yb -= 2;   /* idem */
1089
1090                 for( ch = 0; ch < 2; ch++ )
1091                 {
1092                     context->block[16+4*ch+idx].mba = mba;
1093                     context->block[16+4*ch+idx].mbb = mbb;
1094                     context->block[16+4*ch+idx].bka = mba ? &mba->block[16+4*ch+block_idx_xy[xa][y]] : NULL;
1095                     context->block[16+4*ch+idx].bkb = mbb ? &mbb->block[16+4*ch+block_idx_xy[x][yb]] : NULL;
1096                 }
1097             }
1098         }
1099     }
1100
1101     mb->context = context;
1102 }
1103
/* (ref: JVT-B118)
 * x264_mb_decimate_score: rate a list of i_max scanned dct coefficients to
 * decide whether the whole list could be forced to zero (a low score means
 * "cheap to drop").
 *
 * Walking from the last non-zero coefficient down to index 0, each coefficient
 * of magnitude 1 adds a weight that depends on the run of zeros preceding it.
 * Any coefficient with a magnitude above 1 ends the walk with a score of 9.
 *
 * Used in inter macroblock (luma and chroma)
 *  luma: for a 8x8 block: if score < 4 -> null
 *        for the complete mb: if score < 6 -> null
 *  chroma: for the complete mb: if score < 7 -> null
 */
static int x264_mb_decimate_score( int *dct, int i_max )
{
    /* weight of a +/-1 coefficient, indexed by the zero run in front of it */
    static const int i_ds_table[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    int i_score = 0;
    int pos;

    /* locate the last non-zero coefficient */
    for( pos = i_max - 1; pos >= 0; pos-- )
    {
        if( dct[pos] != 0 )
        {
            break;
        }
    }

    while( pos >= 0 )
    {
        const int i_level = dct[pos];
        int i_run = 0;

        pos--;
        if( abs( i_level ) > 1 )
        {
            return 9;
        }

        /* count the zeros sitting between this coefficient and the next one */
        for( ; pos >= 0 && dct[pos] == 0; pos-- )
        {
            i_run++;
        }
        i_score += i_ds_table[i_run];
    }

    return i_score;
}
1144
1145 static void x264_mb_encode_4x4( x264_t *h, x264_macroblock_t *mb, int idx, int i_qscale )
1146 {
1147     x264_mb_context_t *ctx = mb->context;
1148
1149     uint8_t *p_src = ctx->p_img[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_img[0];
1150     int      i_src = ctx->i_img[0];
1151     uint8_t *p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
1152     int      i_dst = ctx->i_fdec[0];
1153
1154     int16_t luma[4][4];
1155     int16_t dct4x4[4][4];
1156
1157     /* we calculate diff */
1158     h->pixf.sub4x4( luma, p_src, i_src, p_dst, i_dst );
1159
1160     /* calculate dct coeffs */
1161     h->dctf.dct4x4( dct4x4, luma );
1162     quant_4x4( dct4x4, i_qscale, 1 );
1163
1164     scan_zigzag_4x4full( mb->block[idx].luma4x4, dct4x4 );
1165
1166     /* output samples to fdec */
1167     dequant_4x4( dct4x4, i_qscale );
1168     h->dctf.idct4x4( luma, dct4x4 );
1169
1170     /* put pixel to fdec */
1171     h->pixf.add4x4( p_dst, i_dst, luma );
1172 }
1173
1174 static void x264_mb_encode_i16x16( x264_t *h, x264_macroblock_t *mb, int i_qscale )
1175 {
1176     x264_mb_context_t *ctx = mb->context;
1177
1178     uint8_t *p_src = ctx->p_img[0];
1179     int      i_src = ctx->i_img[0];
1180     uint8_t *p_dst = ctx->p_fdec[0];
1181     int      i_dst = ctx->i_fdec[0];
1182
1183     int16_t luma[16][4][4];
1184     int16_t dct4x4[16+1][4][4];
1185
1186     int i;
1187
1188     /* calculate the diff */
1189     h->pixf.sub16x16( luma, p_src, i_src, p_dst, i_dst );
1190
1191     /* calculate dct coeffs */
1192     for( i = 0; i < 16; i++ )
1193     {
1194         h->dctf.dct4x4( dct4x4[i+1], luma[i] );
1195
1196         /* copy dc coeff */
1197         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
1198
1199         quant_4x4( dct4x4[1+i], i_qscale, 1 );
1200         scan_zigzag_4x4( mb->block[i].residual_ac, dct4x4[1+i] );
1201     }
1202
1203     h->dctf.dct4x4dc( dct4x4[0], dct4x4[0] );
1204     quant_4x4_dc( dct4x4[0], i_qscale, 1 );
1205     scan_zigzag_4x4full( mb->luma16x16_dc, dct4x4[0] );
1206
1207     /* output samples to fdec */
1208     h->dctf.idct4x4dc( dct4x4[0], dct4x4[0] );
1209     dequant_4x4_dc( dct4x4[0], i_qscale );  /* XXX not inversed */
1210
1211     /* calculate dct coeffs */
1212     for( i = 0; i < 16; i++ )
1213     {
1214         dequant_4x4( dct4x4[1+i], i_qscale );
1215
1216         /* copy dc coeff */
1217         dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
1218
1219         h->dctf.idct4x4( luma[i], dct4x4[i+1] );
1220     }
1221     /* put pixels to fdec */
1222     h->pixf.add16x16( p_dst, i_dst, luma );
1223 }
1224
1225 static void x264_mb_encode_8x8( x264_t *h, x264_macroblock_t *mb, int b_inter, int i_qscale )
1226 {
1227     x264_mb_context_t *ctx = mb->context;
1228
1229     uint8_t *p_src, *p_dst;
1230     int      i_src, i_dst;
1231
1232     int i, ch;
1233     int i_decimate_score = 0;
1234
1235     for( ch = 0; ch < 2; ch++ )
1236     {
1237         int16_t chroma[4][4][4];
1238         int16_t dct2x2[2][2];
1239         int16_t dct4x4[4][4][4];
1240
1241         p_src = ctx->p_img[1+ch];
1242         i_src = ctx->i_img[1+ch];
1243         p_dst = ctx->p_fdec[1+ch];
1244         i_dst = ctx->i_fdec[1+ch];
1245
1246         /* calculate the diff */
1247         h->pixf.sub8x8( chroma, p_src, i_src, p_dst, i_dst );
1248
1249         /* calculate dct coeffs */
1250         for( i = 0; i < 4; i++ )
1251         {
1252             h->dctf.dct4x4( dct4x4[i], chroma[i] );
1253
1254             /* copy dc coeff */
1255             dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
1256
1257             quant_4x4( dct4x4[i], i_qscale, 1 );
1258             scan_zigzag_4x4( mb->block[16+i+ch*4].residual_ac, dct4x4[i] );
1259
1260             i_decimate_score += x264_mb_decimate_score( mb->block[16+i+ch*4].residual_ac, 15 );
1261         }
1262
1263         h->dctf.dct2x2dc( dct2x2, dct2x2 );
1264         quant_2x2_dc( dct2x2, i_qscale, 1 );
1265         scan_zigzag_2x2_dc( mb->chroma_dc[ch], dct2x2 );
1266
1267         if( i_decimate_score < 7 && b_inter )
1268         {
1269             /* Near null chroma 8x8 block so make it null (bits saving) */
1270             for( i = 0; i < 4; i++ )
1271             {
1272                 int x, y;
1273                 for( x = 0; x < 15; x++ )
1274                 {
1275                     mb->block[16+i+ch*4].residual_ac[x] = 0;
1276                 }
1277                 for( x = 0; x < 4; x++ )
1278                 {
1279                     for( y = 0; y < 4; y++ )
1280                     {
1281                         dct4x4[i][x][y] = 0;
1282                     }
1283                 }
1284             }
1285         }
1286
1287         /* output samples to fdec */
1288         h->dctf.idct2x2dc( dct2x2, dct2x2 );
1289         dequant_2x2_dc( dct2x2, i_qscale );  /* XXX not inversed */
1290
1291         /* calculate dct coeffs */
1292         for( i = 0; i < 4; i++ )
1293         {
1294             dequant_4x4( dct4x4[i], i_qscale );
1295
1296             /* copy dc coeff */
1297             dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
1298
1299             h->dctf.idct4x4( chroma[i], dct4x4[i] );
1300         }
1301         h->pixf.add8x8( p_dst, i_dst, chroma );
1302     }
1303 }
1304
1305 static int x264_mb_pred_mode4x4_fix( int i_mode )
1306 {
1307     if( i_mode == I_PRED_4x4_DC_LEFT || i_mode == I_PRED_4x4_DC_TOP || i_mode == I_PRED_4x4_DC_128 )
1308     {
1309         return I_PRED_4x4_DC;
1310     }
1311     return i_mode;
1312 }
1313 static int x264_mb_pred_mode16x16_fix( int i_mode )
1314 {
1315     if( i_mode == I_PRED_16x16_DC_LEFT || i_mode == I_PRED_16x16_DC_TOP || i_mode == I_PRED_16x16_DC_128 )
1316     {
1317         return I_PRED_16x16_DC;
1318     }
1319     return i_mode;
1320 }
1321 static int x264_mb_pred_mode8x8_fix( int i_mode )
1322 {
1323     if( i_mode == I_PRED_CHROMA_DC_LEFT || i_mode == I_PRED_CHROMA_DC_TOP || i_mode == I_PRED_CHROMA_DC_128 )
1324     {
1325         return I_PRED_CHROMA_DC;
1326     }
1327     return i_mode;
1328 }
1329
/* Working data and results of the analysis of one macroblock.
 * Costs that have not been evaluated are left at -1 by the analysis code. */
typedef struct
{
    /* Lambda and QP the analysis is conducted with */
    int i_lambda;
    int i_qp;

    /* Edge histogram (luma only): accumulated gradient strength per
     * prediction direction. The 4x4 table is indexed [y][x][mode]. */
    int i_edge_4x4[4][4][9];    /* mode 2 (DC) isn't computed by the edge detection */
    int i_edge_16x16[4];        /* mode 2 (DC) isn't computed by the edge detection */

    /* I: Intra part */
    /* Luma: best cost and prediction mode for 16x16 ... */
    int i_sad_i16x16;
    int i_predict16x16;

    /* ... and for 4x4 (modes are indexed [x][y]) */
    int i_sad_i4x4;
    int i_predict4x4[4][4];

    /* Chroma: best cost and prediction mode */
    int i_sad_i8x8;
    int i_predict8x8;

    /* II: Inter part: cost, reference index and motion vectors per partitioning */
    int i_sad_p16x16;
    int i_ref_p16x16;
    int i_mv_p16x16[2];

    int i_sad_p16x8;
    int i_ref_p16x8;
    int i_mv_p16x8[2][2];       /* [partition][x/y] */

    int i_sad_p8x16;
    int i_ref_p8x16;
    int i_mv_p8x16[2][2];       /* [partition][x/y] */

    int i_sad_p8x8;
    int i_ref_p8x8;
    int i_sub_partition_p8x8[4];
    int i_mv_p8x8[4][4][2];     /* [8x8 block][sub partition][x/y] */

} x264_mb_analysis_t;
1371
1372
/* Lambda used by the analysis, indexed by QP (0..51) */
static const int i_qp0_cost_table[52] =
{
    /* qp  0.. 7 */  1,  1,  1,  1,  1,  1,  1,  1,
    /* qp  8..15 */  1,  1,  1,  1,  1,  1,  1,  1,
    /* qp 16..23 */  2,  2,  2,  2,  3,  3,  3,  4,
    /* qp 24..31 */  4,  4,  5,  6,  6,  7,  8,  9,
    /* qp 32..39 */ 10, 11, 13, 14, 16, 18, 20, 23,
    /* qp 40..47 */ 25, 29, 32, 36, 40, 45, 51, 57,
    /* qp 48..51 */ 64, 72, 81, 91
};
1383
1384
1385 static void x264_macroblock_analyse_edge( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1386 {
1387     uint8_t *p_img = mb->context->p_img[0];;
1388     int      i_img = mb->context->i_img[0];
1389
1390     int dx, dy;
1391     int x,  y;
1392     int i;
1393
1394 #define FIX8( f ) ( (int)((f) * 256))
1395     /* init stats (16x16) */
1396     for( i = 0; i < 4; i++ )
1397     {
1398         res->i_edge_16x16[i] = 0;
1399     }
1400
1401     for( y = 0; y < 4; y++ )
1402     {
1403         for( x = 0; x < 4; x++ )
1404         {
1405             /* init stats (4x4) */
1406             for( i = 0; i < 9; i++ )
1407             {
1408                 res->i_edge_4x4[y][x][i] = 0;
1409             }
1410
1411             /* FIXME real interval 0-4 except for border mb */
1412             for( dy = (y==0 ? 1:0); dy < (y==3?3:4); dy++ )
1413             {
1414                 for( dx = (x==0?1:0); dx < (x==3?3:4); dx++ )
1415                 {
1416                     uint8_t *pix = &p_img[(y*4+dy)*i_img+(x+dx)];
1417                     int dgx, dgy;
1418                     int Ryx;
1419                     int Ag;
1420                     int Dg;
1421
1422
1423                     dgx = (pix[-1*i_img-1]+2*pix[-1*i_img+0]+pix[-1*i_img+1]) -
1424                           (pix[ 1*i_img-1]+2*pix[ 1*i_img+0]+pix[ 1*i_img+1]);
1425
1426
1427                     dgy = (pix[-1*i_img+1]+2*pix[ 0*i_img+1]+pix[ 1*i_img+1]) -
1428                           (pix[-1*i_img-1]+2*pix[ 0*i_img-1]+pix[ 1*i_img-1]);
1429
1430                     /* XXX angle to test/verify */
1431                     Ag = abs( dgx ) + abs( dgy );
1432
1433                     if( dgx == 0 )
1434                     {
1435                         Ryx = (4*256)<<8;
1436                     }
1437                     else
1438                     {
1439                         Ryx = ( dgy << 8 )/ dgx;
1440                     }
1441
1442                     if( abs(Ryx) >= FIX8(5.027339) )
1443                     {
1444                         Dg = I_PRED_4x4_V;
1445                     }
1446                     else if( abs(Ryx) <= FIX8(0.198912) )
1447                     {
1448                         Dg = I_PRED_4x4_H;
1449                     }
1450                     else if( Ryx > FIX8(0.198912) && Ryx <= FIX8(0.668179) )
1451                     {
1452                         Dg = I_PRED_4x4_HD;
1453                     }
1454                     else if( Ryx > FIX8(0.668179) && Ryx <= FIX8(1.496606) )
1455                     {
1456                         Dg = I_PRED_4x4_DDR;
1457                     }
1458                     else if( Ryx > FIX8(1.496606) && Ryx <= FIX8(5.027339) )
1459                     {
1460                         Dg = I_PRED_4x4_VR;
1461                     }
1462                     else if( Ryx > FIX8(-5.027339) && Ryx <= FIX8(-1.496606) )
1463                     {
1464                         Dg = I_PRED_4x4_VL;
1465                     }
1466                     else if( Ryx > FIX8(-1.496606) && Ryx <= FIX8(-0.668179) )
1467                     {
1468                         Dg = I_PRED_4x4_DDL;
1469                     }
1470                     else if( Ryx > FIX8(-0.668179) && Ryx <= FIX8(-0.198912) )
1471                     {
1472                         Dg = I_PRED_4x4_HU;
1473                     }
1474                     else
1475                     {
1476                         /* Should never occur */
1477                         fprintf( stderr, "mmh bad edge dectection function\n" );
1478                         Dg = I_PRED_4x4_DC;
1479                     }
1480                     res->i_edge_4x4[y][x][Dg] += Ag;
1481
1482                     if( abs(Ryx) > FIX8(2.414214) )
1483                     {
1484                         Dg = I_PRED_16x16_V;
1485                     }
1486                     else if( abs(Ryx) < FIX8(0.414214) )
1487                     {
1488                         Dg = I_PRED_16x16_H;
1489                     }
1490                     else
1491                     {
1492                         Dg = I_PRED_16x16_P;
1493                     }
1494                     res->i_edge_16x16[Dg] += Ag;
1495                 }
1496             }
1497         }
1498     }
1499 #undef FIX8
1500 }
1501
1502 static void x264_macroblock_analyse_i16x16( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1503 {
1504     uint8_t *p_dst = mb->context->p_fdec[0];
1505     uint8_t *p_src = mb->context->p_img[0];
1506     int      i_dst = mb->context->i_fdec[0];
1507     int      i_src = mb->context->i_img[0];
1508
1509     int i;
1510     int i_max;
1511     int predict_mode[4];
1512
1513     res->i_sad_i16x16 = -1;
1514
1515     /* 16x16 prediction selection */
1516     predict_16x16_mode_available( mb, predict_mode, &i_max );
1517     for( i = 0; i < i_max; i++ )
1518     {
1519         int i_sad;
1520         int i_mode;
1521
1522         i_mode = predict_mode[i];
1523
1524         /* we do the prediction */
1525         h->predict_16x16[i_mode]( p_dst, i_dst );
1526
1527         /* we calculate the diff and get the square sum of the diff */
1528         i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_dst, p_src, i_src ) +
1529                 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix(i_mode) );
1530         /* if i_score is lower it is better */
1531         if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
1532         {
1533             res->i_predict16x16 = i_mode;
1534             res->i_sad_i16x16     = i_sad;
1535         }
1536     }
1537 }
1538
1539 static void x264_macroblock_analyse_i4x4( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1540 {
1541     int i, idx;
1542
1543     int i_max;
1544     int predict_mode[9];
1545
1546     uint8_t *p_dst = mb->context->p_fdec[0];
1547     uint8_t *p_src = mb->context->p_img[0];
1548     int      i_dst = mb->context->i_fdec[0];
1549     int      i_src = mb->context->i_img[0];
1550
1551     res->i_sad_i4x4 = 0;
1552
1553     /* 4x4 prediction selection */
1554     for( idx = 0; idx < 16; idx++ )
1555     {
1556         uint8_t *p_src_by;
1557         uint8_t *p_dst_by;
1558         int     i_best;
1559         int x, y;
1560         int i_pred_mode;
1561         int i_th;
1562
1563         i_pred_mode= predict_pred_intra4x4_mode( h, mb, idx );
1564         x = block_idx_x[idx];
1565         y = block_idx_y[idx];
1566
1567         i_th = res->i_edge_4x4[y][x][0];
1568         if( i_th < res->i_edge_4x4[y][x][1] ) i_th = res->i_edge_4x4[y][x][1];
1569         if( i_th < res->i_edge_4x4[y][x][3] ) i_th = res->i_edge_4x4[y][x][3];
1570         if( i_th < res->i_edge_4x4[y][x][4] ) i_th = res->i_edge_4x4[y][x][4];
1571         if( i_th < res->i_edge_4x4[y][x][5] ) i_th = res->i_edge_4x4[y][x][5];
1572         if( i_th < res->i_edge_4x4[y][x][6] ) i_th = res->i_edge_4x4[y][x][6];
1573         if( i_th < res->i_edge_4x4[y][x][7] ) i_th = res->i_edge_4x4[y][x][7];
1574         if( i_th < res->i_edge_4x4[y][x][8] ) i_th = res->i_edge_4x4[y][x][8];
1575         i_th /= 2;
1576
1577         res->i_edge_4x4[y][x][2] = i_th;
1578
1579         p_src_by = p_src + 4 * x + 4 * y * i_src;
1580         p_dst_by = p_dst + 4 * x + 4 * y * i_dst;
1581
1582         i_best = -1;
1583         predict_4x4_mode_available( mb, idx, predict_mode, &i_max );
1584         for( i = 0; i < i_max; i++ )
1585         {
1586             int i_sad;
1587             int i_mode;
1588             int i_fmode;
1589
1590             i_mode = predict_mode[i];
1591             i_fmode = x264_mb_pred_mode4x4_fix( i_mode );
1592
1593             if( res->i_edge_4x4[y][x][i_fmode] < i_th )
1594             {
1595                 continue;
1596             }
1597
1598             /* we do the prediction */
1599             h->predict_4x4[i_mode]( p_dst_by, i_dst );
1600
1601             /* we calculate diff and get the square sum of the diff */
1602             i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_dst, p_src_by, i_src );
1603
1604             i_sad += res->i_lambda * (i_pred_mode == i_fmode ? 1 : 4);
1605
1606             /* if i_score is lower it is better */
1607             if( i_best == -1 || i_best > i_sad )
1608             {
1609                 res->i_predict4x4[x][y] = i_mode;
1610                 i_best = i_sad;
1611             }
1612         }
1613         res->i_sad_i4x4 += i_best;
1614
1615         /* we need to encode this mb now (for next ones) */
1616         mb->block[idx].i_intra4x4_pred_mode = res->i_predict4x4[x][y];
1617         h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_dst );
1618         x264_mb_encode_4x4( h, mb, idx, res->i_qp );
1619     }
1620     res->i_sad_i4x4 += res->i_lambda * 24;    /* from JVT (SATD0) */
1621 }
1622
1623 static void x264_macroblock_analyse_intra_chroma( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1624 {
1625     int i;
1626
1627     int i_max;
1628     int predict_mode[9];
1629
1630     uint8_t *p_dstc[2], *p_srcc[2];
1631     int      i_dstc[2], i_srcc[2];
1632
1633     /* 8x8 prediction selection for chroma */
1634     p_dstc[0] = mb->context->p_fdec[1]; i_dstc[0] = mb->context->i_fdec[1];
1635     p_dstc[1] = mb->context->p_fdec[2]; i_dstc[1] = mb->context->i_fdec[2];
1636     p_srcc[0] = mb->context->p_img[1];  i_srcc[0] = mb->context->i_img[1];
1637     p_srcc[1] = mb->context->p_img[2];  i_srcc[1] = mb->context->i_img[2];
1638
1639     predict_8x8_mode_available( mb, predict_mode, &i_max );
1640     res->i_sad_i8x8 = -1;
1641     for( i = 0; i < i_max; i++ )
1642     {
1643         int i_sad;
1644         int i_mode;
1645
1646         i_mode = predict_mode[i];
1647
1648         /* we do the prediction */
1649         h->predict_8x8[i_mode]( p_dstc[0], i_dstc[0] );
1650         h->predict_8x8[i_mode]( p_dstc[1], i_dstc[1] );
1651
1652         /* we calculate the cost */
1653         i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_dstc[0], p_srcc[0], i_srcc[0] ) +
1654                 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_dstc[1], p_srcc[1], i_srcc[1] ) +
1655                 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix(i_mode) );
1656
1657         /* if i_score is lower it is better */
1658         if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
1659         {
1660             res->i_predict8x8 = i_mode;
1661             res->i_sad_i8x8     = i_sad;
1662         }
1663     }
1664 }
1665
1666 static void x264_macroblock_analyse_inter_p8x8( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1667 {
1668     x264_mb_context_t *ctx = mb->context;
1669     int i_ref = res->i_ref_p16x16;
1670
1671     uint8_t *p_fref = ctx->p_fref0[i_ref][0];
1672     int      i_fref = ctx->i_fref0[i_ref][0];
1673     uint8_t *p_img  = ctx->p_img[0];
1674     int      i_img  = ctx->i_img[0];
1675
1676     int i;
1677
1678     res->i_ref_p8x8 = i_ref;
1679     res->i_sad_p8x8 = 0;
1680     mb->i_partition = D_8x8;
1681
1682     for( i = 0; i < 4; i++ )
1683     {
1684         static const int test8x8_mode[4] = { D_L0_8x8, D_L0_8x4, D_L0_4x8, D_L0_4x4 };
1685         static const int test8x8_pix[4]  = { PIXEL_8x8, PIXEL_8x4, PIXEL_4x8, PIXEL_4x4 };
1686         static const int test8x8_pos_x[4][4] = { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 0, 0 }, { 0, 4, 0, 4 } };
1687         static const int test8x8_pos_y[4][4] = { { 0, 0, 0, 0 }, { 0, 4, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 4, 4 } };
1688         int i_test;
1689         int mvp[4][2];
1690         int mv[4][2];
1691
1692         int x, y;
1693         int i_sub;
1694         int i_b_satd;
1695
1696         y = 8 * (i / 2);
1697         x = 8 * (i % 2);
1698         i_b_satd = -1;
1699
1700         i_test = 0;
1701         /* FIXME as it's tooooooo slow test only 8x8 */
1702         //for( i_test = 0; i_test < 4; i_test++ )
1703         {
1704             int i_satd;
1705
1706             i_satd = 0;
1707
1708             mb->i_sub_partition[i] = test8x8_mode[i_test];
1709
1710             for( i_sub = 0; i_sub < mb_sub_partition_count( test8x8_mode[i_test] ); i_sub++ )
1711             {
1712                 x264_macroblock_predict_mv( mb, 0, i, i_sub, &mvp[i_sub][0], &mvp[i_sub][1] );
1713                 mv[i_sub][0] = mvp[i_sub][0];
1714                 mv[i_sub][1] = mvp[i_sub][1];
1715
1716                 i_satd += x264_me_p_umhexagons( h,
1717                                                 &p_fref[(y+test8x8_pos_y[i_test][i_sub])*i_fref +x+test8x8_pos_x[i_test][i_sub]], i_fref,
1718                                                 &p_img[(y+test8x8_pos_y[i_test][i_sub])*i_img +x+test8x8_pos_x[i_test][i_sub]], i_img,
1719                                                 test8x8_pix[i_test],
1720                                                 res->i_lambda,
1721                                                 &mv[i_sub][0], &mv[i_sub][1] );
1722                 i_satd += res->i_lambda * ( bs_size_se( mv[i_sub][0] - mvp[i_sub][0] ) +
1723                                             bs_size_se( mv[i_sub][1] - mvp[i_sub][1] ) );
1724             }
1725
1726             switch( test8x8_mode[i_test] )
1727             {
1728                 case D_L0_8x8:
1729                     i_satd += res->i_lambda * bs_size_ue( 0 );
1730                     break;
1731                 case D_L0_8x4:
1732                     i_satd += res->i_lambda * bs_size_ue( 1 );
1733                     break;
1734                 case D_L0_4x8:
1735                     i_satd += res->i_lambda * bs_size_ue( 2 );
1736                     break;
1737                 case D_L0_4x4:
1738                     i_satd += res->i_lambda * bs_size_ue( 3 );
1739                     break;
1740                 default:
1741                     fprintf( stderr, "internal error (invalid sub type)\n" );
1742                     break;
1743             }
1744
1745             if( i_b_satd == -1 || i_b_satd > i_satd )
1746             {
1747                 i_b_satd = i_satd;
1748                 res->i_sub_partition_p8x8[i] = test8x8_mode[i_test];;
1749                 for( i_sub = 0; i_sub < mb_sub_partition_count( test8x8_mode[i_test] ); i_sub++ )
1750                 {
1751                     res->i_mv_p8x8[i][i_sub][0] = mv[i_sub][0];
1752                     res->i_mv_p8x8[i][i_sub][1] = mv[i_sub][1];
1753                 }
1754             }
1755         }
1756
1757         res->i_sad_p8x8 += i_b_satd;
1758         /* needed for the next block */
1759         mb->i_sub_partition[i] = res->i_sub_partition_p8x8[i];
1760         for( i_sub = 0; i_sub < mb_sub_partition_count( res->i_sub_partition_p8x8[i] ); i_sub++ )
1761         {
1762             x264_macroblock_partition_set( mb, 0, i, i_sub,
1763                                            res->i_ref_p8x8,
1764                                            res->i_mv_p8x8[i][i_sub][0],
1765                                            res->i_mv_p8x8[i][i_sub][1] );
1766         }
1767     }
1768
1769     res->i_sad_p8x8 += 4*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1770 }
1771
1772 static void x264_macroblock_analyse_inter( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1773 {
1774     x264_mb_context_t *ctx = mb->context;
1775
1776     int i_ref;
1777
1778     /* int res */
1779     res->i_sad_p16x16 = -1;
1780     res->i_sad_p16x8  = -1;
1781     res->i_sad_p8x16  = -1;
1782     res->i_sad_p8x8   = -1;
1783
1784     /* 16x16 Search on all ref frame */
1785     mb->i_type = P_L0;  /* beurk fix that */
1786     mb->i_partition = D_16x16;
1787     for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
1788     {
1789         int i_sad;
1790         int mvxp, mvyp;
1791         int mvx, mvy;
1792
1793         /* Get the predicted MV */
1794         x264_macroblock_partition_set( mb, 0, 0, 0, i_ref, 0, 0 );
1795         x264_macroblock_predict_mv( mb, 0, 0, 0, &mvxp, &mvyp );
1796
1797         mvx = mvxp; mvy = mvyp;
1798         i_sad = x264_me_p_umhexagons( h, ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1799                                          ctx->p_img[0],         ctx->i_img[0],
1800                                          PIXEL_16x16, res->i_lambda, &mvx, &mvy );
1801         if( mvx == mvxp && mvy == mvyp )
1802         {
1803             i_sad -= 16 * res->i_lambda;
1804         }
1805         else
1806         {
1807             i_sad += res->i_lambda * (bs_size_se(mvx - mvxp) + bs_size_se(mvy - mvyp));
1808         }
1809         i_sad += res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1810
1811         if( res->i_sad_p16x16 == -1 || i_sad < res->i_sad_p16x16 )
1812         {
1813             res->i_sad_p16x16   = i_sad;
1814             res->i_ref_p16x16   = i_ref;
1815             res->i_mv_p16x16[0] = mvx;
1816             res->i_mv_p16x16[1] = mvy;
1817         }
1818     }
1819
1820     /* Now do the rafinement (using the ref found in 16x16 mode) */
1821     i_ref = res->i_ref_p16x16;
1822     x264_macroblock_partition_set( mb, 0, 0, 0, i_ref, 0, 0 );
1823
1824     /* try 16x8 */
1825     /* XXX we test i_predict16x16 to try shape with the same direction than edge
1826      * We should do a better algo of course (the one with edge dectection to be used
1827      * for intra mode too)
1828      * */
1829
1830     if( res->i_predict16x16 != I_PRED_16x16_V )
1831     {
1832         int mvp[2][2];
1833
1834         mb->i_partition = D_16x8;
1835
1836         res->i_ref_p16x8   = i_ref;
1837         x264_macroblock_predict_mv( mb, 0, 0, 0, &mvp[0][0], &mvp[0][1] );
1838         x264_macroblock_predict_mv( mb, 0, 1, 0, &mvp[1][0], &mvp[1][1] );
1839
1840         res->i_mv_p16x8[0][0] = mvp[0][0]; res->i_mv_p16x8[0][1] = mvp[0][1];
1841         res->i_mv_p16x8[1][0] = mvp[1][0]; res->i_mv_p16x8[1][1] = mvp[1][1];
1842
1843         res->i_sad_p16x8 = x264_me_p_umhexagons( h,
1844                                                  ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1845                                                  ctx->p_img[0],          ctx->i_img[0],
1846                                                  PIXEL_16x8,
1847                                                  res->i_lambda,
1848                                                  &res->i_mv_p16x8[0][0], &res->i_mv_p16x8[0][1] ) +
1849                            x264_me_p_umhexagons( h,
1850                                                  &ctx->p_fref0[i_ref][0][8*ctx->i_fref0[i_ref][0]], ctx->i_fref0[i_ref][0],
1851                                                  &ctx->p_img[0][8*ctx->i_img[0]],                   ctx->i_img[0],
1852                                                  PIXEL_16x8,
1853                                                  res->i_lambda,
1854                                                  &res->i_mv_p16x8[1][0], &res->i_mv_p16x8[1][1] );
1855
1856         res->i_sad_p16x8 += res->i_lambda * ( bs_size_se(res->i_mv_p16x8[0][0] - mvp[0][0] ) +
1857                                               bs_size_se(res->i_mv_p16x8[0][1] - mvp[0][1] ) +
1858                                               bs_size_se(res->i_mv_p16x8[1][0] - mvp[1][0] ) +
1859                                               bs_size_se(res->i_mv_p16x8[1][1] - mvp[1][1] ) );
1860
1861         res->i_sad_p16x8 += 2*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1862     }
1863
1864     /* try 8x16 */
1865     if( res->i_predict16x16 != I_PRED_16x16_H )
1866     {
1867         int mvp[2][2];
1868
1869         mb->i_partition = D_8x16;
1870
1871         res->i_ref_p8x16   = i_ref;
1872         x264_macroblock_predict_mv( mb, 0, 0, 0, &mvp[0][0], &mvp[0][1] );
1873         x264_macroblock_predict_mv( mb, 0, 1, 0, &mvp[1][0], &mvp[1][1] );
1874
1875         res->i_mv_p8x16[0][0] = mvp[0][0]; res->i_mv_p8x16[0][1] = mvp[0][1];
1876         res->i_mv_p8x16[1][0] = mvp[1][0]; res->i_mv_p8x16[1][1] = mvp[1][1];
1877
1878         res->i_sad_p8x16 = x264_me_p_umhexagons( h,
1879                                                  ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1880                                                  ctx->p_img[0],          ctx->i_img[0],
1881                                                  PIXEL_8x16,
1882                                                  res->i_lambda,
1883                                                  &res->i_mv_p8x16[0][0], &res->i_mv_p8x16[0][1] ) +
1884                            x264_me_p_umhexagons( h,
1885                                                  &ctx->p_fref0[i_ref][0][8], ctx->i_fref0[i_ref][0],
1886                                                  &ctx->p_img[0][8],          ctx->i_img[0],
1887                                                  PIXEL_8x16,
1888                                                  res->i_lambda,
1889                                                  &res->i_mv_p8x16[1][0], &res->i_mv_p8x16[1][1] );
1890
1891         res->i_sad_p8x16 += res->i_lambda * ( bs_size_se(res->i_mv_p8x16[0][0] - mvp[0][0] ) +
1892                                                 bs_size_se(res->i_mv_p8x16[0][1] - mvp[0][1] ) +
1893                                                 bs_size_se(res->i_mv_p8x16[1][0] - mvp[1][0] ) +
1894                                                 bs_size_se(res->i_mv_p8x16[1][1] - mvp[1][1] ) );
1895         res->i_sad_p8x16 += 2*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1896     }
1897
1898     if( 1 )
1899     {
1900     //    x264_macroblock_analyse_inter_p8x8( h,mb, res );
1901     }
1902 }
1903
1904 /*****************************************************************************
1905  * x264_macroblock_analyse:
1906  *****************************************************************************/
1907 void x264_macroblock_analyse( x264_t *h, x264_macroblock_t *mb, int i_slice_type )
1908 {
1909     x264_mb_analysis_t analysis;
1910     int i;
1911
1912     /* qp TODO */
1913     mb->i_qp_delta = 0;
1914
1915     /* init analysis */
1916     analysis.i_qp = x264_clip3( h->pps.i_pic_init_qp + h->sh.i_qp_delta + mb->i_qp_delta, 0, 51 );
1917     analysis.i_lambda = i_qp0_cost_table[analysis.i_qp];
1918
1919     x264_macroblock_analyse_edge( h, mb, &analysis );
1920
1921     /*--------------------------- Do the analysis ---------------------------*/
1922     x264_macroblock_analyse_i16x16( h, mb, &analysis );
1923     x264_macroblock_analyse_i4x4  ( h, mb, &analysis );
1924     if( i_slice_type == SLICE_TYPE_P )
1925     {
1926         x264_macroblock_analyse_inter( h, mb, &analysis );
1927     }
1928
1929     /*-------------------- Chose the macroblock mode ------------------------*/
1930     /* Do the MB decision */
1931     if( i_slice_type == SLICE_TYPE_I )
1932     {
1933         mb->i_type = analysis.i_sad_i4x4 < analysis.i_sad_i16x16 ? I_4x4 : I_16x16;
1934     }
1935     else
1936     {
1937         int i_satd;
1938 #define BEST_TYPE( type, partition, satd ) \
1939         if( satd != -1 && satd < i_satd ) \
1940         {   \
1941             i_satd = satd;  \
1942             mb->i_type = type; \
1943             mb->i_partition = partition; \
1944         }
1945
1946         i_satd = analysis.i_sad_i4x4;
1947         mb->i_type = I_4x4;
1948
1949         BEST_TYPE( I_16x16, -1,    analysis.i_sad_i16x16 );
1950         BEST_TYPE( P_L0,  D_16x16, analysis.i_sad_p16x16 );
1951         BEST_TYPE( P_L0,  D_16x8 , analysis.i_sad_p16x8  );
1952         BEST_TYPE( P_L0,  D_8x16 , analysis.i_sad_p8x16  );
1953         BEST_TYPE( P_8x8, D_8x8  , analysis.i_sad_p8x8   );
1954
1955 #undef BEST_TYPE
1956     }
1957
1958     if( IS_INTRA( mb->i_type ) )
1959     {
1960         x264_macroblock_analyse_intra_chroma( h, mb, &analysis );
1961     }
1962
1963     /*-------------------- Update MB from the analysis ----------------------*/
1964     switch( mb->i_type )
1965     {
1966         case I_4x4:
1967             for( i = 0; i < 16; i++ )
1968             {
1969                 mb->block[i].i_intra4x4_pred_mode = analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1970             }
1971             mb->i_chroma_pred_mode = analysis.i_predict8x8;
1972             break;
1973         case I_16x16:
1974             mb->i_intra16x16_pred_mode = analysis.i_predict16x16;
1975             mb->i_chroma_pred_mode = analysis.i_predict8x8;
1976             break;
1977         case P_L0:
1978             switch( mb->i_partition )
1979             {
1980                 case D_16x16:
1981                     x264_macroblock_partition_set( mb, 0, 0, 0,
1982                                                    analysis.i_ref_p16x16, analysis.i_mv_p16x16[0], analysis.i_mv_p16x16[1] );
1983                     break;
1984                 case D_16x8:
1985                     x264_macroblock_partition_set( mb, 0, 0, 0,
1986                                                    analysis.i_ref_p16x8, analysis.i_mv_p16x8[0][0], analysis.i_mv_p16x8[0][1] );
1987                     x264_macroblock_partition_set( mb, 0, 1, 0,
1988                                                    analysis.i_ref_p16x8, analysis.i_mv_p16x8[1][0], analysis.i_mv_p16x8[1][1] );
1989                     break;
1990                 case D_8x16:
1991                     x264_macroblock_partition_set( mb, 0, 0, 0,
1992                                                    analysis.i_ref_p8x16, analysis.i_mv_p8x16[0][0], analysis.i_mv_p8x16[0][1] );
1993                     x264_macroblock_partition_set( mb, 0, 1, 0,
1994                                                    analysis.i_ref_p8x16, analysis.i_mv_p8x16[1][0], analysis.i_mv_p8x16[1][1] );
1995                     break;
1996                 default:
1997                     fprintf( stderr, "internal error\n" );
1998                     break;
1999             }
2000             break;
2001
2002         case P_8x8:
2003             for( i = 0; i < 4; i++ )
2004             {
2005                 int i_sub;
2006
2007                 mb->i_sub_partition[i] = analysis.i_sub_partition_p8x8[i];
2008                 for( i_sub = 0; i_sub < mb_sub_partition_count( mb->i_sub_partition[i] ); i_sub++ )
2009                 {
2010                     x264_macroblock_partition_set( mb, 0, i, i_sub,
2011                                                    analysis.i_ref_p8x8,
2012                                                    analysis.i_mv_p8x8[i][i_sub][0],
2013                                                    analysis.i_mv_p8x8[i][i_sub][1] );
2014                 }
2015             }
2016             break;
2017
2018         default:
2019             fprintf( stderr, "internal error\n" );
2020             break;
2021     }
2022 }
2023
2024
2025
2026 /*****************************************************************************
2027  * x264_macroblock_encode:
2028  *****************************************************************************/
2029 void x264_macroblock_encode( x264_t *h, x264_macroblock_t *mb )
2030 {
2031     int i;
2032
2033     int     i_qscale;
2034
2035     /* quantification scale */
2036     i_qscale = x264_clip3( h->pps.i_pic_init_qp + h->sh.i_qp_delta + mb->i_qp_delta, 0, 51 );
2037
2038     if( mb->i_type == I_16x16 )
2039     {
2040         /* do the right prediction */
2041         h->predict_16x16[mb->i_intra16x16_pred_mode]( mb->context->p_fdec[0], mb->context->i_fdec[0] );
2042
2043         /* encode the 16x16 macroblock */
2044         x264_mb_encode_i16x16( h, mb, i_qscale );
2045
2046         /* fix the pred mode value */
2047         mb->i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix( mb->i_intra16x16_pred_mode );
2048     }
2049     else if( mb->i_type == I_4x4 )
2050     {
2051         for( i = 0; i < 16; i++ )
2052         {
2053             uint8_t *p_dst_by;
2054
2055             /* Do the right prediction */
2056             p_dst_by = mb->context->p_fdec[0] + 4 * block_idx_x[i] + 4 * block_idx_y[i] * mb->context->i_fdec[0];
2057             h->predict_4x4[mb->block[i].i_intra4x4_pred_mode]( p_dst_by, mb->context->i_fdec[0] );
2058
2059             /* encode one 4x4 block */
2060             x264_mb_encode_4x4( h, mb, i, i_qscale );
2061
2062             /* fix the pred mode value */
2063             mb->block[i].i_intra4x4_pred_mode = x264_mb_pred_mode4x4_fix( mb->block[i].i_intra4x4_pred_mode );
2064         }
2065     }
2066     else    /* Inter MB */
2067     {
2068         x264_mb_context_t *ctx = mb->context;
2069         int16_t dct4x4[16][4][4];
2070
2071         int i8x8, i4x4, idx;
2072         int i_decimate_mb = 0;
2073
2074         /* Motion compensation */
2075         x264_macroblock_mc( h, mb, 1 );
2076
2077         for( i8x8 = 0; i8x8 < 4; i8x8++ )
2078         {
2079             int16_t luma[4][4];
2080             int i_decimate_8x8;
2081
2082             /* encode one 4x4 block */
2083             i_decimate_8x8 = 0;
2084             for( i4x4 = 0; i4x4 < 4; i4x4++ )
2085             {
2086                 uint8_t *p_src, *p_dst;
2087
2088                 idx = i8x8 * 4 + i4x4;
2089
2090                 p_src = ctx->p_img[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_img[0];
2091                 p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
2092
2093                 /* we calculate diff */
2094                 h->pixf.sub4x4( luma, p_src, ctx->i_img[0],p_dst, ctx->i_fdec[0] );
2095
2096                 /* calculate dct coeffs */
2097                 h->dctf.dct4x4( dct4x4[idx], luma );
2098                 quant_4x4( dct4x4[idx], i_qscale, 1 );
2099
2100                 scan_zigzag_4x4full( mb->block[idx].luma4x4, dct4x4[idx] );
2101                 i_decimate_8x8 += x264_mb_decimate_score( mb->block[idx].luma4x4, 16 );
2102             }
2103
2104             /* decimate this 8x8 block */
2105             i_decimate_mb += i_decimate_8x8;
2106             if( i_decimate_8x8 < 4 )
2107             {
2108                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2109                 {
2110                     int x, y;
2111                     idx = i8x8 * 4 + i4x4;
2112                     for( i = 0; i < 16; i++ )
2113                     {
2114                         mb->block[idx].luma4x4[i] = 0;
2115                     }
2116                     for( x = 0; x < 4; x++ )
2117                     {
2118                         for( y = 0; y < 4; y++ )
2119                         {
2120                             dct4x4[idx][x][y] = 0;
2121                         }
2122                     }
2123                 }
2124             }
2125         }
2126
2127         if( i_decimate_mb < 6 )
2128         {
2129             for( i8x8 = 0; i8x8 < 4; i8x8++ )
2130             {
2131                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2132                 {
2133                     for( i = 0; i < 16; i++ )
2134                     {
2135                         mb->block[i8x8 * 4 + i4x4].luma4x4[i] = 0;
2136                     }
2137                 }
2138             }
2139         }
2140         else
2141         {
2142             for( i8x8 = 0; i8x8 < 4; i8x8++ )
2143             {
2144                 int16_t luma[4][4];
2145                 /* TODO we could avoid it if we had decimate this 8x8 block */
2146                 /* output samples to fdec */
2147                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2148                 {
2149                     uint8_t *p_dst;
2150
2151                     idx = i8x8 * 4 + i4x4;
2152
2153                     dequant_4x4( dct4x4[idx], i_qscale );
2154                     h->dctf.idct4x4( luma, dct4x4[idx] );
2155
2156                     /* put pixel to fdec */
2157                     p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
2158                     h->pixf.add4x4( p_dst, ctx->i_fdec[0], luma );
2159                 }
2160             }
2161         }
2162     }
2163
2164     /* encode chroma */
2165     i_qscale = i_chroma_qp_table[x264_clip3( i_qscale + h->pps.i_chroma_qp_index_offset, 0, 51 )];
2166     if( IS_INTRA( mb->i_type ) )
2167     {
2168         /* do the right prediction */
2169         h->predict_8x8[mb->i_chroma_pred_mode]( mb->context->p_fdec[1], mb->context->i_fdec[1] );
2170         h->predict_8x8[mb->i_chroma_pred_mode]( mb->context->p_fdec[2], mb->context->i_fdec[2] );
2171     }
2172     else
2173     {
2174         /* Motion compensation */
2175         x264_macroblock_mc( h, mb, 0 );
2176     }
2177     /* encode the 8x8 blocks */
2178     x264_mb_encode_8x8( h, mb, !IS_INTRA( mb->i_type ), i_qscale );
2179
2180     /* fix the pred mode value */
2181     if( IS_INTRA( mb->i_type ) )
2182     {
2183         mb->i_chroma_pred_mode = x264_mb_pred_mode8x8_fix( mb->i_chroma_pred_mode );
2184     }
2185
2186     /* Calculate the Luma/Chroma patern and non_zero_count */
2187     if( mb->i_type == I_16x16 )
2188     {
2189         mb->i_cbp_luma = 0x00;
2190         for( i = 0; i < 16; i++ )
2191         {
2192             mb->block[i].i_non_zero_count = array_non_zero_count( mb->block[i].residual_ac, 15 );
2193             if( mb->block[i].i_non_zero_count > 0 )
2194             {
2195                 mb->i_cbp_luma = 0x0f;
2196             }
2197         }
2198     }
2199     else
2200     {
2201         mb->i_cbp_luma = 0x00;
2202         for( i = 0; i < 16; i++ )
2203         {
2204             mb->block[i].i_non_zero_count = array_non_zero_count( mb->block[i].luma4x4, 16 );
2205             if( mb->block[i].i_non_zero_count > 0 )
2206             {
2207                 mb->i_cbp_luma |= 1 << (i/4);
2208             }
2209         }
2210     }
2211
2212     /* Calculate the chroma patern */
2213     mb->i_cbp_chroma = 0x00;
2214     for( i = 0; i < 8; i++ )
2215     {
2216         mb->block[16+i].i_non_zero_count = array_non_zero_count( mb->block[16+i].residual_ac, 15 );
2217         if( mb->block[16+i].i_non_zero_count > 0 )
2218         {
2219             mb->i_cbp_chroma = 0x02;    /* dc+ac (we can't do only ac) */
2220         }
2221     }
2222     if( mb->i_cbp_chroma == 0x00 &&
2223         ( array_non_zero_count( mb->chroma_dc[0], 4 ) > 0 || array_non_zero_count( mb->chroma_dc[1], 4 ) ) > 0 )
2224     {
2225         mb->i_cbp_chroma = 0x01;    /* dc only */
2226     }
2227
2228     /* Check for P_SKIP
2229      * XXX: in the me perhaps we should take x264_macroblock_predict_mv_pskip into account
2230      *      (if multiple mv give same result)*/
2231     if( mb->i_type == P_L0 && mb->i_partition == D_16x16 &&
2232         mb->i_cbp_luma == 0x00 && mb->i_cbp_chroma == 0x00 )
2233     {
2234
2235         int i_ref;
2236         int mvx, mvy;
2237         x264_macroblock_partition_get( mb, 0, 0, 0, &i_ref, &mvx, &mvy );
2238
2239         if( i_ref == 0 )
2240         {
2241             int mvxp, mvyp;
2242
2243             x264_macroblock_predict_mv_pskip( mb, &mvxp, &mvyp );
2244             if( mvxp == mvx && mvyp == mvy )
2245             {
2246                 mb->i_type = P_SKIP;
2247             }
2248         }
2249     }
2250 }
2251
2252
2253 #define BLOCK_INDEX_CHROMA_DC   (-1)
2254 #define BLOCK_INDEX_LUMA_DC     (-2)
2255
2256 /****************************************************************************
2257  * block_residual_write:
2258  ****************************************************************************/
2259 static void block_residual_write( x264_t *h, bs_t *s, x264_macroblock_t *mb, int i_idx, int *l, int i_count )
2260 {
2261     int level[16], run[16];
2262     int i_total, i_trailing;
2263     int i_total_zero;
2264     int i_last;
2265     unsigned int i_sign;
2266
2267     int i;
2268     int i_zero_left;
2269     int i_suffix_length;
2270
2271     /* first find i_last */
2272     i_last = i_count - 1;
2273     while( i_last >= 0 && l[i_last] == 0 )
2274     {
2275         i_last--;
2276     }
2277
2278     i_sign = 0;
2279     i_total = 0;
2280     i_trailing = 0;
2281     i_total_zero = 0;
2282
2283     if( i_last >= 0 )
2284     {
2285         int b_trailing = 1;
2286         int idx = 0;
2287
2288         /* level and run and total */
2289         while( i_last >= 0 )
2290         {
2291             level[idx] = l[i_last--];
2292
2293             run[idx] = 0;
2294             while( i_last >= 0 && l[i_last] == 0 )
2295             {
2296                 run[idx]++;
2297                 i_last--;
2298             }
2299
2300             i_total++;
2301             i_total_zero += run[idx];
2302
2303             if( b_trailing && abs( level[idx] ) == 1 && i_trailing < 3 )
2304             {
2305                 i_sign <<= 1;
2306                 if( level[idx] < 0 )
2307                 {
2308                     i_sign |= 0x01;
2309                 }
2310
2311                 i_trailing++;
2312             }
2313             else
2314             {
2315                 b_trailing = 0;
2316             }
2317
2318             idx++;
2319         }
2320     }
2321
2322     /* total/trailing */
2323     if( i_idx == BLOCK_INDEX_CHROMA_DC )
2324     {
2325         bs_write_vlc( s, x264_coeff_token[4][i_total][i_trailing] );
2326     }
2327     else
2328     {
2329         /* predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */
2330         static const int ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3 };
2331         int nC;
2332
2333         if( i_idx == BLOCK_INDEX_LUMA_DC )
2334         {
2335             nC = predict_non_zero_code( h, mb, 0 );
2336         }
2337         else
2338         {
2339             nC = predict_non_zero_code( h, mb, i_idx );
2340         }
2341
2342         bs_write_vlc( s, x264_coeff_token[ct_index[nC]][i_total][i_trailing] );
2343     }
2344
2345     if( i_total <= 0 )
2346     {
2347         return;
2348     }
2349
2350     i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0;
2351     if( i_trailing > 0 )
2352     {
2353         bs_write( s, i_trailing, i_sign );
2354     }
2355     for( i = i_trailing; i < i_total; i++ )
2356     {
2357         int i_level_code;
2358
2359         /* calculate level code */
2360         if( level[i] < 0 )
2361         {
2362             i_level_code = -2*level[i] - 1;
2363         }
2364         else /* if( level[i] > 0 ) */
2365         {
2366             i_level_code = 2 * level[i] - 2;
2367         }
2368         if( i == i_trailing && i_trailing < 3 )
2369         {
2370             i_level_code -=2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */
2371         }
2372
2373         if( ( i_level_code >> i_suffix_length ) < 14 )
2374         {
2375             bs_write_vlc( s, x264_level_prefix[i_level_code >> i_suffix_length] );
2376             if( i_suffix_length > 0 )
2377             {
2378                 bs_write( s, i_suffix_length, i_level_code );
2379             }
2380         }
2381         else if( i_suffix_length == 0 && i_level_code < 30 )
2382         {
2383             bs_write_vlc( s, x264_level_prefix[14] );
2384             bs_write( s, 4, i_level_code - 14 );
2385         }
2386         else if( i_suffix_length > 0 && ( i_level_code >> i_suffix_length ) == 14 )
2387         {
2388             bs_write_vlc( s, x264_level_prefix[14] );
2389             bs_write( s, i_suffix_length, i_level_code );
2390         }
2391         else
2392         {
2393             bs_write_vlc( s, x264_level_prefix[15] );
2394             i_level_code -= 15 << i_suffix_length;
2395             if( i_suffix_length == 0 )
2396             {
2397                 i_level_code -= 15;
2398             }
2399
2400             if( i_level_code >= ( 1 << 12 ) || i_level_code < 0 )
2401             {
2402                 fprintf( stderr, "OVERFLOW levelcode=%d\n", i_level_code );
2403             }
2404
2405             bs_write( s, 12, i_level_code );    /* check overflow ?? */
2406         }
2407
2408         if( i_suffix_length == 0 )
2409         {
2410             i_suffix_length++;
2411         }
2412         if( abs( level[i] ) > ( 3 << ( i_suffix_length - 1 ) ) && i_suffix_length < 6 )
2413         {
2414             i_suffix_length++;
2415         }
2416     }
2417
2418     if( i_total < i_count )
2419     {
2420         if( i_idx == BLOCK_INDEX_CHROMA_DC )
2421         {
2422             bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] );
2423         }
2424         else
2425         {
2426             bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );
2427         }
2428     }
2429
2430     for( i = 0, i_zero_left = i_total_zero; i < i_total - 1; i++ )
2431     {
2432         int i_zl;
2433
2434         if( i_zero_left <= 0 )
2435         {
2436             break;
2437         }
2438
2439         i_zl = X264_MIN( i_zero_left - 1, 6 );
2440
2441         bs_write_vlc( s, x264_run_before[i_zl][run[i]] );
2442
2443         i_zero_left -= run[i];
2444     }
2445 }
2446
2447
2448
2449
2450 /*****************************************************************************
2451  * x264_macroblock_write:
2452  *****************************************************************************/
2453 void x264_macroblock_write( x264_t *h, bs_t *s, int i_slice_type, x264_macroblock_t *mb )
2454 {
2455     int i;
2456     int i_mb_i_offset;
2457     int b_sub_ref0 = 1;
2458     /* int b_sub_ref1 = 1; */
2459
2460     switch( i_slice_type )
2461     {
2462         case SLICE_TYPE_I:
2463             i_mb_i_offset = 0;
2464             break;
2465         case SLICE_TYPE_P:
2466             i_mb_i_offset = 5;
2467             break;
2468         case SLICE_TYPE_B:
2469             i_mb_i_offset = 23 + 5;
2470             break;
2471         default:
2472             fprintf( stderr, "internal error or slice unsupported\n" );
2473             return;
2474     }
2475
2476     /* PCM special block type UNTESTED */
2477     if( mb->i_type == I_PCM )
2478     {
2479         bs_write_ue( s, i_mb_i_offset + 25 );   /* I_PCM */
2480         bs_align_0( s );
2481         /* Luma */
2482         for( i = 0; i < 16*16; i++ )
2483         {
2484             bs_write( s, 8, h->picture->plane[0][mb->i_mb_y * 16 * h->picture->i_stride[0] + mb->i_mb_x * 16+i] );
2485         }
2486         /* Cb */
2487         for( i = 0; i < 8*8; i++ )
2488         {
2489             bs_write( s, 8, h->picture->plane[1][mb->i_mb_y * 8 * h->picture->i_stride[1] + mb->i_mb_x * 8+i] );
2490         }
2491         /* Cr */
2492         for( i = 0; i < 8*8; i++ )
2493         {
2494             bs_write( s, 8, h->picture->plane[2][mb->i_mb_y * 8 * h->picture->i_stride[2] + mb->i_mb_x * 8+i] );
2495         }
2496
2497         for( i = 0; i < 16 + 8; i++ )
2498         {
2499             /* special case */
2500             mb->block[i].i_non_zero_count = 16;
2501         }
2502         return;
2503     }
2504
2505     if( mb->i_type == I_4x4 )
2506     {
2507         bs_write_ue( s, i_mb_i_offset + 0 );    /* I_4x4 */
2508     }
2509     else if( mb->i_type == I_16x16 )
2510     {
2511         int i_type = 1 + mb->i_intra16x16_pred_mode + mb->i_cbp_chroma * 4 + ( mb->i_cbp_luma == 0 ? 0 : 12 );
2512
2513         bs_write_ue( s, i_mb_i_offset + i_type );
2514     }
2515     else if( mb->i_type == P_L0 )
2516     {
2517         if( mb->i_partition == D_16x16 )
2518         {
2519             bs_write_ue( s, 0 );
2520         }
2521         else if( mb->i_partition == D_16x8 )
2522         {
2523             bs_write_ue( s, 1 );
2524         }
2525         else if( mb->i_partition == D_8x16 )
2526         {
2527             bs_write_ue( s, 2 );
2528         }
2529     }
2530     else if( mb->i_type == P_8x8 )
2531     {
2532         if( mb->partition[0][0].i_ref[0] == 0 &&
2533             mb->partition[0][2].i_ref[0] == 0 &&
2534             mb->partition[2][0].i_ref[0] == 0 &&
2535             mb->partition[2][2].i_ref[0] == 0 )
2536         {
2537             b_sub_ref0 = 0;
2538             bs_write_ue( s, 4 );    /* P_8x8ref0 */
2539         }
2540         else
2541         {
2542             b_sub_ref0 = 1;
2543             bs_write_ue( s, 3 );
2544         }
2545     }
2546     else
2547     {
2548         /* TODO B type */
2549     }
2550
2551     if( IS_INTRA( mb->i_type ) )
2552     {
2553         /* Prediction */
2554         if( mb->i_type == I_4x4 )
2555         {
2556             for( i = 0; i < 16; i++ )
2557             {
2558                 int i_predicted_mode = predict_pred_intra4x4_mode( h, mb, i );
2559                 int i_mode = mb->block[i].i_intra4x4_pred_mode;
2560
2561                 if( i_predicted_mode == i_mode)
2562                 {
2563                     bs_write( s, 1, 1 );  /* b_prev_intra4x4_pred_mode */
2564                 }
2565                 else
2566                 {
2567                     bs_write( s, 1, 0 );  /* b_prev_intra4x4_pred_mode */
2568                     if( i_mode < i_predicted_mode )
2569                     {
2570                         bs_write( s, 3, i_mode );
2571                     }
2572                     else
2573                     {
2574                         bs_write( s, 3, i_mode - 1 );
2575                     }
2576                 }
2577             }
2578         }
2579         bs_write_ue( s, mb->i_chroma_pred_mode );
2580     }
2581     else if( mb->i_type == P_8x8 )
2582     {
2583         /* sub mb type */
2584         for( i = 0; i < 4; i++ )
2585         {
2586             switch( mb->i_sub_partition[i] )
2587             {
2588                 case D_L0_8x8:
2589                     bs_write_ue( s, 0 );
2590                     break;
2591                 case D_L0_8x4:
2592                     bs_write_ue( s, 1 );
2593                     break;
2594                 case D_L0_4x8:
2595                     bs_write_ue( s, 2 );
2596                     break;
2597                 case D_L0_4x4:
2598                     bs_write_ue( s, 3 );
2599                     break;
2600             }
2601         }
2602         /* ref0 */
2603         if( h->sh.i_num_ref_idx_l0_active > 1 && b_sub_ref0 )
2604         {
2605             for( i = 0; i < 4; i++ )
2606             {
2607                 int i_ref;
2608                 x264_macroblock_partition_get( mb, 0, i, 0, &i_ref, NULL, NULL );
2609
2610                 bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, i_ref );
2611             }
2612         }
2613         for( i = 0; i < 4; i++ )
2614         {
2615             int i_part;
2616             for( i_part = 0; i_part < mb_sub_partition_count( mb->i_sub_partition[i] ); i_part++ )
2617             {
2618                 int mvx, mvy;
2619                 int mvxp, mvyp;
2620
2621                 x264_macroblock_partition_get( mb, 0, i, i_part, NULL, &mvx, &mvy );
2622                 x264_macroblock_predict_mv( mb, 0, i, i_part, &mvxp, &mvyp );
2623
2624                 bs_write_se( s, mvx - mvxp );
2625                 bs_write_se( s, mvy - mvyp);
2626             }
2627         }
2628     }
2629     else if( mb->i_type == B_8x8 )
2630     {
2631         /* TODO for B-frame (merge it with P_8x8 ?)*/
2632     }
2633     else if( mb->i_type != B_DIRECT )
2634     {
2635         /* FIXME -> invalid for B frame */
2636
2637         /* Motion Vector */
2638         int i_part = 1 + ( mb->i_partition != D_16x16 ? 1 : 0 );
2639
2640         if( h->sh.i_num_ref_idx_l0_active > 1 )
2641         {
2642             for( i = 0; i < i_part; i++ )
2643             {
2644                 if( mb->i_type == P_L0 )    /* fixme B-frame */
2645                 {
2646                     int i_ref;
2647                     x264_macroblock_partition_get( mb, 0, i, 0, &i_ref, NULL, NULL );
2648                     bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, i_ref ); /* -1 is correct ? */
2649                 }
2650             }
2651         }
2652         if( h->sh.i_num_ref_idx_l1_active > 1 )
2653         {
2654             for( i = 0; i < i_part; i++ )
2655             {
2656                 /* ref idx part L1 TODO when needed */
2657             }
2658         }
2659
2660         for( i = 0; i < i_part; i++ )
2661         {
2662             if( mb->i_type == P_L0 )
2663             {
2664                 int mvx, mvy;
2665                 int mvxp, mvyp;
2666
2667                 x264_macroblock_partition_get( mb, 0, i, 0, NULL, &mvx, &mvy );
2668                 x264_macroblock_predict_mv( mb, 0, i, 0, &mvxp, &mvyp );
2669
2670                 bs_write_se( s, mvx - mvxp );
2671                 bs_write_se( s, mvy - mvyp);
2672             }
2673         }
2674         /* Same for L1 for B frame */
2675     }
2676
2677     if( mb->i_type != I_16x16 )
2678     {
2679         if( mb->i_type == I_4x4 )
2680         {
2681             bs_write_ue( s, intra4x4_cbp_to_golomb[( mb->i_cbp_chroma << 4 )|mb->i_cbp_luma] );
2682         }
2683         else
2684         {
2685             bs_write_ue( s, inter_cbp_to_golomb[( mb->i_cbp_chroma << 4 )|mb->i_cbp_luma] );
2686         }
2687     }
2688
2689     if( mb->i_cbp_luma > 0 || mb->i_cbp_chroma > 0 || mb->i_type == I_16x16 )
2690     {
2691         bs_write_se( s, mb->i_qp_delta );
2692
2693         /* write residual */
2694         if( mb->i_type == I_16x16 )
2695         {
2696             /* DC Luma */
2697             block_residual_write( h, s, mb, BLOCK_INDEX_LUMA_DC , mb->luma16x16_dc, 16 );
2698
2699             if( mb->i_cbp_luma != 0 )
2700             {
2701                 /* AC Luma */
2702                 for( i = 0; i < 16; i++ )
2703                 {
2704                     block_residual_write( h, s, mb, i, mb->block[i].residual_ac, 15 );
2705                 }
2706             }
2707         }
2708         else
2709         {
2710             for( i = 0; i < 16; i++ )
2711             {
2712                 if( mb->i_cbp_luma & ( 1 << ( i / 4 ) ) )
2713                 {
2714                     block_residual_write( h, s, mb, i, mb->block[i].luma4x4, 16 );
2715                 }
2716             }
2717         }
2718
2719         if( mb->i_cbp_chroma &0x03 )    /* Chroma DC residual present */
2720         {
2721             block_residual_write( h, s, mb, BLOCK_INDEX_CHROMA_DC, mb->chroma_dc[0], 4 );
2722             block_residual_write( h, s, mb, BLOCK_INDEX_CHROMA_DC, mb->chroma_dc[1], 4 );
2723         }
2724         if( mb->i_cbp_chroma&0x02 ) /* Chroma AC residual present */
2725         {
2726             for( i = 0; i < 8; i++ )
2727             {
2728                 block_residual_write( h, s, mb, 16 + i, mb->block[16+i].residual_ac, 15 );
2729             }
2730         }
2731     }
2732 }
2733