git.sesse.net Git - x264/blob - encoder/cavlc.c

   1 /*****************************************************************************
   2  * cavlc.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003-2008 x264 project
   5  *
   6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   7  *          Loren Merritt <lorenm@u.washington.edu>
   8  *          Fiona Glaser <fiona@x264.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include "common/common.h"
  26 #include "macroblock.h"
  27
  28 #ifndef RDO_SKIP_BS
  29 #define RDO_SKIP_BS 0
  30 #endif
  31
  32 static const uint8_t intra4x4_cbp_to_golomb[48]=
  33 {
  34   3, 29, 30, 17, 31, 18, 37,  8, 32, 38, 19,  9, 20, 10, 11,  2,
  35  16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
  36  41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0
  37 };
  38 static const uint8_t inter_cbp_to_golomb[48]=
  39 {
  40   0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
  41   1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
  42   6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
  43 };
  44 static const uint8_t mb_type_b_to_golomb[3][9]=
  45 {
  46     { 4,  8, 12, 10,  6, 14, 16, 18, 20 }, /* D_16x8 */
  47     { 5,  9, 13, 11,  7, 15, 17, 19, 21 }, /* D_8x16 */
  48     { 1, -1, -1, -1,  2, -1, -1, -1,  3 }  /* D_16x16 */
  49 };
  50 static const uint8_t sub_mb_type_p_to_golomb[4]=
  51 {
  52     3, 1, 2, 0
  53 };
  54 static const uint8_t sub_mb_type_b_to_golomb[13]=
  55 {
  56     10,  4,  5,  1, 11,  6,  7,  2, 12,  8,  9,  3,  0
  57 };
  58
  59 #define bs_write_vlc(s,v) bs_write( s, (v).i_size, (v).i_bits )
  60
  61 /****************************************************************************
  62  * block_residual_write_cavlc:
  63  ****************************************************************************/
  64 static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_suffix_length, int level )
  65 {
  66     static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
  67     int i_level_prefix = 15;
  68     int mask = level >> 15;
  69     int abs_level = (level^mask)-mask;
  70     int i_level_code = abs_level*2-mask-2;
  71     if( ( i_level_code >> i_suffix_length ) < 15 )
  72     {
  73         bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length,
  74                  (1<<i_suffix_length) + (i_level_code & ((1<<i_suffix_length)-1)) );
  75     }
  76     else
  77     {
  78         i_level_code -= 15 << i_suffix_length;
  79         if( i_suffix_length == 0 )
  80             i_level_code -= 15;
  81
  82         /* If the prefix size exceeds 15, High Profile is required. */
  83         if( i_level_code >= 1<<12 )
  84         {
  85             if( h->sps->i_profile_idc >= PROFILE_HIGH )
  86             {
  87                 while( i_level_code > 1<<(i_level_prefix-3) )
  88                 {
  89                     i_level_code -= 1<<(i_level_prefix-3);
  90                     i_level_prefix++;
  91                 }
  92             }
  93             else
  94             {
  95 #if RDO_SKIP_BS
  96                 /* Weight highly against overflows. */
  97                 s->i_bits_encoded += 1000000;
  98 #else
  99                 x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile", i_level_code );
 100                 /* clip level, preserving sign */
 101                 i_level_code = (1<<12) - 2 + (i_level_code & 1);
 102 #endif
 103             }
 104         }
 105         bs_write( s, i_level_prefix + 1, 1 );
 106         bs_write( s, i_level_prefix - 3, i_level_code & ((1<<(i_level_prefix-3))-1) );
 107     }
 108     if( i_suffix_length == 0 )
 109         i_suffix_length++;
 110     if( abs_level > next_suffix[i_suffix_length] )
 111         i_suffix_length++;
 112     return i_suffix_length;
 113 }
 114
 115 static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count )
 116 {
 117     static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
 118     static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
 119     int level[16], run[16];
 120     int i_trailing, i_total_zero, i_last, i_suffix_length, i;
 121     int i_total = 0;
 122     unsigned int i_sign;
 123     /* x264_mb_predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */
 124     int nC = i_idx >= 25 ? 4 : ct_index[x264_mb_predict_non_zero_code( h, i_idx == 24 ? 0 : i_idx )];
 125
 126     if( !h->mb.cache.non_zero_count[x264_scan8[i_idx]] )
 127     {
 128         bs_write_vlc( s, x264_coeff_token[nC][0] );
 129         return;
 130     }
 131
 132     i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
 133     i_total_zero = i_last + 1;
 134
 135     /* level and run and total */
 136     /* set these to 2 to allow branchless i_trailing calculation */
 137     level[1] = 2;
 138     level[2] = 2;
 139     do
 140     {
 141         int r = 0;
 142         level[i_total] = l[i_last];
 143         while( --i_last >= 0 && l[i_last] == 0 )
 144             r++;
 145         run[i_total++] = r;
 146     } while( i_last >= 0 );
 147
 148     h->mb.cache.non_zero_count[x264_scan8[i_idx]] = i_total;
 149
 150     i_total_zero -= i_total;
 151     i_trailing = ((((level[0]+1) | (1-level[0])) >> 31) & 1) // abs(level[0])>1
 152                | ((((level[1]+1) | (1-level[1])) >> 31) & 2)
 153                | ((((level[2]+1) | (1-level[2])) >> 31) & 4);
 154     i_trailing = ctz_index[i_trailing];
 155     i_sign = ((level[2] >> 31) & 1)
 156            | ((level[1] >> 31) & 2)
 157            | ((level[0] >> 31) & 4);
 158     i_sign >>= 3-i_trailing;
 159
 160     /* total/trailing */
 161     bs_write_vlc( s, x264_coeff_token[nC][i_total*4+i_trailing] );
 162
 163     i_suffix_length = i_total > 10 && i_trailing < 3;
 164     if( i_trailing > 0 || RDO_SKIP_BS )
 165         bs_write( s, i_trailing, i_sign );
 166
 167     if( i_trailing < i_total )
 168     {
 169         int16_t val = level[i_trailing];
 170         int16_t val_original = level[i_trailing]+LEVEL_TABLE_SIZE/2;
 171         if( i_trailing < 3 )
 172             val -= (val>>15)|1; /* as level[i] can't be 1 for the first one if i_trailing < 3 */
 173         val += LEVEL_TABLE_SIZE/2;
 174
 175         if( (unsigned)val_original < LEVEL_TABLE_SIZE )
 176         {
 177             bs_write_vlc( s, x264_level_token[i_suffix_length][val] );
 178             i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
 179         }
 180         else
 181             i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
 182         for( i = i_trailing+1; i < i_total; i++ )
 183         {
 184             val = level[i] + LEVEL_TABLE_SIZE/2;
 185             if( (unsigned)val < LEVEL_TABLE_SIZE )
 186             {
 187                 bs_write_vlc( s, x264_level_token[i_suffix_length][val] );
 188                 i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
 189             }
 190             else
 191                 i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
 192         }
 193     }
 194
 195     if( i_total < i_count )
 196     {
 197         if( i_idx >= 25 )
 198             bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] );
 199         else
 200             bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );
 201     }
 202
 203     for( i = 0; i < i_total-1 && i_total_zero > 0; i++ )
 204     {
 205         int i_zl = X264_MIN( i_total_zero - 1, 6 );
 206         bs_write_vlc( s, x264_run_before[i_zl][run[i]] );
 207         i_total_zero -= run[i];
 208     }
 209 }
 210
 211 static void cavlc_qp_delta( x264_t *h, bs_t *s )
 212 {
 213     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
 214
 215     /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
 216     if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
 217         && !h->mb.cache.non_zero_count[x264_scan8[24]] )
 218     {
 219 #if !RDO_SKIP_BS
 220         h->mb.i_qp = h->mb.i_last_qp;
 221 #endif
 222         i_dqp = 0;
 223     }
 224
 225     if( i_dqp )
 226     {
 227         if( i_dqp < -26 )
 228             i_dqp += 52;
 229         else if( i_dqp > 25 )
 230             i_dqp -= 52;
 231     }
 232     bs_write_se( s, i_dqp );
 233 }
 234
 235 static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
 236 {
 237     DECLARE_ALIGNED_4( int16_t mvp[2] );
 238     x264_mb_predict_mv( h, i_list, idx, width, mvp );
 239     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
 240     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
 241 }
 242
 243 static void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i_list, int i )
 244 {
 245     if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
 246         return;
 247
 248     switch( h->mb.i_sub_partition[i] )
 249     {
 250         case D_L0_8x8:
 251         case D_L1_8x8:
 252         case D_BI_8x8:
 253             cavlc_mb_mvd( h, s, i_list, 4*i, 2 );
 254             break;
 255         case D_L0_8x4:
 256         case D_L1_8x4:
 257         case D_BI_8x4:
 258             cavlc_mb_mvd( h, s, i_list, 4*i+0, 2 );
 259             cavlc_mb_mvd( h, s, i_list, 4*i+2, 2 );
 260             break;
 261         case D_L0_4x8:
 262         case D_L1_4x8:
 263         case D_BI_4x8:
 264             cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 );
 265             cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 );
 266             break;
 267         case D_L0_4x4:
 268         case D_L1_4x4:
 269         case D_BI_4x4:
 270             cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 );
 271             cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 );
 272             cavlc_mb_mvd( h, s, i_list, 4*i+2, 1 );
 273             cavlc_mb_mvd( h, s, i_list, 4*i+3, 1 );
 274             break;
 275     }
 276 }
 277
 278 static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
 279 {
 280     int i8, i4;
 281     if( h->mb.b_transform_8x8 )
 282     {
 283         /* shuffle 8x8 dct coeffs into 4x4 lists */
 284         for( i8 = i8start; i8 <= i8end; i8++ )
 285             if( h->mb.i_cbp_luma & (1 << i8) )
 286             {
 287                 h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8] );
 288                 for( i4 = 0; i4 < 4; i4++ )
 289                     h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero( h->dct.luma4x4[i4+i8*4] );
 290             }
 291     }
 292
 293     for( i8 = i8start; i8 <= i8end; i8++ )
 294         if( h->mb.i_cbp_luma & (1 << i8) )
 295             for( i4 = 0; i4 < 4; i4++ )
 296                 block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
 297 }
 298
 299 /*****************************************************************************
 300  * x264_macroblock_write:
 301  *****************************************************************************/
 302 void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
 303 {
 304     const int i_mb_type = h->mb.i_type;
 305     int i_mb_i_offset;
 306     int i;
 307
 308 #if !RDO_SKIP_BS
 309     const int i_mb_pos_start = bs_pos( s );
 310     int       i_mb_pos_tex;
 311 #endif
 312
 313     switch( h->sh.i_type )
 314     {
 315         case SLICE_TYPE_I:
 316             i_mb_i_offset = 0;
 317             break;
 318         case SLICE_TYPE_P:
 319             i_mb_i_offset = 5;
 320             break;
 321         case SLICE_TYPE_B:
 322             i_mb_i_offset = 23;
 323             break;
 324         default:
 325             x264_log(h, X264_LOG_ERROR, "internal error or slice unsupported\n" );
 326             return;
 327     }
 328
 329     if( h->sh.b_mbaff
 330         && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
 331     {
 332         bs_write1( s, h->mb.b_interlaced );
 333     }
 334
 335 #if !RDO_SKIP_BS
 336     if( i_mb_type == I_PCM)
 337     {
 338         bs_write_ue( s, i_mb_i_offset + 25 );
 339         i_mb_pos_tex = bs_pos( s );
 340         h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
 341
 342         bs_align_0( s );
 343
 344         memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
 345         s->p += 256;
 346         for( i = 0; i < 8; i++ )
 347             memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
 348         s->p += 64;
 349         for( i = 0; i < 8; i++ )
 350             memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
 351         s->p += 64;
 352
 353         /* if PCM is chosen, we need to store reconstructed frame data */
 354         h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
 355         h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
 356         h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
 357
 358         h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
 359         return;
 360     }
 361 #endif
 362
 363     /* Write:
 364       - type
 365       - prediction
 366       - mv */
 367     if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
 368     {
 369         int di = i_mb_type == I_8x8 ? 4 : 1;
 370         bs_write_ue( s, i_mb_i_offset + 0 );
 371         if( h->pps->b_transform_8x8_mode )
 372             bs_write1( s, h->mb.b_transform_8x8 );
 373
 374         /* Prediction: Luma */
 375         for( i = 0; i < 16; i += di )
 376         {
 377             int i_pred = x264_mb_predict_intra4x4_mode( h, i );
 378             int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
 379
 380             if( i_pred == i_mode )
 381                 bs_write1( s, 1 );  /* b_prev_intra4x4_pred_mode */
 382             else
 383                 bs_write( s, 4, i_mode - (i_mode > i_pred) );
 384         }
 385         bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
 386     }
 387     else if( i_mb_type == I_16x16 )
 388     {
 389         bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
 390                         h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
 391         bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
 392     }
 393     else if( i_mb_type == P_L0 )
 394     {
 395         DECLARE_ALIGNED_4( int16_t mvp[2] );
 396
 397         if( h->mb.i_partition == D_16x16 )
 398         {
 399             bs_write_ue( s, 0 );
 400
 401             if( h->mb.pic.i_fref[0] > 1 )
 402                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
 403             x264_mb_predict_mv( h, 0, 0, 4, mvp );
 404             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] );
 405             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] );
 406         }
 407         else if( h->mb.i_partition == D_16x8 )
 408         {
 409             bs_write_ue( s, 1 );
 410             if( h->mb.pic.i_fref[0] > 1 )
 411             {
 412                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
 413                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
 414             }
 415
 416             x264_mb_predict_mv( h, 0, 0, 4, mvp );
 417             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] );
 418             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] );
 419
 420             x264_mb_predict_mv( h, 0, 8, 4, mvp );
 421             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[8]][0] - mvp[0] );
 422             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[8]][1] - mvp[1] );
 423         }
 424         else if( h->mb.i_partition == D_8x16 )
 425         {
 426             bs_write_ue( s, 2 );
 427             if( h->mb.pic.i_fref[0] > 1 )
 428             {
 429                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
 430                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
 431             }
 432
 433             x264_mb_predict_mv( h, 0, 0, 2, mvp );
 434             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] );
 435             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] );
 436
 437             x264_mb_predict_mv( h, 0, 4, 2, mvp );
 438             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4]][0] - mvp[0] );
 439             bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4]][1] - mvp[1] );
 440         }
 441     }
 442     else if( i_mb_type == P_8x8 )
 443     {
 444         int b_sub_ref0;
 445         if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
 446              h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
 447         {
 448             bs_write_ue( s, 4 );
 449             b_sub_ref0 = 0;
 450         }
 451         else
 452         {
 453             bs_write_ue( s, 3 );
 454             b_sub_ref0 = 1;
 455         }
 456
 457         /* sub mb type */
 458         if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
 459             for( i = 0; i < 4; i++ )
 460                 bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
 461         else
 462             bs_write( s, 4, 0xf );
 463
 464         /* ref0 */
 465         if( h->mb.pic.i_fref[0] > 1 && b_sub_ref0 )
 466         {
 467             bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
 468             bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
 469             bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
 470             bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
 471         }
 472
 473         for( i = 0; i < 4; i++ )
 474             cavlc_mb8x8_mvd( h, s, 0, i );
 475     }
 476     else if( i_mb_type == B_8x8 )
 477     {
 478         bs_write_ue( s, 22 );
 479
 480         /* sub mb type */
 481         for( i = 0; i < 4; i++ )
 482             bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
 483
 484         /* ref */
 485         for( i = 0; i < 4; i++ )
 486             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
 487                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
 488         for( i = 0; i < 4; i++ )
 489             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
 490                 bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
 491
 492         /* mvd */
 493         for( i = 0; i < 4; i++ )
 494             cavlc_mb8x8_mvd( h, s, 0, i );
 495         for( i = 0; i < 4; i++ )
 496             cavlc_mb8x8_mvd( h, s, 1, i );
 497     }
 498     else if( i_mb_type != B_DIRECT )
 499     {
 500         /* All B mode */
 501         /* Motion Vector */
 502         int i_list;
 503         DECLARE_ALIGNED_4( int16_t mvp[2] );
 504         const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
 505
 506         bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
 507
 508         for( i_list = 0; i_list < 2; i_list++ )
 509         {
 510             const int i_ref_max = (i_list == 0 ? h->mb.pic.i_fref[0] : h->mb.pic.i_fref[1]) - 1;
 511
 512             if( i_ref_max )
 513                 switch( h->mb.i_partition )
 514                 {
 515                     case D_16x16:
 516                         if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] );
 517                         break;
 518                     case D_16x8:
 519                         if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] );
 520                         if( b_list[i_list][1] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[8]] );
 521                         break;
 522                     case D_8x16:
 523                         if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] );
 524                         if( b_list[i_list][1] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[4]] );
 525                         break;
 526                 }
 527         }
 528         for( i_list = 0; i_list < 2; i_list++ )
 529         {
 530             switch( h->mb.i_partition )
 531             {
 532                 case D_16x16:
 533                     if( b_list[i_list][0] )
 534                     {
 535                         x264_mb_predict_mv( h, i_list, 0, 4, mvp );
 536                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] );
 537                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] );
 538                     }
 539                     break;
 540                 case D_16x8:
 541                     if( b_list[i_list][0] )
 542                     {
 543                         x264_mb_predict_mv( h, i_list, 0, 4, mvp );
 544                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] );
 545                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] );
 546                     }
 547                     if( b_list[i_list][1] )
 548                     {
 549                         x264_mb_predict_mv( h, i_list, 8, 4, mvp );
 550                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[8]][0] - mvp[0] );
 551                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[8]][1] - mvp[1] );
 552                     }
 553                     break;
 554                 case D_8x16:
 555                     if( b_list[i_list][0] )
 556                     {
 557                         x264_mb_predict_mv( h, i_list, 0, 2, mvp );
 558                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] );
 559                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] );
 560                     }
 561                     if( b_list[i_list][1] )
 562                     {
 563                         x264_mb_predict_mv( h, i_list, 4, 2, mvp );
 564                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4]][0] - mvp[0] );
 565                         bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4]][1] - mvp[1] );
 566                     }
 567                     break;
 568             }
 569         }
 570     }
 571     else if( i_mb_type == B_DIRECT )
 572         bs_write_ue( s, 0 );
 573     else
 574     {
 575         x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" );
 576         return;
 577     }
 578
 579 #if !RDO_SKIP_BS
 580     i_mb_pos_tex = bs_pos( s );
 581     h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
 582 #endif
 583
 584     /* Coded block patern */
 585     if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
 586         bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
 587     else if( i_mb_type != I_16x16 )
 588         bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
 589
 590     /* transform size 8x8 flag */
 591     if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
 592         bs_write1( s, h->mb.b_transform_8x8 );
 593
 594     /* write residual */
 595     if( i_mb_type == I_16x16 )
 596     {
 597         cavlc_qp_delta( h, s );
 598
 599         /* DC Luma */
 600         block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc, 16 );
 601
 602         /* AC Luma */
 603         if( h->mb.i_cbp_luma )
 604             for( i = 0; i < 16; i++ )
 605                 block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 15 );
 606     }
 607     else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
 608     {
 609         cavlc_qp_delta( h, s );
 610         x264_macroblock_luma_write_cavlc( h, s, 0, 3 );
 611     }
 612     if( h->mb.i_cbp_chroma )
 613     {
 614         /* Chroma DC residual present */
 615         block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
 616         block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
 617         if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
 618             for( i = 16; i < 24; i++ )
 619                 block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
 620     }
 621
 622 #if !RDO_SKIP_BS
 623     h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
 624 #endif
 625 }
 626
 627 #if RDO_SKIP_BS
 628 /*****************************************************************************
 629  * RD only; doesn't generate a valid bitstream
 630  * doesn't write cbp or chroma dc (I don't know how much this matters)
 631  * doesn't write ref or subpartition (never varies between calls, so no point in doing so)
 632  * works on all partition sizes except 16x16
 633  * for sub8x8, call once per 8x8 block
 634  *****************************************************************************/
 635 static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
 636 {
 637     bs_t s;
 638     const int i_mb_type = h->mb.i_type;
 639     int b_8x16 = h->mb.i_partition == D_8x16;
 640     int j;
 641
 642     s.i_bits_encoded = 0;
 643
 644     if( i_mb_type == P_8x8 )
 645         cavlc_mb8x8_mvd( h, &s, 0, i8 );
 646     else if( i_mb_type == P_L0 )
 647         cavlc_mb_mvd( h, &s, 0, 4*i8, 4>>b_8x16 );
 648     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
 649     {
 650         if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, &s, 0, 4*i8, 4>>b_8x16 );
 651         if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, &s, 1, 4*i8, 4>>b_8x16 );
 652     }
 653     else if( i_mb_type == B_8x8 )
 654     {
 655         cavlc_mb8x8_mvd( h, &s, 0, i8 );
 656         cavlc_mb8x8_mvd( h, &s, 1, i8 );
 657     }
 658     else
 659     {
 660         x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" );
 661         return 0;
 662     }
 663
 664     for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
 665     {
 666         x264_macroblock_luma_write_cavlc( h, &s, i8, i8 );
 667         block_residual_write_cavlc( h, &s, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
 668         block_residual_write_cavlc( h, &s, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
 669         i8 += x264_pixel_size[i_pixel].h >> 3;
 670     }
 671
 672     return s.i_bits_encoded;
 673 }
 674
 675 static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
 676 {
 677     bs_t s;
 678     int b_8x4 = i_pixel == PIXEL_8x4;
 679     s.i_bits_encoded = 0;
 680     cavlc_mb_mvd( h, &s, 0, i4, 1+b_8x4 );
 681     block_residual_write_cavlc( h, &s, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
 682     if( i_pixel != PIXEL_4x4 )
 683     {
 684         i4 += 2-b_8x4;
 685         block_residual_write_cavlc( h, &s, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
 686     }
 687
 688     return s.i_bits_encoded;
 689 }
 690
 691 static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
 692 {
 693     if( x264_mb_predict_intra4x4_mode( h, i4 ) == x264_mb_pred_mode4x4_fix( i_mode ) )
 694         return 1;
 695     else
 696         return 4;
 697 }
 698
 699 static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
 700 {
 701     int i4;
 702     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
 703     h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8] );
 704     for( i4 = 0; i4 < 4; i4++ )
 705     {
 706         h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero( h->dct.luma4x4[i4+i8*4] );
 707         block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
 708     }
 709     return h->out.bs.i_bits_encoded;
 710 }
 711
 712 static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
 713 {
 714     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
 715     h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero( h->dct.luma4x4[i4] );
 716     block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
 717     return h->out.bs.i_bits_encoded;
 718 }
 719
 720 static int x264_i8x8_chroma_size_cavlc( x264_t *h )
 721 {
 722     h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
 723     if( h->mb.i_cbp_chroma )
 724     {
 725         block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
 726         block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
 727
 728         if( h->mb.i_cbp_chroma == 2 )
 729         {
 730             int i;
 731             for( i = 16; i < 24; i++ )
 732                 block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
 733         }
 734     }
 735     return h->out.bs.i_bits_encoded;
 736 }
 737 #endif