1 /*****************************************************************************
2 * cabac.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #include "common/common.h"
26 #include "macroblock.h"
/* Write the intra part of mb_type (I_4x4/I_8x8 prefix, I_PCM escape, or the
 * full I_16x16 suffix: pred mode + CBP bits) using caller-supplied context
 * indices ctx0..ctx5 so the same binarization serves I, P and B slices.
 * NOTE(review): braces/else lines are missing from this listing — the final
 * statements are presumably the I_16x16 path; confirm against full source. */
32 static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
33 int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
35 if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
/* single '0' bin: intra NxN (8x8 vs 4x4 is signalled by transform_size) */
37 x264_cabac_encode_decision_noup( cb, ctx0, 0 );
40 else if( i_mb_type == I_PCM )
/* '1' then terminal flush: PCM escape */
42 x264_cabac_encode_decision_noup( cb, ctx0, 1 );
43 x264_cabac_encode_flush( h, cb );
/* I_16x16: fixed-up prediction mode (handles unavailable neighbours) */
48 int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
50 x264_cabac_encode_decision_noup( cb, ctx0, 1 );
51 x264_cabac_encode_terminal( cb );
/* luma CBP (all-or-nothing for I16x16), then 2-bin chroma CBP */
53 x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
54 if( h->mb.i_cbp_chroma == 0 )
55 x264_cabac_encode_decision_noup( cb, ctx2, 0 );
58 x264_cabac_encode_decision( cb, ctx2, 1 );
59 x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
/* 2-bin fixed-length intra16x16 pred mode */
61 x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
62 x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
/* Write mb_type for the current macroblock, branching on slice type.
 * Contexts: I slices 3..10 (neighbour-derived ctx), P slices 14..20,
 * B slices 27..35; MBAFF field flag uses ctx 70..72.
 * NOTE(review): several lines (if-conditions for MBAFF, ctx declarations,
 * braces) are missing from this sampled listing. */
66 static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
68 const int i_mb_type = h->mb.i_type;
/* MBAFF: signal mb_field_decoding_flag on top MB of a pair (or when the
 * top-of-pair MB was skipped) — guard line is missing from this listing */
71 (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
73 x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
76 if( h->sh.i_type == SLICE_TYPE_I )
/* ctx increment: +1 per available non-I4x4 neighbour (left, top) */
79 if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
81 if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
84 x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
86 else if( h->sh.i_type == SLICE_TYPE_P )
88 /* prefix: 14, suffix: 17 */
89 if( i_mb_type == P_L0 )
91 x264_cabac_encode_decision_noup( cb, 14, 0 );
92 x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
93 x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
95 else if( i_mb_type == P_8x8 )
97 x264_cabac_encode_decision_noup( cb, 14, 0 );
98 x264_cabac_encode_decision_noup( cb, 15, 0 );
99 x264_cabac_encode_decision_noup( cb, 16, 1 );
/* intra in P slice: '1' prefix then intra suffix with P-slice contexts */
104 x264_cabac_encode_decision_noup( cb, 14, 1 );
107 x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
110 else //if( h->sh.i_type == SLICE_TYPE_B )
/* ctx increment: +1 per available non-direct/non-skip neighbour */
113 if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
115 if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
118 if( i_mb_type == B_DIRECT )
120 x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
123 x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
125 if( i_mb_type == B_8x8 )
/* binarization 1-1-1-1-1 for B_8x8 */
127 x264_cabac_encode_decision_noup( cb, 27+3, 1 );
128 x264_cabac_encode_decision_noup( cb, 27+4, 1 );
129 x264_cabac_encode_decision( cb, 27+5, 1 );
130 x264_cabac_encode_decision( cb, 27+5, 1 );
131 x264_cabac_encode_decision_noup( cb, 27+5, 1 );
133 else if( IS_INTRA( i_mb_type ) )
136 /* prefix 1-1-1-0-1 selects the intra escape, then the intra suffix */
136 x264_cabac_encode_decision_noup( cb, 27+3, 1 );
137 x264_cabac_encode_decision_noup( cb, 27+4, 1 );
138 x264_cabac_encode_decision( cb, 27+5, 1 );
139 x264_cabac_encode_decision( cb, 27+5, 0 );
140 x264_cabac_encode_decision( cb, 27+5, 1 );
143 x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
/* Bin strings for the inter B types, indexed by (type, partition);
 * a trailing 2 means "bin not present", {0} rows are unused combos. */
147 static const uint8_t i_mb_bits[9*3][6] =
149 { 1,0,0,0,1,2 }, { 1,0,0,1,0,2 }, { 0,0,2,2,2,2 }, /* L0 L0 */
150 { 1,0,1,0,1,2 }, { 1,0,1,1,0,2 }, {0}, /* L0 L1 */
151 { 1,1,0,0,0,0 }, { 1,1,0,0,0,1 }, {0}, /* L0 BI */
152 { 1,0,1,1,1,2 }, { 1,1,1,1,0,2 }, {0}, /* L1 L0 */
153 { 1,0,0,1,1,2 }, { 1,0,1,0,0,2 }, { 0,1,2,2,2,2 }, /* L1 L1 */
154 { 1,1,0,0,1,0 }, { 1,1,0,0,1,1 }, {0}, /* L1 BI */
155 { 1,1,0,1,0,0 }, { 1,1,0,1,0,1 }, {0}, /* BI L0 */
156 { 1,1,0,1,1,0 }, { 1,1,0,1,1,1 }, {0}, /* BI L1 */
157 { 1,1,1,0,0,0 }, { 1,1,1,0,0,1 }, { 1,0,0,0,0,2 }, /* BI BI */
160 const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
162 x264_cabac_encode_decision_noup( cb, 27+3, i_mb_bits[idx][0] );
163 x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][0], i_mb_bits[idx][1] );
164 if( i_mb_bits[idx][2] != 2 )
166 x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][2] );
167 x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][3] );
168 x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][4] );
169 if( i_mb_bits[idx][5] != 2 )
170 x264_cabac_encode_decision_noup( cb, 27+5, i_mb_bits[idx][5] );
/* Write one intra 4x4/8x8 luma prediction mode: a prev_intra_pred_mode_flag
 * (ctx 68), and if the mode differs from the predicted one, a 3-bit
 * rem_intra_pred_mode (ctx 69) with the predicted mode skipped over. */
176 static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
178 if( i_pred == i_mode )
179 x264_cabac_encode_decision( cb, 68, 1 );
182 x264_cabac_encode_decision( cb, 68, 0 );
/* remap so the 8 remaining modes fit in 3 bits (skip the predicted one) */
183 if( i_mode > i_pred )
185 x264_cabac_encode_decision( cb, 69, (i_mode )&0x01 );
186 x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 );
187 x264_cabac_encode_decision( cb, 69, (i_mode >> 2) );
/* Write intra_chroma_pred_mode as a truncated unary code (max 3 bins):
 * first bin's context (64..66) depends on whether the left/top neighbours
 * used a non-DC chroma mode; remaining bins use ctx 67 (64+3). */
191 static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
193 const int i_mode = x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ];
196 /* No need to test for I4x4 or I_16x16 as cache_save handle that */
197 if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 )
199 if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
202 x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 );
/* only emitted for i_mode > 0 — guard lines missing from this listing */
205 x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 );
207 x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 );
/* Write the 4 luma coded_block_pattern bits (one per 8x8 quadrant).
 * Each bin's context (76 - a - 2b) is derived from the CBP bits of the
 * spatially adjacent 8x8 blocks: left/top neighbour MBs for the border
 * quadrants, already-coded bits of this MB's own cbp otherwise. */
211 static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
213 int cbp = h->mb.i_cbp_luma;
214 int cbp_l = h->mb.cache.i_cbp_left;
215 int cbp_t = h->mb.cache.i_cbp_top;
216 x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 );
217 x264_cabac_encode_decision ( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 );
218 x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (cbp >> 2) & 1 );
219 x264_cabac_encode_decision_noup( cb, 76 - ((cbp >> 2) & 1) - ((cbp >> 0) & 2), (cbp >> 3) & 1 );
/* Write the chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) as up to two
 * bins at ctx 77+ctx / 81+ctx, with ctx derived from whether the left/top
 * neighbours had any chroma coefficients (and, for the second bin, AC). */
222 static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
224 int cbp_a = h->mb.cache.i_cbp_left & 0x30;
225 int cbp_b = h->mb.cache.i_cbp_top & 0x30;
/* -1 means "no neighbour"; its 0x30 bits could be set, so re-check */
228 if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
229 if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2;
230 if( h->mb.i_cbp_chroma == 0 )
231 x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
234 x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );
/* second bin: ctx from neighbours having AC (cbp==2 -> 0x20) — the ctx
 * rebase line appears to be missing from this listing */
237 if( cbp_a == 0x20 ) ctx++;
238 if( cbp_b == 0x20 ) ctx += 2;
239 x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 );
/* Write mb_qp_delta (unary, ctx 60..63). Zigzag-maps the signed delta to
 * an unsigned value (<=0 -> even, >0 -> odd); first bin's context depends
 * on whether the previous MB had a non-zero delta and coefficients. */
243 static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
245 int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
248 /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
249 if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
/* keep the previous QP so no delta needs to be signalled */
252 h->mb.i_qp = h->mb.i_last_qp;
257 /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
258 * we don't have to check for them. */
259 ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
263 int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
264 /* dqp is interpreted modulo 52 */
265 if( val >= 51 && val != 52 )
/* unary encoding of val; loop body/ctx-advance lines are missing from
 * this listing — confirm against the full source */
269 x264_cabac_encode_decision( cb, 60 + ctx, 1 );
273 x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
/* Write mb_skip_flag. Context = (# of available non-skipped neighbours)
 * + 11 for P slices or + 24 for B slices. Public (non-static) because it
 * is called from the slice encoder before the MB is fully coded. */
277 void x264_cabac_mb_skip( x264_t *h, int b_skip )
279 int ctx = (h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ))
280 + (h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top ))
281 + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
282 x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
/* Write sub_mb_type for one 8x8 block in a P macroblock (ctx 21..23):
 * '1' = 8x8, '00' = 8x4, '011'/'010' distinguish 4x8 from 4x4. */
286 static inline void x264_cabac_mb_sub_p_partition( x264_cabac_t *cb, int i_sub )
288 if( i_sub == D_L0_8x8 )
290 x264_cabac_encode_decision( cb, 21, 1 );
293 x264_cabac_encode_decision( cb, 21, 0 );
294 if( i_sub == D_L0_8x4 )
295 x264_cabac_encode_decision( cb, 22, 0 );
298 x264_cabac_encode_decision( cb, 22, 1 );
299 x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 );
/* Write sub_mb_type for one 8x8 block in a B macroblock (ctx 36..39).
 * Direct -> single '0'; otherwise a prefix at ctx 37 separates BI from
 * L0/L1. NOTE(review): this encoder only emits 8x8-sized B sub-types;
 * else/brace lines are missing from this listing. */
303 static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub )
305 if( i_sub == D_DIRECT_8x8 )
307 x264_cabac_encode_decision( cb, 36, 0 );
310 x264_cabac_encode_decision( cb, 36, 1 );
311 if( i_sub == D_BI_8x8 )
313 x264_cabac_encode_decision( cb, 37, 1 );
314 x264_cabac_encode_decision( cb, 38, 0 );
315 x264_cabac_encode_decision( cb, 39, 0 );
316 x264_cabac_encode_decision( cb, 39, 0 );
319 x264_cabac_encode_decision( cb, 37, 0 );
320 x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
/* Write transform_size_8x8_flag; context 399..401 selected by how many
 * neighbouring MBs used the 8x8 transform. */
323 static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
325 int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
326 x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
/* Write ref_idx for one partition as a unary code (ctx 54+ctx).
 * Initial context depends on whether the left/top cached refs are > 0
 * (skipped neighbours excluded). NOTE(review): the unary loop's brace/
 * decrement lines are missing from this listing. */
329 static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
331 const int i8 = x264_scan8[idx];
332 const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
333 const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
334 int i_ref = h->mb.cache.ref[i_list][i8];
337 if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] )
339 if( i_refb > 0 && !h->mb.cache.skip[i8 - 8] )
/* unary: a '1' per remaining ref value, terminated by '0' */
344 x264_cabac_encode_decision( cb, 54 + ctx, 1 );
348 x264_cabac_encode_decision( cb, 54 + ctx, 0 );
/* Write one motion-vector-difference component (l=0: x, ctx base 40;
 * l=1: y, ctx base 47) as UEG3: unary prefix (contexts from |amvd| of
 * neighbours, then 3/4/5/6..), Exp-Golomb k=3 suffix for |mvd| >= 9,
 * and a bypass sign bit.
 * NOTE(review): two alternate bodies are visible below; in the full
 * source they are selected by an RDO/bitstream preprocessor guard whose
 * #if lines are missing from this listing. The first variant updates
 * cb->f8_bits_encoded/cb->state directly via the cabac_size_* tables
 * (RD cost estimation); the second emits real bins. */
351 static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
353 const int i_abs = abs( mvd );
354 const int ctxbase = l ? 47 : 40;
358 x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
361 x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
/* small |mvd|: plain unary with per-position contexts */
364 for( i = 1; i < i_abs; i++ )
365 x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
366 x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
367 x264_cabac_encode_bypass( cb, mvd < 0 );
371 x264_cabac_encode_decision( cb, ctxbase + 3, 1 );
372 x264_cabac_encode_decision( cb, ctxbase + 4, 1 );
373 x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
/* medium |mvd|: remaining unary bins costed via lookup tables */
376 cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
377 cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
/* large |mvd|: five '1' bins then Exp-Golomb(k=3) escape */
381 cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
382 cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]];
383 x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
/* --- second (real bitstream) variant --- */
388 static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 };
391 x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
394 x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
397 for( i = 1; i < i_abs; i++ )
398 x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
399 x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 );
403 for( i = 1; i < 9; i++ )
404 x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
405 x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
407 x264_cabac_encode_bypass( cb, mvd < 0 );
/* Predict the MV for (i_list, idx), write the x/y MVD components, and
 * return the packed MVD so the macro below can cache it for neighbour
 * context derivation. amvd packs the |mvd| sums of the left/top blocks
 * (low 16 bits: x, high 16 bits: y) used as context selectors. */
412 static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
414 ALIGNED_4( int16_t mvp[2] );
419 x264_mb_predict_mv( h, i_list, idx, width, mvp );
420 mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
421 mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
422 amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
423 h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
426 x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF );
427 x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 );
429 return pack16to32_mask(mdx,mdy);
/* Wrapper macro: write the MVD, then store it into the MB cache over the
 * whole width x height partition (shadows the function above on purpose). */
432 #define x264_cabac_mb_mvd(h,cb,i_list,idx,width,height)\
435 uint32_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\
436 x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
/* Write list-0 MVDs for all sub-blocks of 8x8 block i, according to its
 * sub-partition (8x8 / 8x4 / 4x8 / 4x4). NOTE(review): the switch's
 * case labels (presumably D_L0_8x8 / D_L0_8x4 / D_L0_4x8 / D_L0_4x4)
 * and break statements are missing from this listing. */
439 static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
441 switch( h->mb.i_sub_partition[i] )
444 x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
447 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 2, 1 );
448 x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 2, 1 );
451 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 2 );
452 x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 2 );
455 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 1 );
456 x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 1 );
457 x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 1, 1 );
458 x264_cabac_mb_mvd( h, cb, 0, 4*i+3, 1, 1 );
465 /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
466 * 1-> AC 16x16 i_idx = luma4x4idx
467 * 2-> Luma4x4 i_idx = luma4x4idx
468 * 3-> DC Chroma i_idx = iCbCr
469 * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx
470 * 5-> Luma8x8 i_idx = luma8x8idx
/* Compute the coded_block_flag context index (85 + 4*cat + 2*nzb + nza)
 * from the nonzero-count / CBP state of the left (a) and top (b)
 * neighbouring blocks. NOTE(review): the switch/case lines dispatching
 * on i_cat are missing from this listing — the three visible sections
 * are presumably the luma-4x4/chroma-AC path, the luma-DC path, and the
 * chroma-DC path respectively; confirm against the full source. */
473 static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra )
483 /* no need to test for skip/pcm */
484 i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
485 i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
/* inter: 0x80 (unavailable-neighbour marker) masked off at compile time */
486 if( x264_constant_p(b_intra) && !b_intra )
487 return 85 + 4*i_cat + ((2*i_nzb + i_nza)&0x7f);
/* intra: unavailable neighbours (0x80) count as "nonzero" */
490 i_nza &= 0x7f + (b_intra << 7);
491 i_nzb &= 0x7f + (b_intra << 7);
492 return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza;
/* luma DC: bit 8 of the cached CBP words tracks the DC coded flag */
495 i_nza = (h->mb.cache.i_cbp_left >> 8) & 1;
496 i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1;
497 return 85 + 4*i_cat + 2*i_nzb + i_nza;
499 /* no need to test skip/pcm */
/* chroma DC: bits 9+i_idx; missing neighbour defaults to b_intra */
501 i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra;
502 i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra;
503 return 85 + 4*i_cat + 2*i_nzb + i_nza;
/* Context-index base tables for residual coding, indexed by
 * [mb_field_flag][ctxBlockCat] (frame vs. field coding uses disjoint
 * context sets), plus the 8x8 significance/last scan-position maps. */
510 static const uint16_t significant_coeff_flag_offset[2][6] = {
511 { 105, 120, 134, 149, 152, 402 },
512 { 277, 292, 306, 321, 324, 436 }
514 static const uint16_t last_coeff_flag_offset[2][6] = {
515 { 166, 181, 195, 210, 213, 417 },
516 { 338, 353, 367, 382, 385, 451 }
518 static const uint16_t coeff_abs_level_m1_offset[6] =
519 { 227, 237, 247, 257, 266, 426 };
/* Maps each of the 63 8x8 scan positions to a shared significance ctx;
 * [0] = frame (zigzag) scan, [1] = field scan. */
520 static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
522 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
523 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
524 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
525 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
527 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
528 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
529 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
530 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
532 static const uint8_t last_coeff_flag_offset_8x8[63] = {
533 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
534 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
535 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
536 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
539 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
540 // 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
541 /* map node ctx => cabac ctx for level=1 */
542 static const int coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
543 /* map node ctx => cabac ctx for level>1 */
544 static const int coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
545 static const uint8_t coeff_abs_level_transition[2][8] = {
546 /* update node ctx after coding a level=1 */
547 { 1, 2, 3, 3, 4, 5, 6, 7 },
548 /* update node ctx after coding a level>1 */
549 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* last coeff index per ctxBlockCat 0..4 (cat 5 / 8x8 is handled as 63) */
551 static const int count_cat_m1[5] = {15, 14, 15, 3, 14};
/* Bitstream-exact residual writer: first pass emits the significance map
 * (significant_coeff_flag / last_significant_coeff_flag) in scan order
 * while collecting |level|-1 and signs, second pass emits levels in
 * reverse order (unary to 14, then Exp-Golomb(k=0) escape) with the
 * node-ctx state machine, and a bypass sign bit each.
 * NOTE(review): in the full source this variant is compiled only when
 * real bitstream output is needed (the guarding #if lines, braces and
 * several declarations are missing from this listing). */
554 static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
556 const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
557 const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
558 const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
559 const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
560 int i_coeff_abs_m1[64];
561 int i_coeff_sign[64];
567 i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
/* Sigmap writer shared by the 4x4 (per-position ctx) and 8x8 (mapped
 * ctx via the offset tables) cases; l8x8 selects the mapping. */
569 #define WRITE_SIGMAP( l8x8 )\
574 i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
575 i_coeff_sign[i_coeff] = l[i] < 0;\
577 x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\
580 x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 1 );\
584 x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );\
587 x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 0 );\
589 if( i == i_count_m1 )\
591 i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
592 i_coeff_sign[i_coeff] = l[i] < 0;\
598 if( i_ctxBlockCat == DCT_LUMA_8x8 )
600 const int i_count_m1 = 63;
605 const int i_count_m1 = count_cat_m1[i_ctxBlockCat];
/* reverse-order level coding with the node-ctx state machine */
614 /* write coeff_abs - 1 */
615 i_prefix = X264_MIN( i_coeff_abs_m1[i_coeff], 14 );
616 ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
620 x264_cabac_encode_decision( cb, ctx, 1 );
621 ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
622 for( i = 0; i < i_prefix - 1; i++ )
623 x264_cabac_encode_decision( cb, ctx, 1 );
625 x264_cabac_encode_decision( cb, ctx, 0 );
/* |level|-1 >= 14: Exp-Golomb(k=0) escape for the remainder */
627 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i_coeff] - 14 );
629 node_ctx = coeff_abs_level_transition[1][node_ctx];
633 x264_cabac_encode_decision( cb, ctx, 0 );
634 node_ctx = coeff_abs_level_transition[0][node_ctx];
637 x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] );
638 } while( i_coeff > 0 );
640 #define block_residual_write_cabac_8x8( h, cb, l ) block_residual_write_cabac( h, cb, DCT_LUMA_8x8, l )
644 /* Faster RDO by merging sigmap and level coding. Note that for 8x8dct
645 * this is slightly incorrect because the sigmap is not reversible
646 * (contexts are repeated). However, there is nearly no quality penalty
647 * for this (~0.001db) and the speed boost (~30%) is worth it. */
/* RD-cost residual writer: walks coefficients from i_last down to 0 in a
 * single pass, interleaving significance/last flags with level coding.
 * Level costs are accumulated straight into cb->f8_bits_encoded via the
 * cabac_size_unary / cabac_transition_unary tables instead of emitting
 * real bins; signs are costed as one bypass bit each.
 * NOTE(review): this and the wrappers below are the RDO-only compile
 * variant in the full source — the guarding #if lines are missing here. */
648 static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l, int b_8x8 )
650 const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
651 const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
652 const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
653 const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
654 int i_last, i_coeff_abs, ctx, i, node_ctx;
656 i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
/* handle the last significant coefficient first (node_ctx starts at 0) */
658 i_coeff_abs = abs(l[i_last]);
659 ctx = coeff_abs_level1_ctx[0] + i_ctx_level;
/* the final scan position needs no sig/last flags */
661 if( i_last != (b_8x8 ? 63 : count_cat_m1[i_ctxBlockCat]) )
663 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i_last]:i_last), 1 );
664 x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i_last]:i_last), 1 );
667 if( i_coeff_abs > 1 )
669 x264_cabac_encode_decision( cb, ctx, 1 );
670 ctx = coeff_abs_levelgt1_ctx[0] + i_ctx_level;
671 if( i_coeff_abs < 15 )
/* cost the unary remainder through the size/transition tables */
673 cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
674 cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
678 cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
679 cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
680 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
682 node_ctx = coeff_abs_level_transition[1][0];
686 x264_cabac_encode_decision( cb, ctx, 0 );
687 node_ctx = coeff_abs_level_transition[0][0];
688 x264_cabac_encode_bypass( cb, 0 ); // sign
/* remaining coefficients, high to low scan position */
691 for( i = i_last-1 ; i >= 0; i-- )
695 i_coeff_abs = abs(l[i]);
696 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 1 );
697 x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i]:i), 0 );
698 ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
700 if( i_coeff_abs > 1 )
702 x264_cabac_encode_decision( cb, ctx, 1 );
703 ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
704 if( i_coeff_abs < 15 )
706 cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
707 cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
711 cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
712 cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
713 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
715 node_ctx = coeff_abs_level_transition[1][node_ctx];
719 x264_cabac_encode_decision( cb, ctx, 0 );
720 node_ctx = coeff_abs_level_transition[0][node_ctx];
721 x264_cabac_encode_bypass( cb, 0 );
/* zero coefficient: significance '0' only */
725 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 0 );
/* thin wrappers selecting the 8x8 vs generic category */
729 static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int16_t *l )
731 block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 );
733 static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
735 block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, l, 0 );
/* Write coded_block_flag for one block (context from neighbours' nonzero
 * state), then the residual itself only when the cached nonzero count
 * says the block has coefficients. */
739 #define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \
741 int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \
742 if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
744 x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
745 block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \
748 x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
/* Entropy-code one whole macroblock: mb_type, prediction info (intra
 * modes or refs+MVDs), CBP, transform size, QP delta, and residuals.
 * Also splits the bit accounting between i_mv_bits (header/prediction)
 * and i_tex_bits (residual) for rate statistics.
 * NOTE(review): braces and a few declarations are missing from this
 * sampled listing; control-flow comments below follow the visible code. */
751 void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
753 const int i_mb_type = h->mb.i_type;
758 const int i_mb_pos_start = x264_cabac_pos( cb );
762 /* Write the MB type */
763 x264_cabac_mb_type( h, cb );
766 if( i_mb_type == I_PCM )
768 i_mb_pos_tex = x264_cabac_pos( cb );
769 h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
/* PCM: raw samples are copied directly into the output buffer */
771 memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
773 for( i = 0; i < 8; i++ )
774 memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
776 for( i = 0; i < 8; i++ )
777 memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
/* re-init the arithmetic coder state after the raw bytes */
781 cb->i_range = 0x01FE;
783 cb->i_bytes_outstanding = 0;
785 /* if PCM is chosen, we need to store reconstructed frame data */
786 h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
787 h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
788 h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
790 h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
795 if( IS_INTRA( i_mb_type ) )
797 if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
798 x264_cabac_mb_transform_size( h, cb );
800 if( i_mb_type != I_16x16 )
/* 8x8 transform: one pred mode per 8x8 block (stride 4 in 4x4 units) */
802 int di = h->mb.b_transform_8x8 ? 4 : 1;
803 for( i = 0; i < 16; i += di )
805 const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
806 const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
807 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
811 x264_cabac_mb_intra_chroma_pred_mode( h, cb );
813 else if( i_mb_type == P_L0 )
815 if( h->mb.i_partition == D_16x16 )
817 if( h->mb.pic.i_fref[0] > 1 )
819 x264_cabac_mb_ref( h, cb, 0, 0 );
821 x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
823 else if( h->mb.i_partition == D_16x8 )
825 if( h->mb.pic.i_fref[0] > 1 )
827 x264_cabac_mb_ref( h, cb, 0, 0 );
828 x264_cabac_mb_ref( h, cb, 0, 8 );
830 x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
831 x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
833 else //if( h->mb.i_partition == D_8x16 )
835 if( h->mb.pic.i_fref[0] > 1 )
837 x264_cabac_mb_ref( h, cb, 0, 0 );
838 x264_cabac_mb_ref( h, cb, 0, 4 );
840 x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
841 x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
844 else if( i_mb_type == P_8x8 )
/* sub-partitions, then refs, then per-sub-block MVDs */
847 for( i = 0; i < 4; i++ )
848 x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
851 if( h->mb.pic.i_fref[0] > 1 )
853 x264_cabac_mb_ref( h, cb, 0, 0 );
854 x264_cabac_mb_ref( h, cb, 0, 4 );
855 x264_cabac_mb_ref( h, cb, 0, 8 );
856 x264_cabac_mb_ref( h, cb, 0, 12 );
859 for( i = 0; i < 4; i++ )
860 x264_cabac_mb8x8_mvd( h, cb, i );
862 else if( i_mb_type == B_8x8 )
865 for( i = 0; i < 4; i++ )
866 x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
/* refs/MVDs only for sub-blocks predicted from the given list */
869 if( h->mb.pic.i_fref[0] > 1 )
870 for( i = 0; i < 4; i++ )
871 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
872 x264_cabac_mb_ref( h, cb, 0, 4*i );
874 if( h->mb.pic.i_fref[1] > 1 )
875 for( i = 0; i < 4; i++ )
876 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
877 x264_cabac_mb_ref( h, cb, 1, 4*i );
879 for( i = 0; i < 4; i++ )
880 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
881 x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
883 for( i = 0; i < 4; i++ )
884 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
885 x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
887 else if( i_mb_type != B_DIRECT )
/* other inter B types: per-list usage flags from the type table */
890 const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
891 if( h->mb.pic.i_fref[0] > 1 )
894 x264_cabac_mb_ref( h, cb, 0, 0 );
895 if( b_list[0][1] && h->mb.i_partition != D_16x16 )
896 x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
898 if( h->mb.pic.i_fref[1] > 1 )
901 x264_cabac_mb_ref( h, cb, 1, 0 );
902 if( b_list[1][1] && h->mb.i_partition != D_16x16 )
903 x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
905 for( i_list = 0; i_list < 2; i_list++ )
907 if( h->mb.i_partition == D_16x16 )
909 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
911 else if( h->mb.i_partition == D_16x8 )
913 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
914 if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
916 else //if( h->mb.i_partition == D_8x16 )
918 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
919 if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
/* header done — everything from here is texture (residual) bits */
925 i_mb_pos_tex = x264_cabac_pos( cb );
926 h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
/* I_16x16 signals CBP inside mb_type, so skip explicit CBP here */
929 if( i_mb_type != I_16x16 )
931 x264_cabac_mb_cbp_luma( h, cb );
932 x264_cabac_mb_cbp_chroma( h, cb );
935 if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
937 x264_cabac_mb_transform_size( h, cb );
940 if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
942 const int b_intra = IS_INTRA( i_mb_type );
943 x264_cabac_mb_qp_delta( h, cb );
946 if( i_mb_type == I_16x16 )
/* DC plane first, then AC blocks (skipping their DC coefficient) */
949 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
952 if( h->mb.i_cbp_luma != 0 )
953 for( i = 0; i < 16; i++ )
954 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
956 else if( h->mb.b_transform_8x8 )
958 for( i = 0; i < 4; i++ )
959 if( h->mb.i_cbp_luma & ( 1 << i ) )
960 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i] );
964 for( i = 0; i < 16; i++ )
965 if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
966 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
969 if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */
971 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
972 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
973 if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
974 for( i = 16; i < 24; i++ )
975 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, b_intra );
980 h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
985 /*****************************************************************************
986 * RD only; doesn't generate a valid bitstream
987 * doesn't write cbp or chroma dc (I don't know how much this matters)
988 * doesn't write ref (never varies between calls, so no point in doing so)
989 * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
990 * works on all partition sizes except 16x16
991 *****************************************************************************/
/* RD-only size estimate for one inter partition starting at 8x8 block i8
 * (see the comment block above in the full file: no cbp/chroma-dc/ref
 * written, not a valid bitstream). Writes the partition's MVDs / sub
 * partition, then the luma residual(s) and chroma AC for the covered
 * 8x8 block(s); for partitions taller than 8 it advances i8 and loops. */
992 static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
994 const int i_mb_type = h->mb.i_type;
995 int b_8x16 = h->mb.i_partition == D_8x16;
998 if( i_mb_type == P_8x8 )
1000 x264_cabac_mb8x8_mvd( h, cb, i8 );
1001 x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
1003 else if( i_mb_type == P_L0 )
1004 x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1005 else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
1007 if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1008 if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1010 else //if( i_mb_type == B_8x8 )
1012 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
1013 x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
1014 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
1015 x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
/* one iteration per 8x8 block covered by the partition */
1018 for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
1020 if( h->mb.i_cbp_luma & (1 << i8) )
1022 if( h->mb.b_transform_8x8 )
1023 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
1027 for( i4 = 0; i4 < 4; i4++ )
1028 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 0 );
/* chroma AC for the co-located 4x4 blocks of both planes (U: 16+, V: 20+) */
1032 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
1033 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 0 );
1035 i8 += x264_pixel_size[i_pixel].h >> 3;
/* RD-only size estimate for one sub-8x8 partition: MVD plus the residual
 * of the covered 4x4 block(s) (two blocks for 8x4/4x8 shapes). */
1039 static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
1041 int b_8x4 = i_pixel == PIXEL_8x4;
1042 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
1043 if( i_pixel == PIXEL_4x4 )
1045 x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
/* 8x4/4x8: second 4x4 block is i4+2 (below) or i4+1 (right) */
1049 x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
1050 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+2-b_8x4, h->dct.luma4x4[i4+2-b_8x4], 0 );
/* RD-only size estimate for one intra 8x8 block: its prediction mode,
 * the luma CBP bins, and the 8x8 residual when the CBP bit is set. */
1054 static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
1056 const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
1057 i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1058 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
1059 x264_cabac_mb_cbp_luma( h, cb );
1060 if( h->mb.i_cbp_luma & (1 << i8) )
1061 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
/* RD-only size estimate for one intra 4x4 block: prediction mode plus
 * its residual (always written; b_intra=1 for the cbf context). */
1064 static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
1066 const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
1067 i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1068 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
1069 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 1 );
/* RD-only size estimate for the intra chroma planes: pred mode, chroma
 * CBP, then DC residuals (cbp>0) and AC residuals (cbp==2).
 * NOTE(review): the function's closing lines fall past this listing. */
1072 static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
1074 x264_cabac_mb_intra_chroma_pred_mode( h, cb );
1075 x264_cabac_mb_cbp_chroma( h, cb );
1076 if( h->mb.i_cbp_chroma > 0 )
1078 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 1 );
1079 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 1 );
1081 if( h->mb.i_cbp_chroma == 2 )
1084 for( i = 16; i < 24; i++ )
1085 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 1 );