git.sesse.net Git - x264/blob - encoder/cabac.c

   1 /*****************************************************************************
   2  * cabac.c: cabac bitstream writing
   3  *****************************************************************************
   4  * Copyright (C) 2003-2013 x264 project
   5  *
   6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   7  *          Loren Merritt <lorenm@u.washington.edu>
   8  *          Fiona Glaser <fiona@x264.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  23  *
  24  * This program is also available under a commercial proprietary license.
  25  * For more information, contact us at licensing@x264.com.
  26  *****************************************************************************/
  27
  28 #include "common/common.h"
  29 #include "macroblock.h"
  30
  31 #ifndef RDO_SKIP_BS
  32 #define RDO_SKIP_BS 0
  33 #endif
  34
  35 static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
  36                     int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
  37 {
  38     if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  39     {
  40         x264_cabac_encode_decision_noup( cb, ctx0, 0 );
  41     }
  42 #if !RDO_SKIP_BS
  43     else if( i_mb_type == I_PCM )
  44     {
  45         x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  46         x264_cabac_encode_flush( h, cb );
  47     }
  48 #endif
  49     else
  50     {
  51         int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
  52
  53         x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  54         x264_cabac_encode_terminal( cb );
  55
  56         x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
  57         if( h->mb.i_cbp_chroma == 0 )
  58             x264_cabac_encode_decision_noup( cb, ctx2, 0 );
  59         else
  60         {
  61             x264_cabac_encode_decision( cb, ctx2, 1 );
  62             x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
  63         }
  64         x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
  65         x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
  66     }
  67 }
  68
  69 #if !RDO_SKIP_BS
  70 static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
  71 {
  72     int ctx = 0;
  73     ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
  74     ctx += (h->mb.i_mb_top_mbpair_xy >= 0
  75             && h->mb.slice_table[h->mb.i_mb_top_mbpair_xy] == h->sh.i_first_mb
  76             && h->mb.field[h->mb.i_mb_top_mbpair_xy]);
  77
  78     x264_cabac_encode_decision_noup( cb, 70 + ctx, MB_INTERLACED );
  79     h->mb.field_decoding_flag = MB_INTERLACED;
  80 }
  81 #endif
  82
  83 static void x264_cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
  84 {
  85     if( i_pred == i_mode )
  86         x264_cabac_encode_decision( cb, 68, 1 );
  87     else
  88     {
  89         x264_cabac_encode_decision( cb, 68, 0 );
  90         if( i_mode > i_pred  )
  91             i_mode--;
  92         x264_cabac_encode_decision( cb, 69, (i_mode     )&0x01 );
  93         x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 );
  94         x264_cabac_encode_decision( cb, 69, (i_mode >> 2)      );
  95     }
  96 }
  97
  98 static void x264_cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
  99 {
 100     int i_mode = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode];
 101     int ctx = 0;
 102
 103     /* No need to test for I4x4 or I_16x16 as cache_save handle that */
 104     if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
 105         ctx++;
 106     if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
 107         ctx++;
 108
 109     x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 );
 110     if( i_mode > 0 )
 111     {
 112         x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 );
 113         if( i_mode > 1 )
 114             x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 );
 115     }
 116 }
 117
 118 static void x264_cabac_cbp_luma( x264_t *h, x264_cabac_t *cb )
 119 {
 120     int cbp = h->mb.i_cbp_luma;
 121     int cbp_l = h->mb.cache.i_cbp_left;
 122     int cbp_t = h->mb.cache.i_cbp_top;
 123     x264_cabac_encode_decision     ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 );
 124     x264_cabac_encode_decision     ( cb, 76 - ((cbp   >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 );
 125     x264_cabac_encode_decision     ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp   << 1) & 2), (cbp >> 2) & 1 );
 126     x264_cabac_encode_decision_noup( cb, 76 - ((cbp   >> 2) & 1) - ((cbp   >> 0) & 2), (cbp >> 3) & 1 );
 127 }
 128
 129 static void x264_cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb )
 130 {
 131     int cbp_a = h->mb.cache.i_cbp_left & 0x30;
 132     int cbp_b = h->mb.cache.i_cbp_top  & 0x30;
 133     int ctx = 0;
 134
 135     if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
 136     if( cbp_b && h->mb.cache.i_cbp_top  != -1 ) ctx+=2;
 137     if( h->mb.i_cbp_chroma == 0 )
 138         x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
 139     else
 140     {
 141         x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );
 142
 143         ctx = 4;
 144         if( cbp_a == 0x20 ) ctx++;
 145         if( cbp_b == 0x20 ) ctx += 2;
 146         x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma >> 1 );
 147     }
 148 }
 149
 150 static void x264_cabac_qp_delta( x264_t *h, x264_cabac_t *cb )
 151 {
 152     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
 153     int ctx;
 154
 155     /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
 156     if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
 157     {
 158 #if !RDO_SKIP_BS
 159         h->mb.i_qp = h->mb.i_last_qp;
 160 #endif
 161         i_dqp = 0;
 162     }
 163
 164     /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
 165      * we don't have to check for them. */
 166     ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
 167
 168     if( i_dqp != 0 )
 169     {
 170         /* Faster than (i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp-1)).
 171          * If you so much as sneeze on these lines, gcc will compile this suboptimally. */
 172         i_dqp *= 2;
 173         int val = 1 - i_dqp;
 174         if( val < 0 ) val = i_dqp;
 175         val--;
 176         /* dqp is interpreted modulo (QP_MAX_SPEC+1) */
 177         if( val >= QP_MAX_SPEC && val != QP_MAX_SPEC+1 )
 178             val = 2*QP_MAX_SPEC+1 - val;
 179         do
 180         {
 181             x264_cabac_encode_decision( cb, 60 + ctx, 1 );
 182             ctx = 2+(ctx>>1);
 183         } while( --val );
 184     }
 185     x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
 186 }
 187
 188 #if !RDO_SKIP_BS
 189 void x264_cabac_mb_skip( x264_t *h, int b_skip )
 190 {
 191     int ctx = h->mb.cache.i_neighbour_skip + 11;
 192     if( h->sh.i_type != SLICE_TYPE_P )
 193        ctx += 13;
 194     x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
 195 }
 196 #endif
 197
 198 static inline void x264_cabac_subpartition_p( x264_cabac_t *cb, int i_sub )
 199 {
 200     if( i_sub == D_L0_8x8 )
 201     {
 202         x264_cabac_encode_decision( cb, 21, 1 );
 203         return;
 204     }
 205     x264_cabac_encode_decision( cb, 21, 0 );
 206     if( i_sub == D_L0_8x4 )
 207         x264_cabac_encode_decision( cb, 22, 0 );
 208     else
 209     {
 210         x264_cabac_encode_decision( cb, 22, 1 );
 211         x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 );
 212     }
 213 }
 214
 215 static ALWAYS_INLINE void x264_cabac_subpartition_b( x264_cabac_t *cb, int i_sub )
 216 {
 217     if( i_sub == D_DIRECT_8x8 )
 218     {
 219         x264_cabac_encode_decision( cb, 36, 0 );
 220         return;
 221     }
 222     x264_cabac_encode_decision( cb, 36, 1 );
 223     if( i_sub == D_BI_8x8 )
 224     {
 225         x264_cabac_encode_decision( cb, 37, 1 );
 226         x264_cabac_encode_decision( cb, 38, 0 );
 227         x264_cabac_encode_decision( cb, 39, 0 );
 228         x264_cabac_encode_decision( cb, 39, 0 );
 229         return;
 230     }
 231     x264_cabac_encode_decision( cb, 37, 0 );
 232     x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
 233 }
 234
 235 static ALWAYS_INLINE void x264_cabac_transform_size( x264_t *h, x264_cabac_t *cb )
 236 {
 237     int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
 238     x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
 239 }
 240
 241 static ALWAYS_INLINE void x264_cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe )
 242 {
 243     const int i8 = x264_scan8[idx];
 244     const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
 245     const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
 246     int ctx = 0;
 247
 248     if( i_refa > 0 && (!bframe || !h->mb.cache.skip[i8 - 1]) )
 249         ctx++;
 250     if( i_refb > 0 && (!bframe || !h->mb.cache.skip[i8 - 8]) )
 251         ctx += 2;
 252
 253     for( int i_ref = h->mb.cache.ref[i_list][i8]; i_ref > 0; i_ref-- )
 254     {
 255         x264_cabac_encode_decision( cb, 54 + ctx, 1 );
 256         ctx = (ctx>>2)+4;
 257     }
 258     x264_cabac_encode_decision( cb, 54 + ctx, 0 );
 259 }
 260
 261 static NOINLINE void x264_cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx )
 262 {
 263     x264_cabac_ref_internal( h, cb, 0, idx, 0 );
 264 }
 265 static NOINLINE void x264_cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
 266 {
 267     x264_cabac_ref_internal( h, cb, i_list, idx, 1 );
 268 }
 269
 270 static ALWAYS_INLINE int x264_cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
 271 {
 272     int ctxbase = l ? 47 : 40;
 273
 274     if( mvd == 0 )
 275     {
 276         x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
 277         return 0;
 278     }
 279
 280     int i_abs = abs( mvd );
 281     x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
 282 #if RDO_SKIP_BS
 283     if( i_abs <= 3 )
 284     {
 285         for( int i = 1; i < i_abs; i++ )
 286             x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
 287         x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
 288         x264_cabac_encode_bypass( cb, mvd >> 31 );
 289     }
 290     else
 291     {
 292         x264_cabac_encode_decision( cb, ctxbase + 3, 1 );
 293         x264_cabac_encode_decision( cb, ctxbase + 4, 1 );
 294         x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
 295         if( i_abs < 9 )
 296         {
 297             cb->f8_bits_encoded += x264_cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
 298             cb->state[ctxbase+6] = x264_cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
 299         }
 300         else
 301         {
 302             cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
 303             cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]];
 304             x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
 305         }
 306     }
 307 #else
 308     static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 };
 309
 310     if( i_abs < 9 )
 311     {
 312         for( int i = 1; i < i_abs; i++ )
 313             x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
 314         x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 );
 315     }
 316     else
 317     {
 318         for( int i = 1; i < 9; i++ )
 319             x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
 320         x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
 321     }
 322     x264_cabac_encode_bypass( cb, mvd >> 31 );
 323 #endif
 324     /* Since we don't need to keep track of MVDs larger than 66, just cap the value.
 325      * This lets us store MVDs as 8-bit values instead of 16-bit. */
 326     return X264_MIN( i_abs, 66 );
 327 }
 328
 329 static NOINLINE uint16_t x264_cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
 330 {
 331     ALIGNED_4( int16_t mvp[2] );
 332     int mdx, mdy;
 333
 334     /* Calculate mvd */
 335     x264_mb_predict_mv( h, i_list, idx, width, mvp );
 336     mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
 337     mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
 338     uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
 339                                        h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
 340
 341     /* encode */
 342     mdx = x264_cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
 343     mdy = x264_cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
 344
 345     return pack8to16(mdx,mdy);
 346 }
 347
 348 #define x264_cabac_mvd(h,cb,i_list,idx,width,height)\
 349 do\
 350 {\
 351     uint16_t mvd = x264_cabac_mvd(h,cb,i_list,idx,width);\
 352     x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
 353 } while(0)
 354
 355 static inline void x264_cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
 356 {
 357     switch( h->mb.i_sub_partition[i] )
 358     {
 359         case D_L0_8x8:
 360             x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );
 361             break;
 362         case D_L0_8x4:
 363             x264_cabac_mvd( h, cb, 0, 4*i+0, 2, 1 );
 364             x264_cabac_mvd( h, cb, 0, 4*i+2, 2, 1 );
 365             break;
 366         case D_L0_4x8:
 367             x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 2 );
 368             x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 2 );
 369             break;
 370         case D_L0_4x4:
 371             x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 1 );
 372             x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 1 );
 373             x264_cabac_mvd( h, cb, 0, 4*i+2, 1, 1 );
 374             x264_cabac_mvd( h, cb, 0, 4*i+3, 1, 1 );
 375             break;
 376         default:
 377             assert(0);
 378     }
 379 }
 380
 381 static ALWAYS_INLINE void x264_cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma )
 382 {
 383     if( slice_type == SLICE_TYPE_I )
 384     {
 385         int ctx = 0;
 386         if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
 387             ctx++;
 388         if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
 389             ctx++;
 390
 391         x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
 392     }
 393     else if( slice_type == SLICE_TYPE_P )
 394     {
 395         /* prefix */
 396         x264_cabac_encode_decision_noup( cb, 14, 1 );
 397
 398         /* suffix */
 399         x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
 400     }
 401     else if( slice_type == SLICE_TYPE_B )
 402     {
 403         /* prefix */
 404         x264_cabac_encode_decision_noup( cb, 27+3,   1 );
 405         x264_cabac_encode_decision_noup( cb, 27+4,   1 );
 406         x264_cabac_encode_decision( cb, 27+5,   1 );
 407         x264_cabac_encode_decision( cb, 27+5,   0 );
 408         x264_cabac_encode_decision( cb, 27+5,   1 );
 409
 410         /* suffix */
 411         x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
 412     }
 413
 414     if( i_mb_type == I_PCM )
 415         return;
 416
 417     if( i_mb_type != I_16x16 )
 418     {
 419         if( h->pps->b_transform_8x8_mode )
 420             x264_cabac_transform_size( h, cb );
 421
 422         int di = h->mb.b_transform_8x8 ? 4 : 1;
 423         for( int i = 0; i < 16; i += di )
 424         {
 425             const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
 426             const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
 427             x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
 428         }
 429     }
 430
 431     if( chroma )
 432         x264_cabac_intra_chroma_pred_mode( h, cb );
 433 }
 434
 435 static ALWAYS_INLINE void x264_cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
 436 {
 437     if( i_mb_type == P_L0 )
 438     {
 439         x264_cabac_encode_decision_noup( cb, 14, 0 );
 440         if( h->mb.i_partition == D_16x16 )
 441         {
 442             x264_cabac_encode_decision_noup( cb, 15, 0 );
 443             x264_cabac_encode_decision_noup( cb, 16, 0 );
 444             if( h->mb.pic.i_fref[0] > 1 )
 445                 x264_cabac_ref_p( h, cb, 0 );
 446             x264_cabac_mvd( h, cb, 0, 0, 4, 4 );
 447         }
 448         else if( h->mb.i_partition == D_16x8 )
 449         {
 450             x264_cabac_encode_decision_noup( cb, 15, 1 );
 451             x264_cabac_encode_decision_noup( cb, 17, 1 );
 452             if( h->mb.pic.i_fref[0] > 1 )
 453             {
 454                 x264_cabac_ref_p( h, cb, 0 );
 455                 x264_cabac_ref_p( h, cb, 8 );
 456             }
 457             x264_cabac_mvd( h, cb, 0, 0, 4, 2 );
 458             x264_cabac_mvd( h, cb, 0, 8, 4, 2 );
 459         }
 460         else //if( h->mb.i_partition == D_8x16 )
 461         {
 462             x264_cabac_encode_decision_noup( cb, 15, 1 );
 463             x264_cabac_encode_decision_noup( cb, 17, 0 );
 464             if( h->mb.pic.i_fref[0] > 1 )
 465             {
 466                 x264_cabac_ref_p( h, cb, 0 );
 467                 x264_cabac_ref_p( h, cb, 4 );
 468             }
 469             x264_cabac_mvd( h, cb, 0, 0, 2, 4 );
 470             x264_cabac_mvd( h, cb, 0, 4, 2, 4 );
 471         }
 472     }
 473     else if( i_mb_type == P_8x8 )
 474     {
 475         x264_cabac_encode_decision_noup( cb, 14, 0 );
 476         x264_cabac_encode_decision_noup( cb, 15, 0 );
 477         x264_cabac_encode_decision_noup( cb, 16, 1 );
 478
 479         /* sub mb type */
 480         for( int i = 0; i < 4; i++ )
 481             x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i] );
 482
 483         /* ref 0 */
 484         if( h->mb.pic.i_fref[0] > 1 )
 485         {
 486             x264_cabac_ref_p( h, cb,  0 );
 487             x264_cabac_ref_p( h, cb,  4 );
 488             x264_cabac_ref_p( h, cb,  8 );
 489             x264_cabac_ref_p( h, cb, 12 );
 490         }
 491
 492         for( int i = 0; i < 4; i++ )
 493             x264_cabac_8x8_mvd( h, cb, i );
 494     }
 495     else /* intra */
 496         x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma );
 497 }
 498
 499 static ALWAYS_INLINE void x264_cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
 500 {
 501     int ctx = 0;
 502     if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
 503         ctx++;
 504     if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
 505         ctx++;
 506
 507     if( i_mb_type == B_DIRECT )
 508     {
 509         x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
 510         return;
 511     }
 512     x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
 513
 514     if( i_mb_type == B_8x8 )
 515     {
 516         x264_cabac_encode_decision_noup( cb, 27+3,   1 );
 517         x264_cabac_encode_decision_noup( cb, 27+4,   1 );
 518         x264_cabac_encode_decision( cb, 27+5,   1 );
 519         x264_cabac_encode_decision( cb, 27+5,   1 );
 520         x264_cabac_encode_decision_noup( cb, 27+5,   1 );
 521
 522         /* sub mb type */
 523         for( int i = 0; i < 4; i++ )
 524             x264_cabac_subpartition_b( cb, h->mb.i_sub_partition[i] );
 525
 526         /* ref */
 527         if( h->mb.pic.i_fref[0] > 1 )
 528             for( int i = 0; i < 4; i++ )
 529                 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
 530                     x264_cabac_ref_b( h, cb, 0, 4*i );
 531
 532         if( h->mb.pic.i_fref[1] > 1 )
 533             for( int i = 0; i < 4; i++ )
 534                 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
 535                     x264_cabac_ref_b( h, cb, 1, 4*i );
 536
 537         for( int i = 0; i < 4; i++ )
 538             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
 539                 x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );
 540
 541         for( int i = 0; i < 4; i++ )
 542             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
 543                 x264_cabac_mvd( h, cb, 1, 4*i, 2, 2 );
 544     }
 545     else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
 546     {
 547         /* All B modes */
 548         static const uint8_t i_mb_bits[9*3] =
 549         {
 550             0x31, 0x29, 0x4, /* L0 L0 */
 551             0x35, 0x2d, 0,   /* L0 L1 */
 552             0x43, 0x63, 0,   /* L0 BI */
 553             0x3d, 0x2f, 0,   /* L1 L0 */
 554             0x39, 0x25, 0x6, /* L1 L1 */
 555             0x53, 0x73, 0,   /* L1 BI */
 556             0x4b, 0x6b, 0,   /* BI L0 */
 557             0x5b, 0x7b, 0,   /* BI L1 */
 558             0x47, 0x67, 0x21 /* BI BI */
 559         };
 560
 561         const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
 562         int bits = i_mb_bits[idx];
 563
 564         x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
 565         x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
 566         if( bits != 1 )
 567         {
 568             x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
 569             x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
 570             x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
 571             if( bits != 1 )
 572                 x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
 573         }
 574
 575         const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
 576         if( h->mb.pic.i_fref[0] > 1 )
 577         {
 578             if( b_list[0][0] )
 579                 x264_cabac_ref_b( h, cb, 0, 0 );
 580             if( b_list[0][1] && h->mb.i_partition != D_16x16 )
 581                 x264_cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
 582         }
 583         if( h->mb.pic.i_fref[1] > 1 )
 584         {
 585             if( b_list[1][0] )
 586                 x264_cabac_ref_b( h, cb, 1, 0 );
 587             if( b_list[1][1] && h->mb.i_partition != D_16x16 )
 588                 x264_cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
 589         }
 590         for( int i_list = 0; i_list < 2; i_list++ )
 591         {
 592             if( h->mb.i_partition == D_16x16 )
 593             {
 594                 if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 4 );
 595             }
 596             else if( h->mb.i_partition == D_16x8 )
 597             {
 598                 if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 2 );
 599                 if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 8, 4, 2 );
 600             }
 601             else //if( h->mb.i_partition == D_8x16 )
 602             {
 603                 if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 2, 4 );
 604                 if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 4, 2, 4 );
 605             }
 606         }
 607     }
 608     else /* intra */
 609         x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
 610 }
 611
 612 static int ALWAYS_INLINE x264_cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
 613 {
 614     static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
 615
 616     if( b_dc )
 617     {
 618         i_idx -= LUMA_DC;
 619         if( i_cat == DCT_CHROMA_DC )
 620         {
 621             int i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1 : b_intra;
 622             int i_nzb = h->mb.cache.i_cbp_top  != -1 ? (h->mb.cache.i_cbp_top  >> (8 + i_idx)) & 1 : b_intra;
 623             return base_ctx[i_cat] + 2*i_nzb + i_nza;
 624         }
 625         else
 626         {
 627             int i_nza = (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1;
 628             int i_nzb = (h->mb.cache.i_cbp_top  >> (8 + i_idx)) & 1;
 629             return base_ctx[i_cat] + 2*i_nzb + i_nza;
 630         }
 631     }
 632     else
 633     {
 634         int i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
 635         int i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
 636         if( x264_constant_p(b_intra) && !b_intra )
 637             return base_ctx[i_cat] + ((2*i_nzb + i_nza)&0x7f);
 638         else
 639         {
 640             i_nza &= 0x7f + (b_intra << 7);
 641             i_nzb &= 0x7f + (b_intra << 7);
 642             return base_ctx[i_cat] + 2*!!i_nzb + !!i_nza;
 643         }
 644     }
 645 }
 646
 647 static const uint16_t significant_coeff_flag_offset[2][14] =
 648 {
 649     { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
 650     { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
 651 };
 652 static const uint16_t last_coeff_flag_offset[2][14] =
 653 {
 654     { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
 655     { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
 656 };
 657 static const uint16_t coeff_abs_level_m1_offset[14] =
 658 {
 659     227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
 660 };
 661 #if RDO_SKIP_BS
 662 extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][63];
 663 extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
 664 extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
 665 #else
 666 const uint8_t x264_significant_coeff_flag_offset_8x8[2][63] =
 667 {{
 668     0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
 669     4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
 670     7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
 671    12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
 672 },{
 673     0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
 674     6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
 675     9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
 676     9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
 677 }};
 678 const uint8_t x264_last_coeff_flag_offset_8x8[63] =
 679 {
 680     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 681     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 682     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 683     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
 684 };
 685 const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
 686 #endif
 687
 688 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
 689 //           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
 690 /* map node ctx => cabac ctx for level=1 */
 691 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
 692 /* map node ctx => cabac ctx for level>1 */
 693 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
 694 /* 4:2:2 chroma dc uses a slightly different state machine for some reason, also note that
 695  * 4:2:0 chroma dc doesn't use the last state so it has identical output with both arrays. */
 696 static const uint8_t coeff_abs_levelgt1_ctx_chroma_dc[8] = { 5, 5, 5, 5, 6, 7, 8, 8 };
 697 static const uint8_t coeff_abs_level_transition[2][8] = {
 698 /* update node ctx after coding a level=1 */
 699     { 1, 2, 3, 3, 4, 5, 6, 7 },
 700 /* update node ctx after coding a level>1 */
 701     { 4, 4, 4, 4, 5, 6, 7, 7 }
 702 };
 703 static const uint8_t count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63};
 704
 705 #if !RDO_SKIP_BS
 706 static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
 707 {
 708     int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
 709     int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
 710     int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
 711     int coeff_idx = -1, node_ctx = 0;
 712     int last = h->quantf.coeff_last[ctx_block_cat]( l );
 713     const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
 714     dctcoef coeffs[64];
 715
 716 #define WRITE_SIGMAP( sig_off, last_off )\
 717 {\
 718     int i = 0;\
 719     while( 1 )\
 720     {\
 721         if( l[i] )\
 722         {\
 723             coeffs[++coeff_idx] = l[i];\
 724             x264_cabac_encode_decision( cb, ctx_sig + sig_off, 1 );\
 725             if( i == last )\
 726             {\
 727                 x264_cabac_encode_decision( cb, ctx_last + last_off, 1 );\
 728                 break;\
 729             }\
 730             else\
 731                 x264_cabac_encode_decision( cb, ctx_last + last_off, 0 );\
 732         }\
 733         else\
 734             x264_cabac_encode_decision( cb, ctx_sig + sig_off, 0 );\
 735         if( ++i == count_m1 )\
 736         {\
 737             coeffs[++coeff_idx] = l[i];\
 738             break;\
 739         }\
 740     }\
 741 }
 742
 743     if( chroma422dc )
 744     {
 745         int count_m1 = 7;
 746         WRITE_SIGMAP( x264_coeff_flag_offset_chroma_422_dc[i], x264_coeff_flag_offset_chroma_422_dc[i] )
 747     }
 748     else
 749     {
 750         int count_m1 = count_cat_m1[ctx_block_cat];
 751         if( count_m1 == 63 )
 752         {
 753             const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
 754             WRITE_SIGMAP( sig_offset[i], x264_last_coeff_flag_offset_8x8[i] )
 755         }
 756         else
 757             WRITE_SIGMAP( i, i )
 758     }
 759
 760     do
 761     {
 762         /* write coeff_abs - 1 */
 763         int coeff = coeffs[coeff_idx];
 764         int abs_coeff = abs(coeff);
 765         int coeff_sign = coeff >> 31;
 766         int ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
 767
 768         if( abs_coeff > 1 )
 769         {
 770             x264_cabac_encode_decision( cb, ctx, 1 );
 771             ctx = levelgt1_ctx[node_ctx] + ctx_level;
 772             for( int i = X264_MIN( abs_coeff, 15 ) - 2; i > 0; i-- )
 773                 x264_cabac_encode_decision( cb, ctx, 1 );
 774             if( abs_coeff < 15 )
 775                 x264_cabac_encode_decision( cb, ctx, 0 );
 776             else
 777                 x264_cabac_encode_ue_bypass( cb, 0, abs_coeff - 15 );
 778
 779             node_ctx = coeff_abs_level_transition[1][node_ctx];
 780         }
 781         else
 782         {
 783             x264_cabac_encode_decision( cb, ctx, 0 );
 784             node_ctx = coeff_abs_level_transition[0][node_ctx];
 785         }
 786
 787         x264_cabac_encode_bypass( cb, coeff_sign );
 788     } while( --coeff_idx >= 0 );
 789 }
 790 static void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 791 {
 792     x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
 793 }
 794 static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 795 {
 796     /* Template a version specifically for chroma 4:2:2 DC in order to avoid
 797      * slowing down everything else due to the added complexity. */
 798     x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 );
 799 }
 /* In this branch of the build the 8x8 residual writer is simply an alias for
  * the generic one. */
 #define x264_cabac_block_residual_8x8( t, c, block_cat, coefs ) x264_cabac_block_residual( t, c, block_cat, coefs )
 801 #else
 802
 803 /* Faster RDO by merging sigmap and level coding. Note that for 8x8dct and chroma 4:2:2 dc this is
 804  * slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there
 805  * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
 806 static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
 807 {
 808     const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
 809     int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
 810     int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
 811     int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
 812     int last = h->quantf.coeff_last[ctx_block_cat]( l );
 813     int coeff_abs = abs(l[last]);
 814     int ctx = coeff_abs_level1_ctx[0] + ctx_level;
 815     int node_ctx;
 816     const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
 817
 818     if( last != (b_8x8 ? 63 : chroma422dc ? 7 : count_cat_m1[ctx_block_cat]) )
 819     {
 820         x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
 821                                     chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
 822         x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[last] :
 823                                     chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
 824     }
 825
 826     if( coeff_abs > 1 )
 827     {
 828         x264_cabac_encode_decision( cb, ctx, 1 );
 829         ctx = levelgt1_ctx[0] + ctx_level;
 830         if( coeff_abs < 15 )
 831         {
 832             cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
 833             cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
 834         }
 835         else
 836         {
 837             cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
 838             cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
 839             x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
 840         }
 841         node_ctx = coeff_abs_level_transition[1][0];
 842     }
 843     else
 844     {
 845         x264_cabac_encode_decision( cb, ctx, 0 );
 846         node_ctx = coeff_abs_level_transition[0][0];
 847         x264_cabac_encode_bypass( cb, 0 ); // sign
 848     }
 849
 850     for( int i = last-1 ; i >= 0; i-- )
 851     {
 852         if( l[i] )
 853         {
 854             coeff_abs = abs(l[i]);
 855             x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
 856                                         chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 1 );
 857             x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[i] :
 858                                         chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
 859             ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
 860
 861             if( coeff_abs > 1 )
 862             {
 863                 x264_cabac_encode_decision( cb, ctx, 1 );
 864                 ctx = levelgt1_ctx[node_ctx] + ctx_level;
 865                 if( coeff_abs < 15 )
 866                 {
 867                     cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
 868                     cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
 869                 }
 870                 else
 871                 {
 872                     cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
 873                     cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
 874                     x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
 875                 }
 876                 node_ctx = coeff_abs_level_transition[1][node_ctx];
 877             }
 878             else
 879             {
 880                 x264_cabac_encode_decision( cb, ctx, 0 );
 881                 node_ctx = coeff_abs_level_transition[0][node_ctx];
 882                 x264_cabac_encode_bypass( cb, 0 );
 883             }
 884         }
 885         else
 886             x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
 887                                         chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
 888     }
 889 }
 890
 891 static void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 892 {
 893     x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 );
 894 }
 895 static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 896 {
 897     x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
 898 }
 899 static void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 900 {
 901     x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
 902 }
 903 #endif
 904
 /* Code the coded-block flag of block i_idx and, when the cached non-zero count
  * of that block is non-zero, the residual itself through
  * x264_cabac_block_residual<name>. The flag's context comes from
  * x264_cabac_cbf_ctxidxinc(). */
 #define x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\
 do\
 {\
     const int cbf_ctx = x264_cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\
     if( !h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
         x264_cabac_encode_decision( cb, cbf_ctx, 0 );\
     else\
     {\
         x264_cabac_encode_decision( cb, cbf_ctx, 1 );\
         x264_cabac_block_residual##name( h, cb, ctx_block_cat, l );\
     }\
 } while(0)
 917
 /* Coded-block flag plus residual for a DC block (b_dc = 1), generic writer. */
 #define x264_cabac_block_residual_dc_cbf( h, cb, block_cat, idx, coefs, b_intra )\
     x264_cabac_block_residual_cbf_internal( h, cb, block_cat, idx, coefs, b_intra, 1, )
 920
 /* Coded-block flag plus residual for a non-DC block (b_dc = 0), generic writer. */
 #define x264_cabac_block_residual_cbf( h, cb, block_cat, idx, coefs, b_intra )\
     x264_cabac_block_residual_cbf_internal( h, cb, block_cat, idx, coefs, b_intra, 0, )
 923
 /* Coded-block flag plus residual for a non-DC block, using the 8x8 writer. */
 #define x264_cabac_block_residual_8x8_cbf( h, cb, block_cat, idx, coefs, b_intra )\
     x264_cabac_block_residual_cbf_internal( h, cb, block_cat, idx, coefs, b_intra, 0, _8x8 )
 926
 /* Coded-block flag plus residual for the 4:2:2 chroma DC block of chroma
  * plane `plane`, using the dedicated 4:2:2 DC writer. */
 #define x264_cabac_block_residual_422_dc_cbf( h, cb, plane, b_intra )\
     x264_cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(plane), h->dct.chroma_dc[plane], b_intra, 1, _422_dc )
 929
 /* Write one macroblock with CABAC: the macroblock header for the current slice
  * type, then either raw I_PCM samples or the coded block pattern, transform
  * size flag, qp delta and residual blocks.
  *
  * plane_count: number of planes sent through the luma residual path. The
  *              caller in this file passes 3 when CHROMA444 and 1 otherwise.
  * chroma:      non-zero when the chroma cbp and the chroma DC/AC residuals are
  *              written separately. That caller passes 0 when CHROMA444 and 1
  *              otherwise.
  *
  * When RDO_SKIP_BS is 0, the x264_cabac_pos() delta of the header is added to
  * h->stat.frame.i_mv_bits and the delta of everything after it to
  * h->stat.frame.i_tex_bits. The field decoding flag and the I_PCM path are
  * compiled only in that configuration. */
 930 static ALWAYS_INLINE void x264_macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
 931 {
 932     const int i_mb_type = h->mb.i_type;
 933
 934 #if !RDO_SKIP_BS
 935     const int i_mb_pos_start = x264_cabac_pos( cb );
 936     int       i_mb_pos_tex;
 937
         /* With SLICE_MBAFF the flag is written on even macroblock rows, and on
          * odd rows when the macroblock one stride earlier is a skip. */
 938     if( SLICE_MBAFF &&
 939         (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
 940     {
 941         x264_cabac_field_decoding_flag( h, cb );
 942     }
 943 #endif
 944
 945     if( h->sh.i_type == SLICE_TYPE_P )
 946         x264_cabac_mb_header_p( h, cb, i_mb_type, chroma );
 947     else if( h->sh.i_type == SLICE_TYPE_B )
 948         x264_cabac_mb_header_b( h, cb, i_mb_type, chroma );
 949     else //if( h->sh.i_type == SLICE_TYPE_I )
 950         x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma );
 951
 952 #if !RDO_SKIP_BS
         /* Everything written so far is counted as header bits. */
 953     i_mb_pos_tex = x264_cabac_pos( cb );
 954     h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
 955
 956     if( i_mb_type == I_PCM )
 957     {
             /* Raw samples bypass the arithmetic coder: a plain bitstream
              * writer is started at the CABAC output pointer, and afterwards
              * the CABAC core is re-initialised at the position it reached. */
 958         bs_t s;
 959         bs_init( &s, cb->p, cb->p_end - cb->p );
 960
 961         for( int p = 0; p < plane_count; p++ )
 962             for( int i = 0; i < 256; i++ )
 963                 bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
 964         if( chroma )
 965             for( int ch = 1; ch < 3; ch++ )
 966                 for( int i = 0; i < 16>>CHROMA_V_SHIFT; i++ )
 967                     for( int j = 0; j < 8; j++ )
 968                         bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
 969
 970         bs_flush( &s );
 971         cb->p = s.p;
 972         x264_cabac_encode_init_core( cb );
 973
 974         h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
 975         return;
 976     }
 977 #endif
 978
         /* No cbp is written here for I_16x16 macroblocks. */
 979     if( i_mb_type != I_16x16 )
 980     {
 981         x264_cabac_cbp_luma( h, cb );
 982         if( chroma )
 983             x264_cabac_cbp_chroma( h, cb );
 984     }
 985
 986     if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
 987     {
 988         x264_cabac_transform_size( h, cb );
 989     }
 990
         /* qp delta and residuals are only present when something is coded;
          * I_16x16 always takes this path. */
 991     if( h->mb.i_cbp_luma || (chroma && h->mb.i_cbp_chroma) || i_mb_type == I_16x16 )
 992     {
 993         const int b_intra = IS_INTRA( i_mb_type );
 994         x264_cabac_qp_delta( h, cb );
 995
 996         /* write residual */
 997         if( i_mb_type == I_16x16 )
 998         {
 999             /* DC Luma */
1000             for( int p = 0; p < plane_count; p++ )
1001             {
1002                 x264_cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );
1003
1004                 /* AC Luma */
                     /* The +1 starts each 4x4 block at its second coefficient. */
1005                 if( h->mb.i_cbp_luma )
1006                     for( int i = p*16; i < p*16+16; i++ )
1007                         x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
1008             }
1009         }
1010         else if( h->mb.b_transform_8x8 )
1011         {
1012             if( plane_count == 3 )
1013             {
1014                 ALIGNED_4( uint8_t nnzbak[3][8] );
1015
1016 /* Stupid nnz munging in the case that neighbors don't have
1017  * 8x8 transform enabled. */
1018 #define BACKUP( dst, src, res )\
1019     dst = src;\
1020     src = res;
1021
1022 #define RESTORE( dst, src, res )\
1023     src = dst;
1024
1025 #define MUNGE_8x8_NNZ( MUNGE )\
1026 if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[0]] )\
1027 {\
1028     MUNGE( nnzbak[0][0], h->mb.cache.non_zero_count[x264_scan8[16*0+ 0] - 1], 0x80 )\
1029     MUNGE( nnzbak[0][1], h->mb.cache.non_zero_count[x264_scan8[16*0+ 2] - 1], 0x80 )\
1030     MUNGE( nnzbak[1][0], h->mb.cache.non_zero_count[x264_scan8[16*1+ 0] - 1], 0x80 )\
1031     MUNGE( nnzbak[1][1], h->mb.cache.non_zero_count[x264_scan8[16*1+ 2] - 1], 0x80 )\
1032     MUNGE( nnzbak[2][0], h->mb.cache.non_zero_count[x264_scan8[16*2+ 0] - 1], 0x80 )\
1033     MUNGE( nnzbak[2][1], h->mb.cache.non_zero_count[x264_scan8[16*2+ 2] - 1], 0x80 )\
1034 }\
1035 if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[1]] )\
1036 {\
1037     MUNGE( nnzbak[0][2], h->mb.cache.non_zero_count[x264_scan8[16*0+ 8] - 1], 0x80 )\
1038     MUNGE( nnzbak[0][3], h->mb.cache.non_zero_count[x264_scan8[16*0+10] - 1], 0x80 )\
1039     MUNGE( nnzbak[1][2], h->mb.cache.non_zero_count[x264_scan8[16*1+ 8] - 1], 0x80 )\
1040     MUNGE( nnzbak[1][3], h->mb.cache.non_zero_count[x264_scan8[16*1+10] - 1], 0x80 )\
1041     MUNGE( nnzbak[2][2], h->mb.cache.non_zero_count[x264_scan8[16*2+ 8] - 1], 0x80 )\
1042     MUNGE( nnzbak[2][3], h->mb.cache.non_zero_count[x264_scan8[16*2+10] - 1], 0x80 )\
1043 }\
1044 if( (h->mb.i_neighbour & MB_TOP) && !h->mb.mb_transform_size[h->mb.i_mb_top_xy] )\
1045 {\
1046     MUNGE( M32( &nnzbak[0][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*0] - 8] ), 0x80808080U )\
1047     MUNGE( M32( &nnzbak[1][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*1] - 8] ), 0x80808080U )\
1048     MUNGE( M32( &nnzbak[2][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*2] - 8] ), 0x80808080U )\
1049 }
1050
                     /* Save the cached nnz entries of left/top neighbours that
                      * do not use the 8x8 transform into nnzbak and overwrite
                      * them with 0x80 for the duration of the residual loop. */
1051                 MUNGE_8x8_NNZ( BACKUP )
1052
1053                 for( int p = 0; p < 3; p++ )
1054                     for( int i = 0; i < 4; i++ )
1055                         if( h->mb.i_cbp_luma & ( 1 << i ) )
1056                             x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
1057
                     /* Put the saved neighbour nnz entries back. */
1058                 MUNGE_8x8_NNZ( RESTORE )
1059             }
1060             else
1061             {
                     /* Single-plane case: the 8x8 residual is written directly,
                      * without a per-block coded-block flag. */
1062                 for( int i = 0; i < 4; i++ )
1063                     if( h->mb.i_cbp_luma & ( 1 << i ) )
1064                         x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
1065             }
1066         }
1067         else
1068         {
                 /* 4x4 transform: bit (i >> 2) of i_cbp_luma gates the four
                  * 4x4 blocks i belongs to, in every plane. */
1069             for( int p = 0; p < plane_count; p++ )
1070                 for( int i = 0; i < 16; i++ )
1071                     if( h->mb.i_cbp_luma & ( 1 << ( i >> 2 ) ) )
1072                         x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+p*16, h->dct.luma4x4[i+p*16], b_intra );
1073         }
1074
1075         if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */
1076         {
1077             if( CHROMA_FORMAT == CHROMA_422 )
1078             {
1079                 x264_cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra );
1080                 x264_cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra );
1081             }
1082             else
1083             {
1084                 x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
1085                 x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
1086             }
1087
1088             if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
1089             {
                     /* Chroma AC blocks are read from h->dct.luma4x4 starting
                      * at index 16, four consecutive blocks per step. The +1
                      * starts each block at its second coefficient. */
1090                 int step = 8 << CHROMA_V_SHIFT;
1091                 for( int i = 16; i < 3*16; i += step )
1092                     for( int j = i; j < i+4; j++ )
1093                         x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra );
1094             }
1095         }
1096     }
1097
1098 #if !RDO_SKIP_BS
1099     h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
1100 #endif
1101 }
1102
1103 void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
1104 {
1105     if( CHROMA444 )
1106         x264_macroblock_write_cabac_internal( h, cb, 3, 0 );
1107     else
1108         x264_macroblock_write_cabac_internal( h, cb, 1, 1 );
1109 }
1110
1111 #if RDO_SKIP_BS
1112 /*****************************************************************************
1113  * RD only; doesn't generate a valid bitstream
1114  * doesn't write cbp or chroma dc (I don't know how much this matters)
1115  * doesn't write ref (never varies between calls, so no point in doing so)
1116  * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
1117  * works on all partition sizes except 16x16
1118  *****************************************************************************/
1119 static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
1120 {
1121     const int i_mb_type = h->mb.i_type;
1122     int b_8x16 = h->mb.i_partition == D_8x16;
1123     int plane_count = CHROMA444 ? 3 : 1;
1124
1125     if( i_mb_type == P_8x8 )
1126     {
1127         x264_cabac_8x8_mvd( h, cb, i8 );
1128         x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] );
1129     }
1130     else if( i_mb_type == P_L0 )
1131         x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1132     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
1133     {
1134         if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1135         if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1136     }
1137     else //if( i_mb_type == B_8x8 )
1138     {
1139         if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
1140             x264_cabac_mvd( h, cb, 0, 4*i8, 2, 2 );
1141         if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
1142             x264_cabac_mvd( h, cb, 1, 4*i8, 2, 2 );
1143     }
1144
1145     for( int j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
1146     {
1147         if( h->mb.i_cbp_luma & (1 << i8) )
1148         {
1149             if( h->mb.b_transform_8x8 )
1150             {
1151                 if( CHROMA444 )
1152                     for( int p = 0; p < 3; p++ )
1153                         x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
1154                 else
1155                     x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
1156             }
1157             else
1158                 for( int p = 0; p < plane_count; p++ )
1159                     for( int i4 = 0; i4 < 4; i4++ )
1160                         x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
1161         }
1162
1163         if( h->mb.i_cbp_chroma )
1164         {
1165             if( CHROMA_FORMAT == CHROMA_422 )
1166             {
1167                 int offset = (5*i8) & 0x09;
1168                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 );
1169                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 );
1170                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 );
1171                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 );
1172             }
1173             else
1174             {
1175                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
1176                 x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
1177             }
1178         }
1179
1180         i8 += x264_pixel_size[i_pixel].h >> 3;
1181     }
1182 }
1183
1184 static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
1185 {
1186     int b_8x4 = i_pixel == PIXEL_8x4;
1187     int plane_count = CHROMA444 ? 3 : 1;
1188     if( i_pixel == PIXEL_4x4 )
1189         x264_cabac_mvd( h, cb, 0, i4, 1, 1 );
1190     else
1191         x264_cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
1192     for( int p = 0; p < plane_count; p++ )
1193     {
1194         x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
1195         if( i_pixel != PIXEL_4x4 )
1196             x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
1197     }
1198 }
1199
1200 static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
1201 {
1202     const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
1203     i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1204     x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
1205     x264_cabac_cbp_luma( h, cb );
1206     if( h->mb.i_cbp_luma & (1 << i8) )
1207     {
1208         if( CHROMA444 )
1209             for( int p = 0; p < 3; p++ )
1210                 x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
1211         else
1212             x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
1213     }
1214 }
1215
1216 static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
1217 {
1218     const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
1219     int plane_count = CHROMA444 ? 3 : 1;
1220     i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1221     x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
1222     for( int p = 0; p < plane_count; p++ )
1223         x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
1224 }
1225
1226 static void x264_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
1227 {
1228     x264_cabac_intra_chroma_pred_mode( h, cb );
1229     x264_cabac_cbp_chroma( h, cb );
1230     if( h->mb.i_cbp_chroma )
1231     {
1232         if( CHROMA_FORMAT == CHROMA_422 )
1233         {
1234             x264_cabac_block_residual_422_dc_cbf( h, cb, 0, 1 );
1235             x264_cabac_block_residual_422_dc_cbf( h, cb, 1, 1 );
1236         }
1237         else
1238         {
1239             x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
1240             x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
1241         }
1242
1243         if( h->mb.i_cbp_chroma == 2 )
1244         {
1245             int step = 8 << CHROMA_V_SHIFT;
1246             for( int i = 16; i < 3*16; i += step )
1247                 for( int j = i; j < i+4; j++ )
1248                     x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 );
1249         }
1250     }
1251 }
1252 #endif