X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=encoder%2Fcabac.c;h=27e6868cf506c8ee88a1fb643cb73229bd816022;hb=18eed0b9ee1314cc3ba9d16c0e44401f62aba624;hp=c25c65cbed3468596c2bf4d6a0f1ddf3ce948817;hpb=30da25a99e24e5c1ff5972b7f5c22c4be2a944b1;p=x264 diff --git a/encoder/cabac.c b/encoder/cabac.c index c25c65cb..27e6868c 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -1,10 +1,11 @@ /***************************************************************************** * cabac.c: h264 encoder library ***************************************************************************** - * Copyright (C) 2003 Laurent Aimar - * $Id: cabac.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $ + * Copyright (C) 2003-2008 x264 project * * Authors: Laurent Aimar + * Loren Merritt + * Fiona Glaser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,60 +19,47 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. *****************************************************************************/ #include "common/common.h" #include "macroblock.h" -static inline void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val ) -{ -#ifdef RDO_SKIP_BS - cb->f8_bits_encoded += ( bs_size_ue( val + (1<= (1<> k)&0x01 ); +#ifndef RDO_SKIP_BS +#define RDO_SKIP_BS 0 #endif -} static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type, int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 ) { if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) { - x264_cabac_encode_decision( cb, ctx0, 0 ); + x264_cabac_encode_decision_noup( cb, ctx0, 0 ); } +#if !RDO_SKIP_BS else if( i_mb_type == I_PCM ) { - x264_cabac_encode_decision( cb, ctx0, 1 ); + x264_cabac_encode_decision_noup( cb, ctx0, 1 ); x264_cabac_encode_flush( h, cb ); } +#endif else { int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode]; - x264_cabac_encode_decision( cb, ctx0, 1 ); + x264_cabac_encode_decision_noup( cb, ctx0, 1 ); x264_cabac_encode_terminal( cb ); - x264_cabac_encode_decision( cb, ctx1, ( h->mb.i_cbp_luma == 0 ? 0 : 1 )); + x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma ); if( h->mb.i_cbp_chroma == 0 ) - { - x264_cabac_encode_decision( cb, ctx2, 0 ); - } + x264_cabac_encode_decision_noup( cb, ctx2, 0 ); else { x264_cabac_encode_decision( cb, ctx2, 1 ); - x264_cabac_encode_decision( cb, ctx3, ( h->mb.i_cbp_chroma == 1 ? 0 : 1 ) ); + x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 ); } - x264_cabac_encode_decision( cb, ctx4, ( (i_pred / 2) ? 1 : 0 )); - x264_cabac_encode_decision( cb, ctx5, ( (i_pred % 2) ? 1 : 0 )); + x264_cabac_encode_decision( cb, ctx4, i_pred>>1 ); + x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 ); } } @@ -82,20 +70,16 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) if( h->sh.b_mbaff && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) { - x264_cabac_encode_decision( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced ); + x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced ); } if( h->sh.i_type == SLICE_TYPE_I ) { int ctx = 0; if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 ) - { ctx++; - } if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 ) - { ctx++; - } x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 ); } @@ -104,73 +88,53 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) /* prefix: 14, suffix: 17 */ if( i_mb_type == P_L0 ) { - if( h->mb.i_partition == D_16x16 ) - { - x264_cabac_encode_decision( cb, 14, 0 ); - x264_cabac_encode_decision( cb, 15, 0 ); - x264_cabac_encode_decision( cb, 16, 0 ); - } - else if( h->mb.i_partition == D_16x8 ) - { - x264_cabac_encode_decision( cb, 14, 0 ); - x264_cabac_encode_decision( cb, 15, 1 ); - x264_cabac_encode_decision( cb, 17, 1 ); - } - else if( h->mb.i_partition == D_8x16 ) - { - x264_cabac_encode_decision( cb, 14, 0 ); - x264_cabac_encode_decision( cb, 15, 1 ); - x264_cabac_encode_decision( cb, 17, 0 ); - } + x264_cabac_encode_decision_noup( cb, 14, 0 ); + x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 ); + x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 ); } else if( i_mb_type == P_8x8 ) { - x264_cabac_encode_decision( cb, 14, 0 ); - x264_cabac_encode_decision( cb, 15, 0 ); - x264_cabac_encode_decision( cb, 16, 1 ); + x264_cabac_encode_decision_noup( cb, 14, 0 ); + x264_cabac_encode_decision_noup( cb, 15, 0 ); + x264_cabac_encode_decision_noup( cb, 16, 1 ); } else /* intra */ { /* prefix */ - x264_cabac_encode_decision( cb, 14, 1 ); + x264_cabac_encode_decision_noup( cb, 14, 1 ); /* suffix */ x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 ); } } - else if( h->sh.i_type == SLICE_TYPE_B ) + else //if( h->sh.i_type == SLICE_TYPE_B ) { int ctx = 0; if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT ) - { ctx++; - } if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT ) - { ctx++; - } if( i_mb_type == B_DIRECT ) { - x264_cabac_encode_decision( cb, 27+ctx, 0 ); + x264_cabac_encode_decision_noup( cb, 27+ctx, 0 ); + return; } - else if( i_mb_type == B_8x8 ) - { - x264_cabac_encode_decision( cb, 27+ctx, 1 ); - x264_cabac_encode_decision( cb, 27+3, 1 ); - x264_cabac_encode_decision( cb, 27+4, 1 ); + x264_cabac_encode_decision_noup( cb, 27+ctx, 1 ); + if( i_mb_type == B_8x8 ) + { + x264_cabac_encode_decision_noup( cb, 27+3, 1 ); + x264_cabac_encode_decision_noup( cb, 27+4, 1 ); x264_cabac_encode_decision( cb, 27+5, 1 ); x264_cabac_encode_decision( cb, 27+5, 1 ); - x264_cabac_encode_decision( cb, 27+5, 1 ); + x264_cabac_encode_decision_noup( cb, 27+5, 1 ); } else if( IS_INTRA( i_mb_type ) ) { /* prefix */ - x264_cabac_encode_decision( cb, 27+ctx, 1 ); - x264_cabac_encode_decision( cb, 27+3, 1 ); - x264_cabac_encode_decision( cb, 27+4, 1 ); - + x264_cabac_encode_decision_noup( cb, 27+3, 1 ); + x264_cabac_encode_decision_noup( cb, 27+4, 1 ); x264_cabac_encode_decision( cb, 27+5, 1 ); x264_cabac_encode_decision( cb, 27+5, 0 ); x264_cabac_encode_decision( cb, 27+5, 1 ); @@ -180,65 +144,48 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) } else { - static const int i_mb_len[9*3] = - { - 6, 6, 3, /* L0 L0 */ - 6, 6, 0, /* L0 L1 */ - 7, 7, 0, /* L0 BI */ - 6, 6, 0, /* L1 L0 */ - 6, 6, 3, /* L1 L1 */ - 7, 7, 0, /* L1 BI */ - 7, 7, 0, /* BI L0 */ - 7, 7, 0, /* BI L1 */ - 7, 7, 6, /* BI BI */ - }; - static const int i_mb_bits[9*3][7] = + static const uint8_t i_mb_bits[9*3] = { - { 1,1,0,0,0,1 }, { 1,1,0,0,1,0, }, { 1,0,0 }, /* L0 L0 */ - { 1,1,0,1,0,1 }, { 1,1,0,1,1,0 }, {0}, /* L0 L1 */ - { 1,1,1,0,0,0,0 }, { 1,1,1,0,0,0,1 }, {0}, /* L0 BI */ - { 1,1,0,1,1,1 }, { 1,1,1,1,1,0 }, {0}, /* L1 L0 */ - { 1,1,0,0,1,1 }, { 1,1,0,1,0,0 }, { 1,0,1 }, /* L1 L1 */ - { 1,1,1,0,0,1,0 }, { 1,1,1,0,0,1,1 }, {0}, /* L1 BI */ - { 1,1,1,0,1,0,0 }, { 1,1,1,0,1,0,1 }, {0}, /* BI L0 */ - { 1,1,1,0,1,1,0 }, { 1,1,1,0,1,1,1 }, {0}, /* BI L1 */ - { 1,1,1,1,0,0,0 }, { 1,1,1,1,0,0,1 }, { 1,1,0,0,0,0 }, /* BI BI */ + 0x31, 0x29, 0x4, /* L0 L0 */ + 0x35, 0x2d, 0, /* L0 L1 */ + 0x43, 0x63, 0, /* L0 BI */ + 0x3d, 0x2f, 0, /* L1 L0 */ + 0x39, 0x25, 0x6, /* L1 L1 */ + 0x53, 0x73, 0, /* L1 BI */ + 0x4b, 0x6b, 0, /* BI L0 */ + 0x5b, 0x7b, 0, /* BI L1 */ + 0x47, 0x67, 0x21 /* BI BI */ }; const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8); - int i; + int bits = i_mb_bits[idx]; - x264_cabac_encode_decision( cb, 27+ctx, i_mb_bits[idx][0] ); - x264_cabac_encode_decision( cb, 27+3, i_mb_bits[idx][1] ); - x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][1], i_mb_bits[idx][2] ); - for( i = 3; i < i_mb_len[idx]; i++ ) - x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][i] ); + x264_cabac_encode_decision_noup( cb, 27+3, bits&1 ); + x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2; + if( bits != 1 ) + { + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1; + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1; + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1; + if( bits != 1 ) + x264_cabac_encode_decision_noup( cb, 27+5, bits&1 ); + } } } - else - { - x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" ); - } } static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode ) { if( i_pred == i_mode ) - { - /* b_prev_intra4x4_pred_mode */ x264_cabac_encode_decision( cb, 68, 1 ); - } else { - /* b_prev_intra4x4_pred_mode */ x264_cabac_encode_decision( cb, 68, 0 ); if( i_mode > i_pred ) - { i_mode--; - } x264_cabac_encode_decision( cb, 69, (i_mode )&0x01 ); x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 ); - x264_cabac_encode_decision( cb, 69, (i_mode >> 2)&0x01 ); + x264_cabac_encode_decision( cb, 69, (i_mode >> 2) ); } } @@ -249,119 +196,68 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) /* No need to test for I4x4 or I_16x16 as cache_save handle that */ if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 ) - { ctx++; - } if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 ) - { ctx++; - } - x264_cabac_encode_decision( cb, 64 + ctx, i_mode > 0 ); + x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 ); if( i_mode > 0 ) { x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 ); if( i_mode > 1 ) - { - x264_cabac_encode_decision( cb, 64 + 3, i_mode > 2 ); - } + x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 ); } } static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb ) { - /* TODO: clean up and optimize */ - int i8x8; - for( i8x8 = 0; i8x8 < 4; i8x8++ ) - { - int i_mba_xy = -1; - int i_mbb_xy = -1; - int x = block_idx_x[4*i8x8]; - int y = block_idx_y[4*i8x8]; - int ctx = 0; - - if( x > 0 ) - i_mba_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_LEFT ) - i_mba_xy = h->mb.i_mb_xy - 1; - - if( y > 0 ) - i_mbb_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_TOP ) - i_mbb_xy = h->mb.i_mb_top_xy; - - - /* No need to test for PCM and SKIP */ - if( i_mba_xy >= 0 ) - { - const int i8x8a = block_idx_xy[(x-1)&0x03][y]/4; - if( ((h->mb.cbp[i_mba_xy] >> i8x8a)&0x01) == 0 ) - { - ctx++; - } - } - - if( i_mbb_xy >= 0 ) - { - const int i8x8b = block_idx_xy[x][(y-1)&0x03]/4; - if( ((h->mb.cbp[i_mbb_xy] >> i8x8b)&0x01) == 0 ) - { - ctx += 2; - } - } - - x264_cabac_encode_decision( cb, 73 + ctx, (h->mb.i_cbp_luma >> i8x8)&0x01 ); - } + int cbp = h->mb.i_cbp_luma; + int cbp_l = h->mb.cache.i_cbp_left; + int cbp_t = h->mb.cache.i_cbp_top; + x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 ); + x264_cabac_encode_decision ( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 ); + x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (cbp >> 2) & 1 ); + x264_cabac_encode_decision_noup( cb, 76 - ((cbp >> 2) & 1) - ((cbp >> 0) & 2), (cbp >> 3) & 1 ); } static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb ) { - int cbp_a = -1; - int cbp_b = -1; - int ctx; - - /* No need to test for SKIP/PCM */ - if( h->mb.i_neighbour & MB_LEFT ) - { - cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3; - } - - if( h->mb.i_neighbour & MB_TOP ) - { - cbp_b = (h->mb.cbp[h->mb.i_mb_top_xy] >> 4)&0x3; - } + int cbp_a = h->mb.cache.i_cbp_left & 0x30; + int cbp_b = h->mb.cache.i_cbp_top & 0x30; + int ctx = 0; - ctx = 0; - if( cbp_a > 0 ) ctx++; - if( cbp_b > 0 ) ctx += 2; + if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++; + if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2; if( h->mb.i_cbp_chroma == 0 ) - { - x264_cabac_encode_decision( cb, 77 + ctx, 0 ); - } + x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 ); else { - x264_cabac_encode_decision( cb, 77 + ctx, 1 ); + x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 ); ctx = 4; - if( cbp_a == 2 ) ctx++; - if( cbp_b == 2 ) ctx += 2; - x264_cabac_encode_decision( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 ); + if( cbp_a == 0x20 ) ctx++; + if( cbp_b == 0x20 ) ctx += 2; + x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 ); } } -/* TODO check it with != qp per mb */ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb ) { - int i_mbn_xy = h->mb.i_mb_prev_xy; int i_dqp = h->mb.i_qp - h->mb.i_last_qp; int ctx; - /* No need to test for PCM / SKIP */ - if( h->mb.i_last_dqp && - ( h->mb.type[i_mbn_xy] == I_16x16 || (h->mb.cbp[i_mbn_xy]&0x3f) ) ) - ctx = 1; - else - ctx = 0; + /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */ + if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] ) + { +#if !RDO_SKIP_BS + h->mb.i_qp = h->mb.i_last_qp; +#endif + i_dqp = 0; + } + + /* Since, per the above, empty-CBP I16x16 blocks never have delta quants, + * we don't have to check for them. */ + ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy]; if( i_dqp != 0 ) { @@ -369,106 +265,69 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb ) /* dqp is interpreted modulo 52 */ if( val >= 51 && val != 52 ) val = 103 - val; - while( val-- ) + do { x264_cabac_encode_decision( cb, 60 + ctx, 1 ); - if( ctx < 2 ) - ctx = 2; - else - ctx = 3; - } + ctx = 2+(ctx>>1); + } while( --val ); } - x264_cabac_encode_decision( cb, 60 + ctx, 0 ); + x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 ); } +#if !RDO_SKIP_BS void x264_cabac_mb_skip( x264_t *h, int b_skip ) { - int ctx = 0; - - if( h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ) ) - { - ctx++; - } - if( h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top ) ) - { - ctx++; - } - - ctx += (h->sh.i_type == SLICE_TYPE_P) ? 11 : 24; + int ctx = (h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left )) + + (h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top )) + + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24); x264_cabac_encode_decision( &h->cabac, ctx, b_skip ); } +#endif static inline void x264_cabac_mb_sub_p_partition( x264_cabac_t *cb, int i_sub ) { if( i_sub == D_L0_8x8 ) { x264_cabac_encode_decision( cb, 21, 1 ); + return; } - else if( i_sub == D_L0_8x4 ) - { - x264_cabac_encode_decision( cb, 21, 0 ); + x264_cabac_encode_decision( cb, 21, 0 ); + if( i_sub == D_L0_8x4 ) x264_cabac_encode_decision( cb, 22, 0 ); - } - else if( i_sub == D_L0_4x8 ) - { - x264_cabac_encode_decision( cb, 21, 0 ); - x264_cabac_encode_decision( cb, 22, 1 ); - x264_cabac_encode_decision( cb, 23, 1 ); - } - else if( i_sub == D_L0_4x4 ) + else { - x264_cabac_encode_decision( cb, 21, 0 ); x264_cabac_encode_decision( cb, 22, 1 ); - x264_cabac_encode_decision( cb, 23, 0 ); + x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 ); } } -static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub ) +static ALWAYS_INLINE void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub ) { -#define WRITE_SUB_3(a,b,c) {\ - x264_cabac_encode_decision( cb, 36, a );\ - x264_cabac_encode_decision( cb, 37, b );\ - x264_cabac_encode_decision( cb, 39, c );\ - } -#define WRITE_SUB_5(a,b,c,d,e) {\ - x264_cabac_encode_decision( cb, 36, a );\ - x264_cabac_encode_decision( cb, 37, b );\ - x264_cabac_encode_decision( cb, 38, c );\ - x264_cabac_encode_decision( cb, 39, d );\ - x264_cabac_encode_decision( cb, 39, e );\ - } -#define WRITE_SUB_6(a,b,c,d,e,f) {\ - WRITE_SUB_5(a,b,c,d,e)\ - x264_cabac_encode_decision( cb, 39, f );\ + if( i_sub == D_DIRECT_8x8 ) + { + x264_cabac_encode_decision( cb, 36, 0 ); + return; } - - switch( i_sub ) + x264_cabac_encode_decision( cb, 36, 1 ); + if( i_sub == D_BI_8x8 ) { - case D_DIRECT_8x8: - x264_cabac_encode_decision( cb, 36, 0 ); - break; - case D_L0_8x8: WRITE_SUB_3(1,0,0); break; - case D_L1_8x8: WRITE_SUB_3(1,0,1); break; - case D_BI_8x8: WRITE_SUB_5(1,1,0,0,0); break; - case D_L0_8x4: WRITE_SUB_5(1,1,0,0,1); break; - case D_L0_4x8: WRITE_SUB_5(1,1,0,1,0); break; - case D_L1_8x4: WRITE_SUB_5(1,1,0,1,1); break; - case D_L1_4x8: WRITE_SUB_6(1,1,1,0,0,0); break; - case D_BI_8x4: WRITE_SUB_6(1,1,1,0,0,1); break; - case D_BI_4x8: WRITE_SUB_6(1,1,1,0,1,0); break; - case D_L0_4x4: WRITE_SUB_6(1,1,1,0,1,1); break; - case D_L1_4x4: WRITE_SUB_5(1,1,1,1,0); break; - case D_BI_4x4: WRITE_SUB_5(1,1,1,1,1); break; + x264_cabac_encode_decision( cb, 37, 1 ); + x264_cabac_encode_decision( cb, 38, 0 ); + x264_cabac_encode_decision( cb, 39, 0 ); + x264_cabac_encode_decision( cb, 39, 0 ); + return; } + x264_cabac_encode_decision( cb, 37, 0 ); + x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 ); } -static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb ) +static ALWAYS_INLINE void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb ) { int ctx = 399 + h->mb.cache.i_neighbour_transform_size; - x264_cabac_encode_decision( cb, ctx, h->mb.b_transform_8x8 ); + x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 ); } -static inline void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx ) +static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx ) { const int i8 = x264_scan8[idx]; const int i_refa = h->mb.cache.ref[i_list][i8 - 1]; @@ -476,217 +335,192 @@ static inline void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, i int i_ref = h->mb.cache.ref[i_list][i8]; int ctx = 0; - if( i_refa > 0 && !h->mb.cache.skip[i8 - 1]) + if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] ) ctx++; - if( i_refb > 0 && !h->mb.cache.skip[i8 - 8]) + if( i_refb > 0 && !h->mb.cache.skip[i8 - 8] ) ctx += 2; while( i_ref > 0 ) { x264_cabac_encode_decision( cb, 54 + ctx, 1 ); - if( ctx < 4 ) - ctx = 4; - else - ctx = 5; - + ctx = (ctx>>2)+4; i_ref--; } x264_cabac_encode_decision( cb, 54 + ctx, 0 ); } - - -static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd ) +static ALWAYS_INLINE int x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx ) { - const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) + - abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] ); const int i_abs = abs( mvd ); - const int i_prefix = X264_MIN( i_abs, 9 ); - const int ctxbase = (l == 0 ? 40 : 47); - int ctx; + const int ctxbase = l ? 47 : 40; int i; - - - if( amvd < 3 ) - ctx = 0; - else if( amvd > 32 ) - ctx = 2; +#if RDO_SKIP_BS + if( i_abs == 0 ) + x264_cabac_encode_decision( cb, ctxbase + ctx, 0 ); else - ctx = 1; - - for( i = 0; i < i_prefix; i++ ) { x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); - if( ctx < 3 ) - ctx = 3; - else if( ctx < 6 ) - ctx++; + if( i_abs <= 3 ) + { + for( i = 1; i < i_abs; i++ ) + x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 ); + x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 ); + x264_cabac_encode_bypass( cb, mvd < 0 ); + } + else + { + x264_cabac_encode_decision( cb, ctxbase + 3, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 4, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 5, 1 ); + if( i_abs < 9 ) + { + cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]]; + cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]]; + } + else + { + cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]]; + cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]]; + x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 ); + } + } } - if( i_prefix < 9 ) +#else + static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 }; + + if( i_abs == 0 ) x264_cabac_encode_decision( cb, ctxbase + ctx, 0 ); else - x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 ); - - /* sign */ - if( mvd ) + { + x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); + if( i_abs < 9 ) + { + for( i = 1; i < i_abs; i++ ) + x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 ); + x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 ); + } + else + { + for( i = 1; i < 9; i++ ) + x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 ); + x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 ); + } x264_cabac_encode_bypass( cb, mvd < 0 ); + } +#endif + /* Since we don't need to keep track of MVDs larger than 33, just cap the value. + * This lets us store MVDs as 8-bit values instead of 16-bit. */ + return X264_MIN( i_abs, 33 ); } -static inline void x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height ) +static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width ) { - int mvp[2]; + ALIGNED_4( int16_t mvp[2] ); int mdx, mdy; /* Calculate mvd */ x264_mb_predict_mv( h, i_list, idx, width, mvp ); mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0]; mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1]; + uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1], + h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]); /* encode */ - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx ); - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy ); + mdx = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF ); + mdy = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 ); - /* save value */ - x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy ); + return pack8to16(mdx,mdy); } -static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int i ) -{ - if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) - return; +#define x264_cabac_mb_mvd(h,cb,i_list,idx,width,height)\ +do\ +{\ + uint16_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\ + x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\ +} while(0) +static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i ) +{ switch( h->mb.i_sub_partition[i] ) { case D_L0_8x8: - case D_L1_8x8: - case D_BI_8x8: - x264_cabac_mb_mvd( h, cb, i_list, 4*i, 2, 2 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 ); break; case D_L0_8x4: - case D_L1_8x4: - case D_BI_8x4: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 2, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 2, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 2, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 2, 1 ); break; case D_L0_4x8: - case D_L1_4x8: - case D_BI_4x8: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 2 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 2 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 2 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 2 ); break; case D_L0_4x4: - case D_L1_4x4: - case D_BI_4x4: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+3, 1, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 1, 1 ); + x264_cabac_mb_mvd( h, cb, 0, 4*i+3, 1, 1 ); break; + default: + assert(0); } } -static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) +/* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0 + * 1-> AC 16x16 i_idx = luma4x4idx + * 2-> Luma4x4 i_idx = luma4x4idx + * 3-> DC Chroma i_idx = iCbCr + * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx + * 5-> Luma8x8 i_idx = luma8x8idx + */ + +static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra ) { - int i_mba_xy = -1; - int i_mbb_xy = -1; - int i_nza = 0; - int i_nzb = 0; - int ctx; + int i_nza; + int i_nzb; - if( i_cat == DCT_LUMA_DC ) + switch( i_cat ) { - if( h->mb.i_neighbour & MB_LEFT ) - { - i_mba_xy = h->mb.i_mb_xy - 1; - if( h->mb.i_mb_type_left == I_16x16 ) - i_nza = h->mb.cbp[i_mba_xy] & 0x100; - } - if( h->mb.i_neighbour & MB_TOP ) - { - i_mbb_xy = h->mb.i_mb_top_xy; - if( h->mb.i_mb_type_top == I_16x16 ) - i_nzb = h->mb.cbp[i_mbb_xy] & 0x100; - } - } - else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 ) - { - if( i_idx & ~10 ) // block_idx_x > 0 - i_mba_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_LEFT ) - i_mba_xy = h->mb.i_mb_xy - 1; - - if( i_idx & ~5 ) // block_idx_y > 0 - i_mbb_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_TOP ) - i_mbb_xy = h->mb.i_mb_top_xy; - - /* no need to test for skip/pcm */ - if( i_mba_xy >= 0 ) + case DCT_LUMA_AC: + case DCT_LUMA_4x4: + case DCT_CHROMA_AC: + /* no need to test for skip/pcm */ i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1]; - if( i_mbb_xy >= 0 ) i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8]; + if( x264_constant_p(b_intra) && !b_intra ) + return 85 + 4*i_cat + ((2*i_nzb + i_nza)&0x7f); + else + { + i_nza &= 0x7f + (b_intra << 7); + i_nzb &= 0x7f + (b_intra << 7); + return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza; + } + case DCT_LUMA_DC: + i_nza = (h->mb.cache.i_cbp_left >> 8) & 1; + i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1; + return 85 + 4*i_cat + 2*i_nzb + i_nza; + case DCT_CHROMA_DC: + /* no need to test skip/pcm */ + i_idx -= 25; + i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra; + i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra; + return 85 + 4*i_cat + 2*i_nzb + i_nza; + default: + return 0; } - else if( i_cat == DCT_CHROMA_DC ) - { - /* no need to test skip/pcm */ - if( h->mb.i_neighbour & MB_LEFT ) - { - i_mba_xy = h->mb.i_mb_xy - 1; - i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx); - } - if( h->mb.i_neighbour & MB_TOP ) - { - i_mbb_xy = h->mb.i_mb_top_xy; - i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx); - } - } - else if( i_cat == DCT_CHROMA_AC ) - { - if( i_idx & 1 ) - i_mba_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_LEFT ) - i_mba_xy = h->mb.i_mb_xy - 1; - - if( i_idx & 2 ) - i_mbb_xy = h->mb.i_mb_xy; - else if( h->mb.i_neighbour & MB_TOP ) - i_mbb_xy = h->mb.i_mb_top_xy; - - /* no need to test skip/pcm */ - if( i_mba_xy >= 0 ) - i_nza = h->mb.cache.non_zero_count[x264_scan8[16+i_idx] - 1]; - if( i_mbb_xy >= 0 ) - i_nzb = h->mb.cache.non_zero_count[x264_scan8[16+i_idx] - 8]; - } - - if( IS_INTRA( h->mb.i_type ) ) - { - if( i_mba_xy < 0 ) - i_nza = 1; - if( i_mbb_xy < 0 ) - i_nzb = 1; - } - - ctx = 4 * i_cat; - if( i_nza ) - ctx += 1; - if( i_nzb ) - ctx += 2; - return ctx; } -static const int significant_coeff_flag_offset[2][6] = { +static const uint16_t significant_coeff_flag_offset[2][6] = { { 105, 120, 134, 149, 152, 402 }, { 277, 292, 306, 321, 324, 436 } }; -static const int last_coeff_flag_offset[2][6] = { +static const uint16_t last_coeff_flag_offset[2][6] = { { 166, 181, 195, 210, 213, 417 }, { 338, 353, 367, 382, 385, 451 } }; -static const int coeff_abs_level_m1_offset[6] = +static const uint16_t coeff_abs_level_m1_offset[6] = { 227, 237, 247, 257, 266, 426 }; -static const int significant_coeff_flag_offset_8x8[2][63] = +static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {{ 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, @@ -698,114 +532,224 @@ static const int significant_coeff_flag_offset_8x8[2][63] = 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }}; -static const int last_coeff_flag_offset_8x8[63] = { +static const uint8_t last_coeff_flag_offset_8x8[63] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 }; -static const int identity[16] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; -static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count ) +// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). +// 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). +/* map node ctx => cabac ctx for level=1 */ +static const int coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; +/* map node ctx => cabac ctx for level>1 */ +static const int coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; +static const uint8_t coeff_abs_level_transition[2][8] = { +/* update node ctx after coding a level=1 */ + { 1, 2, 3, 3, 4, 5, 6, 7 }, +/* update node ctx after coding a level>1 */ + { 4, 4, 4, 4, 5, 6, 7, 7 } +}; +static const int count_cat_m1[5] = {15, 14, 15, 3, 14}; + +#if !RDO_SKIP_BS +static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l ) { const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat]; - + const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced]; int i_coeff_abs_m1[64]; int i_coeff_sign[64]; int i_coeff = 0; - int i_last = 0; - int i_sigmap_size; - - int i_abslevel1 = 0; - int i_abslevelgt1 = 0; + int i_last; + int node_ctx = 0; + int i = 0; + + i_last = h->quantf.coeff_last[i_ctxBlockCat](l); + +#define WRITE_SIGMAP( l8x8 )\ + while(1)\ + {\ + if( l[i] )\ + {\ + i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\ + i_coeff_sign[i_coeff] = l[i] < 0;\ + i_coeff++;\ + x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\ + if( i == i_last )\ + {\ + x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 1 );\ + break;\ + }\ + else\ + x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );\ + }\ + else\ + x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 0 );\ + i++;\ + if( i == i_count_m1 )\ + {\ + i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\ + i_coeff_sign[i_coeff] = l[i] < 0;\ + i_coeff++;\ + break;\ + }\ + } - int i; + if( i_ctxBlockCat == DCT_LUMA_8x8 ) + { + const int i_count_m1 = 63; + WRITE_SIGMAP( 1 ) + } + else + { + const int i_count_m1 = count_cat_m1[i_ctxBlockCat]; + WRITE_SIGMAP( 0 ) + } - const int *significant_coeff_flag_offset; - const int *last_coeff_flag_offset; + do + { + int i_prefix, ctx; + i_coeff--; - /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0 - * 1-> AC 16x16 i_idx = luma4x4idx - * 2-> Luma4x4 i_idx = luma4x4idx - * 3-> DC Chroma i_idx = iCbCr - * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx - * 5-> Luma8x8 i_idx = luma8x8idx - */ + /* write coeff_abs - 1 */ + i_prefix = X264_MIN( i_coeff_abs_m1[i_coeff], 14 ); + ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level; - for( i = 0; i < i_count; i++ ) - { - if( l[i] != 0 ) + if( i_prefix ) { - i_coeff_abs_m1[i_coeff] = abs( l[i] ) - 1; - i_coeff_sign[i_coeff] = ( l[i] < 0 ); - i_coeff++; + x264_cabac_encode_decision( cb, ctx, 1 ); + ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level; + for( i = 0; i < i_prefix - 1; i++ ) + x264_cabac_encode_decision( cb, ctx, 1 ); + if( i_prefix < 14 ) + x264_cabac_encode_decision( cb, ctx, 0 ); + else + x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i_coeff] - 14 ); - i_last = i; + node_ctx = coeff_abs_level_transition[1][node_ctx]; + } + else + { + x264_cabac_encode_decision( cb, ctx, 0 ); + node_ctx = coeff_abs_level_transition[0][node_ctx]; } - } - if( i_count != 64 ) - { - /* coded block flag */ - x264_cabac_encode_decision( cb, 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx ), i_coeff != 0 ); - if( i_coeff == 0 ) - return; - } + x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] ); + } while( i_coeff > 0 ); +} +#define block_residual_write_cabac_8x8( h, cb, l ) block_residual_write_cabac( h, cb, DCT_LUMA_8x8, l ) - significant_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8) - ? significant_coeff_flag_offset_8x8[h->mb.b_interlaced] - : identity; - last_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8) - ? last_coeff_flag_offset_8x8 : identity; +#else - i_sigmap_size = X264_MIN( i_last+1, i_count-1 ); - for( i = 0; i < i_sigmap_size; i++ ) +/* Faster RDO by merging sigmap and level coding. Note that for 8x8dct + * this is slightly incorrect because the sigmap is not reversible + * (contexts are repeated). However, there is nearly no quality penalty + * for this (~0.001db) and the speed boost (~30%) is worth it. */ +static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l, int b_8x8 ) +{ + const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; + const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; + const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat]; + const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced]; + int i_last, i_coeff_abs, ctx, i, node_ctx; + + i_last = h->quantf.coeff_last[i_ctxBlockCat](l); + + i_coeff_abs = abs(l[i_last]); + ctx = coeff_abs_level1_ctx[0] + i_ctx_level; + + if( i_last != (b_8x8 ? 63 : count_cat_m1[i_ctxBlockCat]) ) { - x264_cabac_encode_decision( cb, i_ctx_sig + significant_coeff_flag_offset[i], l[i] != 0 ); - if( l[i] != 0 ) - x264_cabac_encode_decision( cb, i_ctx_last + last_coeff_flag_offset[i], i == i_last ); + x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i_last]:i_last), 1 ); + x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i_last]:i_last), 1 ); } - for( i = i_coeff - 1; i >= 0; i-- ) + if( i_coeff_abs > 1 ) { - /* write coeff_abs - 1 */ - const int i_prefix = X264_MIN( i_coeff_abs_m1[i], 14 ); - const int i_ctxIdxInc = (i_abslevelgt1 ? 0 : X264_MIN( 4, i_abslevel1 + 1 )) + i_ctx_level; - x264_cabac_encode_decision( cb, i_ctxIdxInc, i_prefix != 0 ); + x264_cabac_encode_decision( cb, ctx, 1 ); + ctx = coeff_abs_levelgt1_ctx[0] + i_ctx_level; + if( i_coeff_abs < 15 ) + { + cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]]; + cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]]; + } + else + { + cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]]; + cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]]; + x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 ); + } + node_ctx = coeff_abs_level_transition[1][0]; + } + else + { + x264_cabac_encode_decision( cb, ctx, 0 ); + node_ctx = coeff_abs_level_transition[0][0]; + x264_cabac_encode_bypass( cb, 0 ); // sign + } - if( i_prefix != 0 ) + for( i = i_last-1 ; i >= 0; i-- ) + { + if( l[i] ) { - const int i_ctxIdxInc = 5 + X264_MIN( 4, i_abslevelgt1 ) + i_ctx_level; -#ifdef RDO_SKIP_BS - cb->f8_bits_encoded += cabac_prefix_size[i_prefix][cb->state[i_ctxIdxInc]]; - cb->state[i_ctxIdxInc] = cabac_prefix_transition[i_prefix][cb->state[i_ctxIdxInc]]; -#else - int j; - for( j = 0; j < i_prefix - 1; j++ ) - x264_cabac_encode_decision( cb, i_ctxIdxInc, 1 ); - if( i_prefix < 14 ) - x264_cabac_encode_decision( cb, i_ctxIdxInc, 0 ); -#endif - if( i_prefix >= 14 ) - x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i] - 14 ); + i_coeff_abs = abs(l[i]); + x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 1 ); + x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i]:i), 0 ); + ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level; - i_abslevelgt1++; + if( i_coeff_abs > 1 ) + { + x264_cabac_encode_decision( cb, ctx, 1 ); + ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level; + if( i_coeff_abs < 15 ) + { + cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]]; + cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]]; + } + else + { + cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]]; + cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]]; + x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 ); + } + node_ctx = coeff_abs_level_transition[1][node_ctx]; + } + else + { + x264_cabac_encode_decision( cb, ctx, 0 ); + node_ctx = coeff_abs_level_transition[0][node_ctx]; + x264_cabac_encode_bypass( cb, 0 ); + } } else - i_abslevel1++; - - /* write sign */ -#ifdef RDO_SKIP_BS - if( i_prefix == 0 ) -#endif - x264_cabac_encode_bypass( cb, i_coeff_sign[i] ); + x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 0 ); } } +static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int16_t *l ) +{ + block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 ); +} +static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l ) +{ + block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, l, 0 ); +} +#endif +#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \ +{ \ + int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \ + if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\ + {\ + x264_cabac_encode_decision( cb, ctxidxinc, 1 );\ + block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \ + }\ + else\ + x264_cabac_encode_decision( cb, ctxidxinc, 0 );\ +} void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) { @@ -813,7 +757,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) int i_list; int i; -#ifndef RDO_SKIP_BS +#if !RDO_SKIP_BS const int i_mb_pos_start = x264_cabac_pos( cb ); int i_mb_pos_tex; #endif @@ -821,36 +765,30 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) /* Write the MB type */ x264_cabac_mb_type( h, cb ); - /* PCM special block type UNTESTED */ +#if !RDO_SKIP_BS if( i_mb_type == I_PCM ) { -#ifdef RDO_SKIP_BS - cb->f8_bits_encoded += (384*8) << 8; -#else - if( cb->p + 385 >= cb->p_end ) - return; //FIXME throw an error - /* Luma */ - for( i = 0; i < 16; i++ ) - { - memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 ); - cb->p += 16; - } - /* Cb */ + i_mb_pos_tex = x264_cabac_pos( cb ); + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; + + memcpy( cb->p, h->mb.pic.p_fenc[0], 256 ); + cb->p += 256; for( i = 0; i < 8; i++ ) - { - memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 ); - cb->p += 8; - } - /* Cr */ + memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 ); + cb->p += 64; for( i = 0; i < 8; i++ ) - { - memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 ); - cb->p += 8; - } - x264_cabac_encode_init( cb, cb->p, cb->p_end ); -#endif + memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); + cb->p += 64; + + cb->i_low = 0; + cb->i_range = 0x01FE; + cb->i_queue = -1; + cb->i_bytes_outstanding = 0; + + h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; return; } +#endif if( IS_INTRA( i_mb_type ) ) { @@ -859,7 +797,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) if( i_mb_type != I_16x16 ) { - int di = (i_mb_type == I_8x8) ? 4 : 1; + int di = h->mb.b_transform_8x8 ? 4 : 1; for( i = 0; i < 16; i += di ) { const int i_pred = x264_mb_predict_intra4x4_mode( h, i ); @@ -890,7 +828,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 ); x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 ); } - else if( h->mb.i_partition == D_8x16 ) + else //if( h->mb.i_partition == D_8x16 ) { if( h->mb.pic.i_fref[0] > 1 ) { @@ -904,10 +842,8 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) else if( i_mb_type == P_8x8 ) { /* sub mb type */ - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[0] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[1] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[2] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[3] ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] ); /* ref 0 */ if( h->mb.pic.i_fref[0] > 1 ) @@ -919,64 +855,50 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) } for( i = 0; i < 4; i++ ) - x264_cabac_mb8x8_mvd( h, cb, 0, i ); + x264_cabac_mb8x8_mvd( h, cb, i ); } else if( i_mb_type == B_8x8 ) { /* sub mb type */ - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[0] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[1] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[2] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[3] ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] ); /* ref */ - for( i_list = 0; i_list < 2; i_list++ ) - { - if( ( i_list ? h->mb.pic.i_fref[1] : h->mb.pic.i_fref[0] ) == 1 ) - continue; + if( h->mb.pic.i_fref[0] > 1 ) for( i = 0; i < 4; i++ ) - if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) - x264_cabac_mb_ref( h, cb, i_list, 4*i ); - } + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) + x264_cabac_mb_ref( h, cb, 0, 4*i ); + + if( h->mb.pic.i_fref[1] > 1 ) + for( i = 0; i < 4; i++ ) + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) + x264_cabac_mb_ref( h, cb, 1, 4*i ); for( i = 0; i < 4; i++ ) - x264_cabac_mb8x8_mvd( h, cb, 0, i ); + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) + x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 ); + for( i = 0; i < 4; i++ ) - x264_cabac_mb8x8_mvd( h, cb, 1, i ); + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) + x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 ); } else if( i_mb_type != B_DIRECT ) { /* All B mode */ - int b_list[2][2]; - - /* init ref list utilisations */ - for( i = 0; i < 2; i++ ) + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type]; + if( h->mb.pic.i_fref[0] > 1 ) { - b_list[0][i] = x264_mb_type_list0_table[i_mb_type][i]; - b_list[1][i] = x264_mb_type_list1_table[i_mb_type][i]; + if( b_list[0][0] ) + x264_cabac_mb_ref( h, cb, 0, 0 ); + if( b_list[0][1] && h->mb.i_partition != D_16x16 ) + x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) ); } - - for( i_list = 0; i_list < 2; i_list++ ) + if( h->mb.pic.i_fref[1] > 1 ) { - const int i_ref_max = i_list == 0 ? h->mb.pic.i_fref[0] : h->mb.pic.i_fref[1]; - - if( i_ref_max > 1 ) - { - if( h->mb.i_partition == D_16x16 ) - { - if( b_list[i_list][0] ) x264_cabac_mb_ref( h, cb, i_list, 0 ); - } - else if( h->mb.i_partition == D_16x8 ) - { - if( b_list[i_list][0] ) x264_cabac_mb_ref( h, cb, i_list, 0 ); - if( b_list[i_list][1] ) x264_cabac_mb_ref( h, cb, i_list, 8 ); - } - else if( h->mb.i_partition == D_8x16 ) - { - if( b_list[i_list][0] ) x264_cabac_mb_ref( h, cb, i_list, 0 ); - if( b_list[i_list][1] ) x264_cabac_mb_ref( h, cb, i_list, 4 ); - } - } + if( b_list[1][0] ) + x264_cabac_mb_ref( h, cb, 1, 0 ); + if( b_list[1][1] && h->mb.i_partition != D_16x16 ) + x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) ); } for( i_list = 0; i_list < 2; i_list++ ) { @@ -989,7 +911,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 ); if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 ); } - else if( h->mb.i_partition == D_8x16 ) + else //if( h->mb.i_partition == D_8x16 ) { if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 ); if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 ); @@ -997,9 +919,9 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) } } -#ifndef RDO_SKIP_BS +#if !RDO_SKIP_BS i_mb_pos_tex = x264_cabac_pos( cb ); - h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start; + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; #endif if( i_mb_type != I_16x16 ) @@ -1015,98 +937,80 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 ) { + const int b_intra = IS_INTRA( i_mb_type ); x264_cabac_mb_qp_delta( h, cb ); /* write residual */ if( i_mb_type == I_16x16 ) { /* DC Luma */ - block_residual_write_cabac( h, cb, DCT_LUMA_DC, 0, h->dct.luma16x16_dc, 16 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 ); /* AC Luma */ if( h->mb.i_cbp_luma != 0 ) for( i = 0; i < 16; i++ ) - block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 ); } else if( h->mb.b_transform_8x8 ) { for( i = 0; i < 4; i++ ) if( h->mb.i_cbp_luma & ( 1 << i ) ) - block_residual_write_cabac( h, cb, DCT_LUMA_8x8, i, h->dct.luma8x8[i], 64 ); + block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i] ); } else { for( i = 0; i < 16; i++ ) if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) ) - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.block[i].luma4x4, 16 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra ); } - if( h->mb.i_cbp_chroma &0x03 ) /* Chroma DC residual present */ + if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */ { - block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 0, h->dct.chroma_dc[0], 4 ); - block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 1, h->dct.chroma_dc[1], 4 ); - } - if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */ - { - for( i = 0; i < 8; i++ ) - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[16+i].residual_ac, 15 ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra ); + if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */ + for( i = 16; i < 24; i++ ) + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, b_intra ); } } -#ifndef RDO_SKIP_BS - if( IS_INTRA( i_mb_type ) ) - h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; - else - h->stat.frame.i_ptex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; +#if !RDO_SKIP_BS + h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; #endif } -#ifdef RDO_SKIP_BS +#if RDO_SKIP_BS /***************************************************************************** * RD only; doesn't generate a valid bitstream * doesn't write cbp or chroma dc (I don't know how much this matters) + * doesn't write ref (never varies between calls, so no point in doing so) + * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO * works on all partition sizes except 16x16 - * for sub8x8, call once per 8x8 block *****************************************************************************/ -void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel ) +static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel ) { const int i_mb_type = h->mb.i_type; + int b_8x16 = h->mb.i_partition == D_8x16; int j; if( i_mb_type == P_8x8 ) { + x264_cabac_mb8x8_mvd( h, cb, i8 ); x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] ); - if( h->mb.pic.i_fref[0] > 1 ) - x264_cabac_mb_ref( h, cb, 0, 4*i8 ); - x264_cabac_mb8x8_mvd( h, cb, 0, i8 ); } else if( i_mb_type == P_L0 ) + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2< B_DIRECT && i_mb_type < B_8x8 ) { - if( h->mb.pic.i_fref[0] > 1 ) - x264_cabac_mb_ref( h, cb, 0, 4*i8 ); - if( h->mb.i_partition == D_16x8 ) - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4, 2 ); - else //8x16 - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 4 ); + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<>b_8x16, 2<mb.i_sub_partition[i8] ); - - if( h->mb.pic.i_fref[0] > 1 - && x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) - x264_cabac_mb_ref( h, cb, 0, 4*i8 ); - if( h->mb.pic.i_fref[1] > 1 - && x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) - x264_cabac_mb_ref( h, cb, 1, 4*i8 ); - - x264_cabac_mb8x8_mvd( h, cb, 0, i8 ); - x264_cabac_mb8x8_mvd( h, cb, 1, i8 ); - } - else - { - x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); - return; + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 ); + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) + x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 ); } for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) @@ -1114,28 +1018,45 @@ void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel if( h->mb.i_cbp_luma & (1 << i8) ) { if( h->mb.b_transform_8x8 ) - block_residual_write_cabac( h, cb, DCT_LUMA_8x8, i8, h->dct.luma8x8[i8], 64 ); + block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] ); else { int i4; for( i4 = 0; i4 < 4; i4++ ) - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 0 ); } } - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8, h->dct.block[16+i8 ].residual_ac, 15 ); - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8+4, h->dct.block[16+i8+4].residual_ac, 15 ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 0 ); i8 += x264_pixel_size[i_pixel].h >> 3; } } +static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel ) +{ + int b_8x4 = i_pixel == PIXEL_8x4; + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 ); + if( i_pixel == PIXEL_4x4 ) + { + x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 ); + } + else + { + x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+2-b_8x4, h->dct.luma4x4[i4+2-b_8x4], 0 ); + } +} + static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode ) { const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 ); i_mode = x264_mb_pred_mode4x4_fix( i_mode ); x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode ); - block_residual_write_cabac( h, cb, DCT_LUMA_8x8, 4*i8, h->dct.luma8x8[i8], 64 ); + x264_cabac_mb_cbp_luma( h, cb ); + if( h->mb.i_cbp_luma & (1 << i8) ) + block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] ); } static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode ) @@ -1143,22 +1064,23 @@ static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 ); i_mode = x264_mb_pred_mode4x4_fix( i_mode ); x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode ); - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 ); + block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 1 ); } static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb ) { x264_cabac_mb_intra_chroma_pred_mode( h, cb ); + x264_cabac_mb_cbp_chroma( h, cb ); if( h->mb.i_cbp_chroma > 0 ) { - block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 0, h->dct.chroma_dc[0], 4 ); - block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 1, h->dct.chroma_dc[1], 4 ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 1 ); + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 1 ); if( h->mb.i_cbp_chroma == 2 ) { int i; - for( i = 0; i < 8; i++ ) - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[16+i].residual_ac, 15 ); + for( i = 16; i < 24; i++ ) + block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 1 ); } } }