/*****************************************************************************
* cabac.c: cabac bitstream writing
*****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2016 x264 project
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
int ctx;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
+ /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely
+ * flat background area. Don't do this if it would raise the quantizer, since that could
+ * cause unexpected deblocking artifacts. */
+ if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] && h->mb.i_qp > h->mb.i_last_qp )
{
#if !RDO_SKIP_BS
h->mb.i_qp = h->mb.i_last_qp;
i_dqp = 0;
}
- /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
- * we don't have to check for them. */
- ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
+ ctx = h->mb.i_last_dqp && (h->mb.type[h->mb.i_mb_prev_xy] == I_16x16 || (h->mb.cbp[h->mb.i_mb_prev_xy]&0x3f));
if( i_dqp != 0 )
{
}
}
-static const uint16_t significant_coeff_flag_offset[2][14] =
-{
- { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
- { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
-};
-static const uint16_t last_coeff_flag_offset[2][14] =
-{
- { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
- { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
-};
-static const uint16_t coeff_abs_level_m1_offset[14] =
-{
- 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
-};
-#if RDO_SKIP_BS
-extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][63];
+#if !RDO_SKIP_BS
+extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][64];
extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
+extern const uint16_t x264_significant_coeff_flag_offset[2][16];
+extern const uint16_t x264_last_coeff_flag_offset[2][16];
+extern const uint16_t x264_coeff_abs_level_m1_offset[16];
+extern const uint8_t x264_count_cat_m1[14];
#else
-const uint8_t x264_significant_coeff_flag_offset_8x8[2][63] =
+/* Padded to [64] for easier addressing */
+const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] =
{{
0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* entry i is MIN( i/2, 2 ); added to the significance context base in place of the raw coefficient index when chroma422dc is set */
+const uint16_t x264_significant_coeff_flag_offset[2][16] = /* looked up as [MB_INTERLACED][ctx_block_cat] to obtain ctx_sig, the base that a per-coefficient offset is added to before x264_cabac_encode_decision */
+{
+ { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 }, /* row for MB_INTERLACED == 0; the trailing 0, 0 pad the 14 listed values out to 16 */
+ { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 } /* row for MB_INTERLACED == 1, padded the same way */
+};
+const uint16_t x264_last_coeff_flag_offset[2][16] = /* looked up as [MB_INTERLACED][ctx_block_cat] to obtain ctx_last in the residual coders */
+{
+ { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 }, /* row for MB_INTERLACED == 0; the trailing 0, 0 pad the 14 listed values out to 16 */
+ { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 } /* row for MB_INTERLACED == 1, padded the same way */
+};
+const uint16_t x264_coeff_abs_level_m1_offset[16] = /* looked up as [ctx_block_cat] to obtain ctx_level, the base added to the level context tables */
+{
+ 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766 /* only 14 initializers are listed: elements 14 and 15 are implicitly zero, unlike the sibling tables which spell the padding out */
+};
+const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63}; /* looked up as [ctx_block_cat]; the residual coders compare it with the last-coefficient index and treat 63 as the 8x8 case. Presumably the coefficient count minus one per category -- confirm against the spec */
#endif
// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
/* 4:2:2 chroma DC uses a slightly different state machine, so it gets its own table;
 * the residual coders select it in place of coeff_abs_levelgt1_ctx when chroma422dc is set.
 * Note that 4:2:0 chroma DC doesn't use the last state, so it has identical output with
 * both arrays. */
static const uint8_t coeff_abs_levelgt1_ctx_chroma_dc[8] = { 5, 5, 5, 5, 6, 7, 8, 8 };
+
static const uint8_t coeff_abs_level_transition[2][8] = { /* next node ctx; row chosen by whether the coded level was > 1, column presumably the current node ctx (0..7) -- confirm at the use site */
/* update node ctx after coding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 },
/* update node ctx after coding a level>1 */
{ 4, 4, 4, 4, 5, 6, 7, 7 }
};
-static const uint8_t count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63};
#if !RDO_SKIP_BS
static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
{
- int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
- int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
- int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
+ int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
int coeff_idx = -1, node_ctx = 0;
int last = h->quantf.coeff_last[ctx_block_cat]( l );
const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
}
else
{
- int count_m1 = count_cat_m1[ctx_block_cat];
+ int count_m1 = x264_count_cat_m1[ctx_block_cat];
if( count_m1 == 63 )
{
const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
x264_cabac_encode_bypass( cb, coeff_sign );
} while( --coeff_idx >= 0 );
}
-static void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+
+void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{ /* Plain-C residual coder for coefficient block l of category ctx_block_cat, writing through cb; has external linkage (no longer static) so it can be referenced from outside this file. */
x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 ); /* last argument is chroma422dc: 0 means this is not the 4:2:2 chroma DC variant */
}
+
+static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+{ /* Compile-time dispatch for residual coding: the h->bsf function pointer on x86-64 builds with MMX, the C implementation otherwise. */
+#if ARCH_X86_64 && HAVE_MMX
+ h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb ); /* note the different argument order: this entry point takes MB_INTERLACED explicitly and does not receive h */
+#else
+ x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
+#endif
+}
static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
/* Template a version specifically for chroma 4:2:2 DC in order to avoid
static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
{
const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
- int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
- int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
- int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
+ int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
int last = h->quantf.coeff_last[ctx_block_cat]( l );
int coeff_abs = abs(l[last]);
int ctx = coeff_abs_level1_ctx[0] + ctx_level;
int node_ctx;
const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
- if( last != (b_8x8 ? 63 : chroma422dc ? 7 : count_cat_m1[ctx_block_cat]) )
+ if( last != (b_8x8 ? 63 : chroma422dc ? 7 : x264_count_cat_m1[ctx_block_cat]) )
{
x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
}
}
-static void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{ /* Plain-C 8x8 entry point of the `_rd` residual coder; has external linkage so it can be referenced from outside this file. */
x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 ); /* trailing arguments are b_8x8 = 1, chroma422dc = 0 */
}
-static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{ /* Plain-C non-8x8 entry point of the `_rd` residual coder; has external linkage so it can be referenced from outside this file. */
- x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
+ x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 ); /* trailing arguments are b_8x8 = 0, chroma422dc = 0; the caller's category is forwarded unchanged */
}
-static void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+
+static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{ /* Compile-time dispatch for the 8x8 `_rd` residual coder: the h->bsf function pointer on x86-64 builds with MMX, the C implementation otherwise. */
- x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
+#if ARCH_X86_64 && HAVE_MMX
+ h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb ); /* this entry point takes MB_INTERLACED explicitly and does not receive h */
+#else
+ x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
+#endif
+}
+static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+{ /* Compile-time dispatch for the non-8x8 `_rd` residual coder: the h->bsf function pointer on x86-64 builds with MMX, the C implementation otherwise. */
+#if ARCH_X86_64 && HAVE_MMX
+ h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb ); /* this entry point takes MB_INTERLACED explicitly and does not receive h */
+#else
+ x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
+#endif
+}
+
+static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+{ /* 4:2:2 chroma DC entry point; always uses the C implementation, with no h->bsf dispatch. */
+ x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 ); /* ctx_block_cat is ignored: the category is fixed to DCT_CHROMA_DC, with b_8x8 = 0 and chroma422dc = 1 */
}
#endif
MUNGE_8x8_NNZ( BACKUP )
for( int p = 0; p < 3; p++ )
- for( int i = 0; i < 4; i++ )
- if( h->mb.i_cbp_luma & ( 1 << i ) )
- x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
+ FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
+ x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
MUNGE_8x8_NNZ( RESTORE )
}
else
{
- for( int i = 0; i < 4; i++ )
- if( h->mb.i_cbp_luma & ( 1 << i ) )
- x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
+ FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
+ x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
}
}
else
{
for( int p = 0; p < plane_count; p++ )
- for( int i = 0; i < 16; i++ )
- if( h->mb.i_cbp_luma & ( 1 << ( i >> 2 ) ) )
- x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+p*16, h->dct.luma4x4[i+p*16], b_intra );
+ FOREACH_BIT( i8x8, 0, h->mb.i_cbp_luma )
+ for( int i = 0; i < 4; i++ )
+ x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra );
}
if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */