* H.26L/H.264/AVC/JVT/14496-10/... cabac decoding
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
- * @file libavcodec/h264_cabac.c
+ * @file
* H.264 / AVC / MPEG4 part10 cabac decoding.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
cbp_b = h->top_cbp;
ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
- cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
+ cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
- cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
+ cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
- cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
+ cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
- cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
+ cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
return cbp;
}
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
return ref;
}
-static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
- int amvd = h->mvd_cache[list][scan8[n] - 1][l] +
- h->mvd_cache[list][scan8[n] - 8][l];
- int ctxbase = (l == 0) ? 40 : 47;
+static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda) {
int mvd;
- if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+FFMIN(((amvd+28)*17)>>9,2)]))
+ if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+((amvd-3)>>(INT_BIT-1))+((amvd-33)>>(INT_BIT-1))+2])){
+// if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+(amvd>2)+(amvd>32)])){
+ *mvda= 0;
return 0;
+ }
mvd= 1;
ctxbase+= 3;
while( k-- ) {
mvd += get_cabac_bypass( &h->cabac )<<k;
}
- }
+ *mvda=mvd < 70 ? mvd : 70;
+ }else
+ *mvda=mvd;
return get_cabac_bypass_sign( &h->cabac, -mvd );
}
+#define DECODE_CABAC_MB_MVD( h, list, n ) /* Decode both MVD components for block n of reference list `list` and add them to the caller's mx/my. */\
+{ /* Expands to a bare brace block, so call sites use it without a trailing semicolon; mx, my, mpx and mpy must be declared by the caller. */\
+ int amvd0 = h->mvd_cache[list][scan8[n] - 1][0] +\
+ h->mvd_cache[list][scan8[n] - 8][0];\
+ int amvd1 = h->mvd_cache[list][scan8[n] - 1][1] +\
+ h->mvd_cache[list][scan8[n] - 8][1];\
+/* amvd0/amvd1 sum the cached values at scan8[n]-1 and scan8[n]-8 (presumably the left and top neighbours -- confirm against the cache layout). 40 and 47 are the cabac_state context bases for component 0 and 1. mx/my are accumulated with +=, so they must already hold the prediction; mpx/mpy receive whatever decode_cabac_mb_mvd stores through its last pointer argument. */\
+ mx += decode_cabac_mb_mvd( h, 40, amvd0, &mpx );\
+ my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
+}
+
static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
int nza, nzb;
int ctx = 0;
nza = h->left_cbp&0x100;
nzb = h-> top_cbp&0x100;
} else {
+ idx -= CHROMA_DC_BLOCK_INDEX;
nza = (h->left_cbp>>(6+idx))&0x01;
nzb = (h-> top_cbp>>(6+idx))&0x01;
}
#define CC &h->cabac
#endif
-
- /* cat: 0-> DC 16x16 n = 0
- * 1-> AC 16x16 n = luma4x4idx
- * 2-> Luma4x4 n = luma4x4idx
- * 3-> DC Chroma n = iCbCr
- * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
- * 5-> Luma8x8 n = 4 * luma8x8idx
- */
-
- /* read coded block flag */
- if( is_dc || cat != 5 ) {
- if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
- if( !is_dc )
- h->non_zero_count_cache[scan8[n]] = 0;
-
-#ifdef CABAC_ON_STACK
- h->cabac.range = cc.range ;
- h->cabac.low = cc.low ;
- h->cabac.bytestream= cc.bytestream;
-#endif
- return;
- }
- }
-
significant_coeff_ctx_base = h->cabac_state
+ significant_coeff_flag_offset[MB_FIELD][cat];
last_coeff_ctx_base = h->cabac_state
if( cat == 0 )
h->cbp_table[h->mb_xy] |= 0x100;
else
- h->cbp_table[h->mb_xy] |= 0x40 << n;
+ h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
+ h->non_zero_count_cache[scan8[n]] = coeff_count;
} else {
if( cat == 5 )
fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
}
-#if !CONFIG_SMALL
-static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
- decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
+static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { /* out-of-line DC wrapper: takes no qmul parameter */
+ decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); /* qmul is NULL; the final argument 1 presumably selects the DC variant (is_dc) -- confirm against decode_cabac_residual_internal */
}
-static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
+static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { /* out-of-line non-DC wrapper: forwards qmul unchanged */
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); /* final argument 0: the non-DC counterpart of the call above */
}
-#endif
-static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
-#if CONFIG_SMALL
- decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
-#else
- if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
- else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
-#endif
+/* cat: 0-> DC 16x16 n = LUMA_DC_BLOCK_INDEX
+ * 1-> AC 16x16 n = luma4x4idx
+ * 2-> Luma4x4 n = luma4x4idx
+ * 3-> DC Chroma n = CHROMA_DC_BLOCK_INDEX + iCbCr
+ * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
+ * 5-> Luma8x8 n = 4 * luma8x8idx */
+
+/* Partially inline the CABAC residual decode: inline the coded block flag.
+ * This has very little impact on binary size and improves performance
+ * because it allows improved constant propagation into get_cabac_cbf_ctx,
+ * as well as because most blocks have zero CBFs. */
+
+static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
+    /* Coded block flag: its context is state 85 plus the offset chosen
+     * by get_cabac_cbf_ctx() for a DC block (last argument 1). */
+    const int cbf_ctx = 85 + get_cabac_cbf_ctx( h, cat, n, 1 );
+
+    if( get_cabac( &h->cabac, &h->cabac_state[cbf_ctx] ) ) {
+        /* Flag set: hand over to the out-of-line coefficient decoder. */
+        decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
+        return;
+    }
+
+    /* Flag clear: record a zero coefficient count and stop here. */
+    h->non_zero_count_cache[scan8[n]] = 0;
+}
+
+static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
+    /* The coded block flag is read for every category except 5;
+     * category 5 goes straight to the coefficient decoder. */
+    if( cat != 5 ) {
+        const int cbf_ctx = 85 + get_cabac_cbf_ctx( h, cat, n, 0 );
+
+        if( !get_cabac( &h->cabac, &h->cabac_state[cbf_ctx] ) ) {
+            /* Flag clear: record a zero coefficient count and stop here. */
+            h->non_zero_count_cache[scan8[n]] = 0;
+            return;
+        }
+    }
+
+    decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff );
}
/**
* decodes a macroblock
- * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
*/
int ff_h264_decode_mb_cabac(H264Context *h) {
MpegEncContext * const s = &h->s;
}else{
int bits;
bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
- bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
- bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
- bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+ bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
+ bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
+ bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
if( bits < 8 ){
mb_type= bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
}else if( bits == 13 ){
}else if( bits == 15 ){
mb_type= 22; /* B_8x8 */
}else{
- bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+ bits= ( bits<<1 ) + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
mb_type= bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
}
}
pred_mode= ff_h264_check_intra_pred_mode( h, pred_mode );
if( pred_mode < 0 ) return -1;
h->chroma_pred_mode= pred_mode;
+ } else {
+ h->chroma_pred_mode= DC_128_PRED8x8;
}
} else if( partition_count == 4 ) {
int i, j, sub_partition_count[4], list, ref[2][4];
for(i=0; i<4; i++){
h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
if(IS_DIRECT(h->sub_mb_type[i])){
- fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
+ fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
continue;
}
int mx, my;
const int index= 4*i + block_width*j;
int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
- int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
- pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
-
- mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
- my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
+ uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
+ pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
+ DECODE_CABAC_MB_MVD( h, list, index)
tprintf(s->avctx, "final mv:%d %d\n", mx, my);
- mpx= abs(mpx-mx);
- mpy= abs(mpy-my);
if(IS_SUB_8X8(sub_mb_type)){
mv_cache[ 1 ][0]=
mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
}
}else{
fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
- fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
}
}
}
} else if( IS_DIRECT(mb_type) ) {
ff_h264_pred_direct_motion(h, &mb_type);
- fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
- fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+ fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
+ fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
} else {
- int list, mx, my, i, mpx, mpy;
+ int list, i;
if(IS_16X16(mb_type)){
for(list=0; list<h->list_count; list++){
if(IS_DIR(mb_type, 0, list)){
}else
ref=0;
fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
- }else
- fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
+ }
}
for(list=0; list<h->list_count; list++){
if(IS_DIR(mb_type, 0, list)){
- pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
-
- mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
- my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
+ int mx,my,mpx,mpy;
+ pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
+ DECODE_CABAC_MB_MVD( h, list, 0)
tprintf(s->avctx, "final mv:%d %d\n", mx, my);
- fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
- }else
- fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
+ }
}
}
else if(IS_16X8(mb_type)){
for(list=0; list<h->list_count; list++){
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){
- pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
- mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
- my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
+ int mx,my,mpx,mpy;
+ pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
+ DECODE_CABAC_MB_MVD( h, list, 8*i)
tprintf(s->avctx, "final mv:%d %d\n", mx, my);
- fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
}else{
- fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
}
}
for(list=0; list<h->list_count; list++){
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){
- pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
- mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
- my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
+ int mx,my,mpx,mpy;
+ pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
+ DECODE_CABAC_MB_MVD( h, list, 4*i)
tprintf(s->avctx, "final mv:%d %d\n", mx, my);
- fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
}else{
- fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+ fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
}
}
s->current_picture.mb_type[mb_xy]= mb_type;
if( cbp || IS_INTRA16x16( mb_type ) ) {
- const uint8_t *scan, *scan8x8, *dc_scan;
+ const uint8_t *scan, *scan8x8;
const uint32_t *qmul;
if(IS_INTERLACED(mb_type)){
scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
scan= s->qscale ? h->field_scan : h->field_scan_q0;
- dc_scan= luma_dc_field_scan;
}else{
scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
- dc_scan= luma_dc_zigzag_scan;
}
// decode_cabac_mb_dqp
if( IS_INTRA16x16( mb_type ) ) {
int i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
- decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
+ AV_ZERO128(h->mb_luma_dc+0);
+ AV_ZERO128(h->mb_luma_dc+8);
+ decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
if( cbp&15 ) {
qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
- decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
+ decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
- decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+ decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
} else {
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
const int index = 4*i8x8 + i4x4;
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
- decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
+ decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
}
}
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
- decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
+ decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
- decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
+ decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
}
}
} else {