X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=encoder%2Fcavlc.c;h=f0479aa95366810c5e8fea061a0fafd2f368f3d0;hb=5d40e878b75422c7b13cd5ab01ddfc2cf6b33938;hp=e6d85c6b8c1f122b7f74b8b9758aea7ba2a3a3cf;hpb=20e8982e3196bf8d0820772571e75a50cd07aabe;p=x264 diff --git a/encoder/cavlc.c b/encoder/cavlc.c index e6d85c6b..f0479aa9 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -5,6 +5,7 @@ * * Authors: Laurent Aimar * Loren Merritt + * Fiona Glaser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,6 +25,10 @@ #include "common/common.h" #include "macroblock.h" +#ifndef RDO_SKIP_BS +#define RDO_SKIP_BS 0 +#endif + static const uint8_t intra4x4_cbp_to_golomb[48]= { 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2, @@ -51,156 +56,124 @@ static const uint8_t sub_mb_type_b_to_golomb[13]= 10, 4, 5, 1, 11, 6, 7, 2, 12, 8, 9, 3, 0 }; -#define BLOCK_INDEX_CHROMA_DC (-1) -#define BLOCK_INDEX_LUMA_DC (-2) - -static inline void bs_write_vlc( bs_t *s, vlc_t v ) -{ - bs_write( s, v.i_size, v.i_bits ); -} +#define bs_write_vlc(s,v) bs_write( s, (v).i_size, (v).i_bits ) /**************************************************************************** * block_residual_write_cavlc: ****************************************************************************/ -static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *l, int i_count ) +static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_suffix_length, int level ) { - int level[16], run[16]; - int i_total, i_trailing; - int i_total_zero; - int i_last; - unsigned int i_sign; - int i; - int i_suffix_length; - - /* first find i_last */ - for( i_last = i_count-1; i_last >= 3; i_last -= 4 ) - if( *(uint64_t*)(l+i_last-3) ) - break; - while( i_last >= 0 && l[i_last] == 0 ) - i_last--; - - i_sign = 0; - i_total = 0; - i_trailing = 0; - i_total_zero = i_last + 1; - - if( i_last >= 0 ) + static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff }; + int i_level_prefix = 15; + int mask = level >> 15; + int abs_level = (level^mask)-mask; + int i_level_code = abs_level*2-mask-2; + if( ( i_level_code >> i_suffix_length ) < 15 ) { - int idx = 0; - - /* level and run and total */ - while( i_last >= 0 ) - { - int r = 0; - level[idx] = l[i_last]; - while( --i_last >= 0 && l[i_last] == 0 ) - r++; - run[idx++] = r; - } - - i_total = idx; - i_total_zero -= idx; + bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length, + (1<= 1<<12 ) { - if( (unsigned)(level[idx]+1) > 2 ) + if( h->sps->i_profile_idc >= PROFILE_HIGH ) { - i_trailing = idx; - break; + while( i_level_code > 1<<(i_level_prefix-3) ) + { + i_level_code -= 1<<(i_level_prefix-3); + i_level_prefix++; + } + } + else + { +#if RDO_SKIP_BS + /* Weight highly against overflows. */ + s->i_bits_encoded += 1000000; +#else + x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile\n", i_level_code ); + /* clip level, preserving sign */ + i_level_code = (1<<12) - 2 + (i_level_code & 1); +#endif } - i_sign <<= 1; - i_sign |= level[idx] < 0; } - } + bs_write( s, i_level_prefix + 1, 1 ); + bs_write( s, i_level_prefix - 3, i_level_code & ((1<<(i_level_prefix-3))-1) ); + } + if( i_suffix_length == 0 ) + i_suffix_length++; + if( abs_level > next_suffix[i_suffix_length] ) + i_suffix_length++; + return i_suffix_length; +} + +static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int16_t *l, int nC ) +{ + static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0}; + static const int count_cat[5] = {16, 15, 16, 4, 15}; + x264_run_level_t runlevel; + int i_trailing, i_total_zero, i_suffix_length, i; + int i_total = 0; + unsigned int i_sign; + + /* level and run and total */ + /* set these to 2 to allow branchless i_trailing calculation */ + runlevel.level[1] = 2; + runlevel.level[2] = 2; + i_total = h->quantf.coeff_level_run[i_ctxBlockCat]( l, &runlevel ); + i_total_zero = runlevel.last + 1 - i_total; + + i_trailing = ((((runlevel.level[0]+1) | (1-runlevel.level[0])) >> 31) & 1) // abs(runlevel.level[0])>1 + | ((((runlevel.level[1]+1) | (1-runlevel.level[1])) >> 31) & 2) + | ((((runlevel.level[2]+1) | (1-runlevel.level[2])) >> 31) & 4); + i_trailing = ctz_index[i_trailing]; + i_sign = ((runlevel.level[2] >> 31) & 1) + | ((runlevel.level[1] >> 31) & 2) + | ((runlevel.level[0] >> 31) & 4); + i_sign >>= 3-i_trailing; /* total/trailing */ - if( i_idx == BLOCK_INDEX_CHROMA_DC ) - { - bs_write_vlc( s, x264_coeff_token[4][i_total*4+i_trailing] ); - } - else - { - /* x264_mb_predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */ - static const int ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3 }; - int nC = x264_mb_predict_non_zero_code( h, i_idx == BLOCK_INDEX_LUMA_DC ? 0 : i_idx ); - bs_write_vlc( s, x264_coeff_token[ct_index[nC]][i_total*4+i_trailing] ); - } + bs_write_vlc( s, x264_coeff_token[nC][i_total*4+i_trailing-4] ); - if( i_total <= 0 ) - return; + i_suffix_length = i_total > 10 && i_trailing < 3; + bs_write( s, i_trailing, i_sign ); - i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0; - if( i_trailing > 0 ) - { - bs_write( s, i_trailing, i_sign ); - } - for( i = i_trailing; i < i_total; i++ ) + if( i_trailing < i_total ) { - int mask = level[i] >> 15; - int abs_level = (level[i]^mask)-mask; - int i_level_code = abs_level*2-mask-2; + int16_t val = runlevel.level[i_trailing]; + int16_t val_original = runlevel.level[i_trailing]+LEVEL_TABLE_SIZE/2; + if( i_trailing < 3 ) + val -= (val>>15)|1; /* as runlevel.level[i] can't be 1 for the first one if i_trailing < 3 */ + val += LEVEL_TABLE_SIZE/2; - if( i == i_trailing && i_trailing < 3 ) - i_level_code -= 2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */ - - if( ( i_level_code >> i_suffix_length ) < 14 ) - { - bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length, - (1< 0 && ( i_level_code >> i_suffix_length ) == 14 ) + if( (unsigned)val_original < LEVEL_TABLE_SIZE ) { - bs_write( s, 15 + i_suffix_length, - (1<= 1<<12 ) + val = runlevel.level[i] + LEVEL_TABLE_SIZE/2; + if( (unsigned)val < LEVEL_TABLE_SIZE ) { - if( h->sps->i_profile_idc >= PROFILE_HIGH ) - { - while( i_level_code > 1<<(i_level_prefix-3) ) - { - i_level_code -= 1<<(i_level_prefix-3); - i_level_prefix++; - } - } - else - { -#ifdef RDO_SKIP_BS - /* Weight highly against overflows. */ - s->i_bits_encoded += 1000000; -#else - x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile", i_level_code ); - /* clip level, preserving sign */ - i_level_code = (1<<12) - 2 + (i_level_code & 1); -#endif - } + bs_write_vlc( s, x264_level_token[i_suffix_length][val] ); + i_suffix_length = x264_level_token[i_suffix_length][val].i_next; } - bs_write( s, i_level_prefix + 1, 1 ); - bs_write( s, i_level_prefix - 3, i_level_code & ((1<<(i_level_prefix-3))-1) ); + else + i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 ); } - - if( i_suffix_length == 0 ) - i_suffix_length++; - if( abs_level > (3 << (i_suffix_length-1)) && i_suffix_length < 6 ) - i_suffix_length++; } - if( i_total < i_count ) + if( i_total < count_cat[i_ctxBlockCat] ) { - if( i_idx == BLOCK_INDEX_CHROMA_DC ) + if( i_ctxBlockCat == DCT_CHROMA_DC ) bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] ); else bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] ); @@ -208,10 +181,24 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * for( i = 0; i < i_total-1 && i_total_zero > 0; i++ ) { - int i_zl = X264_MIN( i_total_zero - 1, 6 ); - bs_write_vlc( s, x264_run_before[i_zl][run[i]] ); - i_total_zero -= run[i]; + int i_zl = X264_MIN( i_total_zero, 7 ); + bs_write_vlc( s, x264_run_before[i_zl-1][runlevel.run[i]] ); + i_total_zero -= runlevel.run[i]; } + + return i_total; +} + +static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3}; + +#define block_residual_write_cavlc(h,s,cat,idx,l)\ +{\ + int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? 0 : idx )];\ + uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\ + if( !*nnz )\ + bs_write_vlc( s, x264_coeff0_token[nC] );\ + else\ + *nnz = block_residual_write_cavlc(h,s,cat,l,nC);\ } static void cavlc_qp_delta( x264_t *h, bs_t *s ) @@ -220,9 +207,9 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s ) /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */ if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) - && !array_non_zero(h->dct.luma16x16_dc) ) + && !h->mb.cache.non_zero_count[x264_scan8[24]] ) { -#ifndef RDO_SKIP_BS +#if !RDO_SKIP_BS h->mb.i_qp = h->mb.i_last_qp; #endif i_dqp = 0; @@ -240,67 +227,51 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s ) static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width ) { - DECLARE_ALIGNED_4( int16_t mvp[2] ); + ALIGNED_4( int16_t mvp[2] ); x264_mb_predict_mv( h, i_list, idx, width, mvp ); bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] ); bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] ); } -static void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i_list, int i ) +static inline void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i ) { - if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) - return; - switch( h->mb.i_sub_partition[i] ) { case D_L0_8x8: - case D_L1_8x8: - case D_BI_8x8: - cavlc_mb_mvd( h, s, i_list, 4*i, 2 ); + cavlc_mb_mvd( h, s, 0, 4*i, 2 ); break; case D_L0_8x4: - case D_L1_8x4: - case D_BI_8x4: - cavlc_mb_mvd( h, s, i_list, 4*i+0, 2 ); - cavlc_mb_mvd( h, s, i_list, 4*i+2, 2 ); + cavlc_mb_mvd( h, s, 0, 4*i+0, 2 ); + cavlc_mb_mvd( h, s, 0, 4*i+2, 2 ); break; case D_L0_4x8: - case D_L1_4x8: - case D_BI_4x8: - cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 ); - cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+0, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+1, 1 ); break; case D_L0_4x4: - case D_L1_4x4: - case D_BI_4x4: - cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 ); - cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 ); - cavlc_mb_mvd( h, s, i_list, 4*i+2, 1 ); - cavlc_mb_mvd( h, s, i_list, 4*i+3, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+0, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+1, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+2, 1 ); + cavlc_mb_mvd( h, s, 0, 4*i+3, 1 ); break; } } static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end ) { - int i8, i4, i; + int i8, i4; if( h->mb.b_transform_8x8 ) { /* shuffle 8x8 dct coeffs into 4x4 lists */ for( i8 = i8start; i8 <= i8end; i8++ ) if( h->mb.i_cbp_luma & (1 << i8) ) - for( i4 = 0; i4 < 4; i4++ ) - for( i = 0; i < 16; i++ ) - h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4]; + h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8], &h->mb.cache.non_zero_count[x264_scan8[i8*4]] ); } for( i8 = i8start; i8 <= i8end; i8++ ) if( h->mb.i_cbp_luma & (1 << i8) ) for( i4 = 0; i4 < 4; i4++ ) - { - h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero_count( h->dct.luma4x4[i4+i8*4] ); - block_residual_write_cavlc( h, s, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 ); - } + block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] ); } /***************************************************************************** @@ -309,42 +280,28 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) { const int i_mb_type = h->mb.i_type; - int i_mb_i_offset; + static const int i_offsets[3] = {5,23,0}; + int i_mb_i_offset = i_offsets[h->sh.i_type]; int i; -#ifndef RDO_SKIP_BS +#if !RDO_SKIP_BS const int i_mb_pos_start = bs_pos( s ); int i_mb_pos_tex; #endif - switch( h->sh.i_type ) - { - case SLICE_TYPE_I: - i_mb_i_offset = 0; - break; - case SLICE_TYPE_P: - i_mb_i_offset = 5; - break; - case SLICE_TYPE_B: - i_mb_i_offset = 23; - break; - default: - x264_log(h, X264_LOG_ERROR, "internal error or slice unsupported\n" ); - return; - } - if( h->sh.b_mbaff && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) { bs_write1( s, h->mb.b_interlaced ); } -#ifndef RDO_SKIP_BS - if( i_mb_type == I_PCM) +#if !RDO_SKIP_BS + if( i_mb_type == I_PCM ) { + uint8_t *p_start = s->p_start; bs_write_ue( s, i_mb_i_offset + 25 ); i_mb_pos_tex = bs_pos( s ); - h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start; + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; bs_align_0( s ); @@ -357,12 +314,15 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); s->p += 64; + bs_init( s, s->p, s->p_end - s->p ); + s->p_start = p_start; + /* if PCM is chosen, we need to store reconstructed frame data */ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 ); h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 ); h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 ); - h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex; + h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex; return; } #endif @@ -384,16 +344,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) int i_pred = x264_mb_predict_intra4x4_mode( h, i ); int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] ); - if( i_pred == i_mode) - { + if( i_pred == i_mode ) bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */ - } else - { - if( i_mode >= i_pred ) - i_mode--; - bs_write( s, 4, i_mode ); - } + bs_write( s, 4, i_mode - (i_mode > i_pred) ); } bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] ); } @@ -405,19 +359,13 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) } else if( i_mb_type == P_L0 ) { - DECLARE_ALIGNED_4( int16_t mvp[2] ); - if( h->mb.i_partition == D_16x16 ) { - bs_write_ue( s, 0 ); + bs_write1( s, 1 ); if( h->mb.pic.i_fref[0] > 1 ) - { bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); - } - x264_mb_predict_mv( h, 0, 0, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] ); + cavlc_mb_mvd( h, s, 0, 0, 4 ); } else if( h->mb.i_partition == D_16x8 ) { @@ -427,14 +375,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] ); } - - x264_mb_predict_mv( h, 0, 0, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 8, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[8]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[8]][1] - mvp[1] ); + cavlc_mb_mvd( h, s, 0, 0, 4 ); + cavlc_mb_mvd( h, s, 0, 8, 4 ); } else if( h->mb.i_partition == D_8x16 ) { @@ -444,38 +386,34 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] ); } - - x264_mb_predict_mv( h, 0, 0, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4]][1] - mvp[1] ); + cavlc_mb_mvd( h, s, 0, 0, 2 ); + cavlc_mb_mvd( h, s, 0, 4, 2 ); } } else if( i_mb_type == P_8x8 ) { - int b_sub_ref0; - - if( h->mb.cache.ref[0][x264_scan8[0]] == 0 && h->mb.cache.ref[0][x264_scan8[4]] == 0 && - h->mb.cache.ref[0][x264_scan8[8]] == 0 && h->mb.cache.ref[0][x264_scan8[12]] == 0 ) + int b_sub_ref; + if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] | + h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 ) { bs_write_ue( s, 4 ); - b_sub_ref0 = 0; + b_sub_ref = 0; } else { bs_write_ue( s, 3 ); - b_sub_ref0 = 1; + b_sub_ref = 1; } + /* sub mb type */ - for( i = 0; i < 4; i++ ) - { - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] ); - } + if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 ) + for( i = 0; i < 4; i++ ) + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] ); + else + bs_write( s, 4, 0xf ); + /* ref0 */ - if( h->mb.pic.i_fref[0] > 1 && b_sub_ref0 ) + if( b_sub_ref ) { bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] ); @@ -484,7 +422,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) } for( i = 0; i < 4; i++ ) - cavlc_mb8x8_mvd( h, s, 0, i ); + cavlc_mb8x8_mvd( h, s, i ); } else if( i_mb_type == B_8x8 ) { @@ -492,144 +430,81 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* sub mb type */ for( i = 0; i < 4; i++ ) - { bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] ); - } + /* ref */ + if( h->mb.pic.i_fref[0] > 1 ) + for( i = 0; i < 4; i++ ) + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] ); + if( h->mb.pic.i_fref[1] > 1 ) + for( i = 0; i < 4; i++ ) + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) + bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] ); + + /* mvd */ for( i = 0; i < 4; i++ ) - { if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) - { - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] ); - } - } + cavlc_mb_mvd( h, s, 0, 4*i, 2 ); for( i = 0; i < 4; i++ ) - { if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) - { - bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] ); - } - } - /* mvd */ - for( i = 0; i < 4; i++ ) - cavlc_mb8x8_mvd( h, s, 0, i ); - for( i = 0; i < 4; i++ ) - cavlc_mb8x8_mvd( h, s, 1, i ); + cavlc_mb_mvd( h, s, 1, 4*i, 2 ); } else if( i_mb_type != B_DIRECT ) { /* All B mode */ /* Motion Vector */ - int i_list; - DECLARE_ALIGNED_4( int16_t mvp[2] ); - - int b_list[2][2]; + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type]; + const int i_ref0_max = h->mb.pic.i_fref[0] - 1; + const int i_ref1_max = h->mb.pic.i_fref[1] - 1; - /* init ref list utilisations */ - for( i = 0; i < 2; i++ ) + bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] ); + if( h->mb.i_partition == D_16x16 ) { - b_list[0][i] = x264_mb_type_list0_table[i_mb_type][i]; - b_list[1][i] = x264_mb_type_list1_table[i_mb_type][i]; + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] ); + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] ); + if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 ); + if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 ); } - - - bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] ); - - for( i_list = 0; i_list < 2; i_list++ ) + else { - const int i_ref_max = i_list == 0 ? h->mb.pic.i_fref[0] : h->mb.pic.i_fref[1]; - - if( i_ref_max > 1 ) + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] ); + if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] ); + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] ); + if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] ); + if( h->mb.i_partition == D_16x8 ) { - switch( h->mb.i_partition ) - { - case D_16x16: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); - break; - case D_16x8: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); - if( b_list[i_list][1] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[8]] ); - break; - case D_8x16: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); - if( b_list[i_list][1] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[4]] ); - break; - } + if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 ); + if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 8, 4 ); + if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 ); + if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 8, 4 ); } - } - for( i_list = 0; i_list < 2; i_list++ ) - { - switch( h->mb.i_partition ) + else //if( h->mb.i_partition == D_8x16 ) { - case D_16x16: - if( b_list[i_list][0] ) - { - x264_mb_predict_mv( h, i_list, 0, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] ); - } - break; - case D_16x8: - if( b_list[i_list][0] ) - { - x264_mb_predict_mv( h, i_list, 0, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] ); - } - if( b_list[i_list][1] ) - { - x264_mb_predict_mv( h, i_list, 8, 4, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[8]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[8]][1] - mvp[1] ); - } - break; - case D_8x16: - if( b_list[i_list][0] ) - { - x264_mb_predict_mv( h, i_list, 0, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[0]][1] - mvp[1] ); - } - if( b_list[i_list][1] ) - { - x264_mb_predict_mv( h, i_list, 4, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4]][1] - mvp[1] ); - } - break; + if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 2 ); + if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 4, 2 ); + if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 2 ); + if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 4, 2 ); } } } - else if( i_mb_type == B_DIRECT ) - { - bs_write_ue( s, 0 ); - } - else - { - x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); - return; - } + else //if( i_mb_type == B_DIRECT ) + bs_write1( s, 1 ); -#ifndef RDO_SKIP_BS +#if !RDO_SKIP_BS i_mb_pos_tex = bs_pos( s ); - h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start; + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; #endif /* Coded block patern */ if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) - { bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] ); - } else if( i_mb_type != I_16x16 ) - { bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] ); - } /* transform size 8x8 flag */ if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma ) - { bs_write1( s, h->mb.b_transform_8x8 ); - } /* write residual */ if( i_mb_type == I_16x16 ) @@ -637,104 +512,92 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) cavlc_qp_delta( h, s ); /* DC Luma */ - block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 ); + block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc ); /* AC Luma */ - if( h->mb.i_cbp_luma != 0 ) + if( h->mb.i_cbp_luma ) for( i = 0; i < 16; i++ ) - { - h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] ); - block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 ); - } + block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 ); } - else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 ) + else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma ) { cavlc_qp_delta( h, s ); x264_macroblock_luma_write_cavlc( h, s, 0, 3 ); } - if( h->mb.i_cbp_chroma != 0 ) + if( h->mb.i_cbp_chroma ) { /* Chroma DC residual present */ - block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 ); - block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 ); + block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] ); + block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] ); if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */ for( i = 16; i < 24; i++ ) - { - h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] ); - block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 ); - } + block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 ); } -#ifndef RDO_SKIP_BS - if( IS_INTRA( i_mb_type ) ) - h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex; - else - h->stat.frame.i_ptex_bits += bs_pos(s) - i_mb_pos_tex; +#if !RDO_SKIP_BS + h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex; #endif } -#ifdef RDO_SKIP_BS +#if RDO_SKIP_BS /***************************************************************************** * RD only; doesn't generate a valid bitstream * doesn't write cbp or chroma dc (I don't know how much this matters) + * doesn't write ref (never varies between calls, so no point in doing so) + * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO * works on all partition sizes except 16x16 - * for sub8x8, call once per 8x8 block *****************************************************************************/ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel ) { - bs_t s; const int i_mb_type = h->mb.i_type; + int b_8x16 = h->mb.i_partition == D_8x16; int j; - - s.i_bits_encoded = 0; + h->out.bs.i_bits_encoded = 0; if( i_mb_type == P_8x8 ) { - bs_write_ue( &s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] ); - if( h->mb.pic.i_fref[0] > 1 ) - bs_write_te( &s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); - cavlc_mb8x8_mvd( h, &s, 0, i8 ); + cavlc_mb8x8_mvd( h, &h->out.bs, i8 ); + bs_write_ue( &h->out.bs, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] ); } else if( i_mb_type == P_L0 ) + cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 ); + else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 ) { - if( h->mb.pic.i_fref[0] > 1 ) - bs_write_te( &s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); - if( h->mb.i_partition == D_16x8 ) - cavlc_mb_mvd( h, &s, 0, 4*i8, 4 ); - else //8x16 - cavlc_mb_mvd( h, &s, 0, 4*i8, 2 ); - } - else if( i_mb_type == B_8x8 ) - { - bs_write_ue( &s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i8] ] ); - - if( h->mb.pic.i_fref[0] > 1 - && x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) - bs_write_te( &s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); - if( h->mb.pic.i_fref[1] > 1 - && x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) - bs_write_te( &s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[4*i8]] ); - - cavlc_mb8x8_mvd( h, &s, 0, i8 ); - cavlc_mb8x8_mvd( h, &s, 1, i8 ); + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 ); + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 4>>b_8x16 ); } - else + else //if( i_mb_type == B_8x8 ) { - x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); - return 0; + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) + cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 2 ); + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) + cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 2 ); } for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) { - x264_macroblock_luma_write_cavlc( h, &s, i8, i8 ); - h->mb.cache.non_zero_count[x264_scan8[16+i8]] = array_non_zero_count( h->dct.luma4x4[16+i8] ); - block_residual_write_cavlc( h, &s, 16+i8, h->dct.luma4x4[16+i8]+1, 15 ); - h->mb.cache.non_zero_count[x264_scan8[20+i8]] = array_non_zero_count( h->dct.luma4x4[20+i8] ); - block_residual_write_cavlc( h, &s, 20+i8, h->dct.luma4x4[20+i8]+1, 15 ); + x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 ); + block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 ); + block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 ); i8 += x264_pixel_size[i_pixel].h >> 3; } - return s.i_bits_encoded; + return h->out.bs.i_bits_encoded; +} + +static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel ) +{ + int b_8x4 = i_pixel == PIXEL_8x4; + h->out.bs.i_bits_encoded = 0; + cavlc_mb_mvd( h, &h->out.bs, 0, i4, 1+b_8x4 ); + block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] ); + if( i_pixel != PIXEL_4x4 ) + { + i4 += 2-b_8x4; + block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] ); + } + + return h->out.bs.i_bits_encoded; } static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode ) @@ -747,42 +610,32 @@ static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode ) static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) { - int i4, i; h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode ); - for( i4 = 0; i4 < 4; i4++ ) - { - for( i = 0; i < 16; i++ ) - h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4]; - h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = - array_non_zero_count( h->dct.luma4x4[i4+i8*4] ); - block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 ); - } + bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] ); + x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 ); return h->out.bs.i_bits_encoded; } static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) { h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode ); - block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.luma4x4[i4], 16 ); + block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] ); return h->out.bs.i_bits_encoded; } static int x264_i8x8_chroma_size_cavlc( x264_t *h ) { h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] ); - if( h->mb.i_cbp_chroma != 0 ) + if( h->mb.i_cbp_chroma ) { - block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 ); - block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 ); + block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] ); + block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] ); if( h->mb.i_cbp_chroma == 2 ) { int i; for( i = 16; i < 24; i++ ) - { - h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] ); - block_residual_write_cavlc( h, &h->out.bs, i, h->dct.luma4x4[i]+1, 15 ); - } + block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 ); } } return h->out.bs.i_bits_encoded;