X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=encoder%2Frdo.c;h=ded0413ee4f880d11d3a836ead5a2bce45a22099;hb=fd2c324731c2199e502ded9eff723d29c6eafe0b;hp=0067bb075f725e623f8f7e43e78a283287ec7ac9;hpb=27a7b05b8330d0756e5e3f6669282561030f54fa;p=x264 diff --git a/encoder/rdo.c b/encoder/rdo.c index 0067bb07..ded0413e 100644 --- a/encoder/rdo.c +++ b/encoder/rdo.c @@ -1,7 +1,7 @@ /***************************************************************************** * rdo.c: rate-distortion optimization ***************************************************************************** - * Copyright (C) 2005-2012 x264 project + * Copyright (C) 2005-2016 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -180,13 +180,13 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ) else { x264_macroblock_size_cavlc( h ); - i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8; + i_bits = ( (uint64_t)h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8; } h->mb.b_transform_8x8 = b_transform_bak; h->mb.i_type = type_bak; - return i_ssd + i_bits; + return X264_MIN( i_ssd + i_bits, COST_MAX ); } /* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */ @@ -261,7 +261,7 @@ uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i4, int i_pixel ) i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; + i_bits = (uint64_t)x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -297,7 +297,7 @@ static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; + i_bits = (uint64_t)x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -331,7 +331,7 @@ static uint64_t x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; + i_bits = (uint64_t)x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -357,7 +357,7 @@ static uint64_t x264_rd_cost_chroma( x264_t *h, int i_lambda2, int i_mode, int b i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_chroma_size_cavlc( h ) * i_lambda2; + i_bits = (uint64_t)x264_chroma_size_cavlc( h ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -634,13 +634,13 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct, const uint8_t *zigzag, int ctx_block_cat, int lambda2, int b_ac, int b_chroma, int dc, int num_coefs, int idx ) { - ALIGNED_ARRAY_16( dctcoef, orig_coefs, [64] ); - ALIGNED_ARRAY_16( dctcoef, quant_coefs, [64] ); + ALIGNED_ARRAY_N( dctcoef, orig_coefs, [64] ); + ALIGNED_ARRAY_N( dctcoef, quant_coefs, [64] ); const uint32_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab; const uint32_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab; const int b_interlaced = MB_INTERLACED; - uint8_t *cabac_state_sig = &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ]; - uint8_t *cabac_state_last = &h->cabac.state[ last_coeff_flag_offset[b_interlaced][ctx_block_cat] ]; + uint8_t *cabac_state_sig = &h->cabac.state[ x264_significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ]; + uint8_t *cabac_state_last = &h->cabac.state[ x264_last_coeff_flag_offset[b_interlaced][ctx_block_cat] ]; int levelgt1_ctx = b_chroma && dc ? 8 : 9; if( dc ) @@ -683,7 +683,7 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct, } int last_nnz = h->quantf.coeff_last[ctx_block_cat]( quant_coefs+b_ac )+b_ac; - uint8_t *cabac_state = &h->cabac.state[ coeff_abs_level_m1_offset[ctx_block_cat] ]; + uint8_t *cabac_state = &h->cabac.state[ x264_coeff_abs_level_m1_offset[ctx_block_cat] ]; /* shortcut for dc-only blocks. * this doesn't affect the output, but saves some unnecessary computation. */ @@ -1161,5 +1161,6 @@ int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz; nzaccum |= nz; } + STORE_8x8_NNZ( 0, idx, 0 ); return nzaccum; }