/*****************************************************************************
* rdo.c: rate-distortion optimization
*****************************************************************************
- * Copyright (C) 2005-2011 x264 project
+ * Copyright (C) 2005-2015 x264 project
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
* Fiona Glaser <fiona@x264.com>
else
{
x264_macroblock_size_cavlc( h );
- i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
+ i_bits = ( (uint64_t)h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
}
h->mb.b_transform_8x8 = b_transform_bak;
h->mb.i_type = type_bak;
- return i_ssd + i_bits;
+ return X264_MIN( i_ssd + i_bits, COST_MAX );
}
/* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
else
- i_bits = x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2;
+ i_bits = (uint64_t)x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2;
return (i_ssd<<8) + i_bits;
}
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
else
- i_bits = x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2;
+ i_bits = (uint64_t)x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2;
return (i_ssd<<8) + i_bits;
}
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
else
- i_bits = x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2;
+ i_bits = (uint64_t)x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2;
return (i_ssd<<8) + i_bits;
}
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
else
- i_bits = x264_chroma_size_cavlc( h ) * i_lambda2;
+ i_bits = (uint64_t)x264_chroma_size_cavlc( h ) * i_lambda2;
return (i_ssd<<8) + i_bits;
}
const uint8_t *zigzag, int ctx_block_cat, int lambda2, int b_ac,
int b_chroma, int dc, int num_coefs, int idx )
{
- ALIGNED_ARRAY_16( dctcoef, orig_coefs, [64] );
- ALIGNED_ARRAY_16( dctcoef, quant_coefs, [64] );
+ ALIGNED_ARRAY_N( dctcoef, orig_coefs, [64] );
+ ALIGNED_ARRAY_N( dctcoef, quant_coefs, [64] );
const uint32_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
const uint32_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
const int b_interlaced = MB_INTERLACED;
- uint8_t *cabac_state_sig = &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
- uint8_t *cabac_state_last = &h->cabac.state[ last_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
+ uint8_t *cabac_state_sig = &h->cabac.state[ x264_significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
+ uint8_t *cabac_state_last = &h->cabac.state[ x264_last_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
int levelgt1_ctx = b_chroma && dc ? 8 : 9;
if( dc )
}
int last_nnz = h->quantf.coeff_last[ctx_block_cat]( quant_coefs+b_ac )+b_ac;
- uint8_t *cabac_state = &h->cabac.state[ coeff_abs_level_m1_offset[ctx_block_cat] ];
+ uint8_t *cabac_state = &h->cabac.state[ x264_coeff_abs_level_m1_offset[ctx_block_cat] ];
/* shortcut for dc-only blocks.
* this doesn't affect the output, but saves some unnecessary computation. */
{
int cost_sig = x264_cabac_size_decision_noup2( &cabac_state_sig[0], 1 )
+ x264_cabac_size_decision_noup2( &cabac_state_last[0], 1 );
- return dct[0] = trellis_dc_shortcut( orig_coefs[0], quant_coefs[0], unquant_mf[0], coef_weight2[0], lambda2, cabac_state, cost_sig );
+ dct[0] = trellis_dc_shortcut( orig_coefs[0], quant_coefs[0], unquant_mf[0], coef_weight2[0], lambda2, cabac_state, cost_sig );
+ return !!dct[0];
}
+#if HAVE_MMX && ARCH_X86_64
+#define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
+ cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
+ if( num_coefs == 16 && !dc )
+ if( b_chroma || !h->mb.i_psy_trellis )
+ return h->quantf.trellis_cabac_4x4( TRELLIS_ARGS, b_ac );
+ else
+ return h->quantf.trellis_cabac_4x4_psy( TRELLIS_ARGS, b_ac, h->mb.pic.fenc_dct4[idx&15], h->mb.i_psy_trellis );
+ else if( num_coefs == 64 && !dc )
+ if( b_chroma || !h->mb.i_psy_trellis )
+ return h->quantf.trellis_cabac_8x8( TRELLIS_ARGS, b_interlaced );
+ else
+ return h->quantf.trellis_cabac_8x8_psy( TRELLIS_ARGS, b_interlaced, h->mb.pic.fenc_dct8[idx&3], h->mb.i_psy_trellis);
+ else if( num_coefs == 8 && dc )
+ return h->quantf.trellis_cabac_chroma_422_dc( TRELLIS_ARGS );
+ else if( dc )
+ return h->quantf.trellis_cabac_dc( TRELLIS_ARGS, num_coefs-1 );
+#endif
+
// (# of coefs) * (# of ctx) * (# of levels tried) = 1024
// we don't need to keep all of those: (# of coefs) * (# of ctx) would be enough,
// but it takes more time to remove dead states than you gain in reduced memory.
h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz;
nzaccum |= nz;
}
+ STORE_8x8_NNZ( 0, idx, 0 );
return nzaccum;
}