25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
+ 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
+1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
+2048,2299, /* 80-81 */
};
/* lambda2 = pow(lambda,2) * .9 * 256 */
+/* Capped at 134217727 (2^27 - 1) to avoid overflow */
const int x264_lambda2_tab[QP_MAX_MAX+1] = {
- 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
- 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
- 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
- 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
- 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
- 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
- 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
-5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
+ 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
+ 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
+ 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
+ 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
+ 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
+ 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
+ 943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
+ 5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
+ 38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */
+134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */
+134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */
};
const uint8_t x264_exp2_lut[64] = {
// I'm just matching the behaviour of deadzone quant.
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
// inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
- { 46, 58, 73, 92, 117, 147,
- 185, 233, 294, 370, 466, 587,
- 740, 932, 1174, 1480, 1864, 2349,
- 2959, 3728, 4697, 5918, 7457, 9395,
- 11837, 14914, 18790, 23674, 29828, 37581,
- 47349, 59656, 75163, 94699, 119313, 150326,
- 189399, 238627, 300652, 378798, 477255, 601304,
- 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
- 12121539,15272182,19241743,24243077,30544363,38483486,
- 48486154,61088726,76966972,96972308 },
+ {
+ 46, 58, 73, 92, 117, 147,
+ 185, 233, 294, 370, 466, 587,
+ 740, 932, 1174, 1480, 1864, 2349,
+ 2959, 3728, 4697, 5918, 7457, 9395,
+ 11837, 14914, 18790, 23674, 29828, 37581,
+ 47349, 59656, 75163, 94699, 119313, 150326,
+ 189399, 238627, 300652, 378798, 477255, 601304,
+ 757596, 954511, 1202608, 1515192, 1909022, 2405217,
+ 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
+ 12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
+ 48486154, 61088726, 76966972, 96972308,
+ 122177453,134217727,134217727,134217727,134217727,134217727,
+ 134217727,134217727,134217727,134217727,134217727,134217727,
+ 134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81: without this row these entries would be implicitly 0; the intra row below and x264_lambda2_tab both carry 82 capped entries */
+ },
// intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
- { 27, 34, 43, 54, 68, 86,
- 108, 136, 172, 216, 273, 343,
- 433, 545, 687, 865, 1090, 1374,
- 1731, 2180, 2747, 3461, 4361, 5494,
- 6922, 8721, 10988, 13844, 17442, 21976,
- 27688, 34885, 43953, 55377, 69771, 87906,
- 110755, 139543, 175813, 221511, 279087, 351627,
- 443023, 558174, 703255, 886046, 1116348, 1406511,
- 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
- 7088374, 8930791,11252092,14176748,17861583,22504184,
- 28353495,35723165,45008368,56706990 }
+ {
+ 27, 34, 43, 54, 68, 86,
+ 108, 136, 172, 216, 273, 343,
+ 433, 545, 687, 865, 1090, 1374,
+ 1731, 2180, 2747, 3461, 4361, 5494,
+ 6922, 8721, 10988, 13844, 17442, 21976,
+ 27688, 34885, 43953, 55377, 69771, 87906,
+ 110755, 139543, 175813, 221511, 279087, 351627,
+ 443023, 558174, 703255, 886046, 1116348, 1406511,
+ 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
+ 7088374, 8930791, 11252092, 14176748, 17861583, 22504184,
+ 28353495, 35723165, 45008368, 56706990,
+ 71446330, 90016736,113413980,134217727,134217727,134217727,
+ 134217727,134217727,134217727,134217727,134217727,134217727,
+ 134217727,134217727,134217727,134217727,134217727,134217727,
+ }
};
-static const uint16_t x264_chroma_lambda2_offset_tab[] = {
+#define MAX_CHROMA_LAMBDA_OFFSET 36
+static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = {
16, 20, 25, 32, 40, 50,
64, 80, 101, 128, 161, 203,
256, 322, 406, 512, 645, 812,
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
-static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
+static uint16_t x264_cost_ref[QP_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
int x264_analyse_init_costs( x264_t *h, int qp )
{
int lambda = x264_lambda_tab[qp];
- if( h->cost_mv[lambda] )
+ if( h->cost_mv[qp] )
return 0;
/* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
- CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
- h->cost_mv[lambda] += 2*4*2048;
+ CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
+ h->cost_mv[qp] += 2*4*2048;
for( int i = 0; i <= 2*4*2048; i++ )
{
- h->cost_mv[lambda][-i] =
- h->cost_mv[lambda][i] = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
+ h->cost_mv[qp][-i] =
+ h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
for( int j = 0; j < 33; j++ )
- x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
+ x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
x264_pthread_mutex_unlock( &cost_ref_mutex );
- if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
+ if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
{
for( int j = 0; j < 4; j++ )
{
- CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
- h->cost_mv_fpel[lambda][j] += 2*2048;
+ CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
+ h->cost_mv_fpel[qp][j] += 2*2048;
for( int i = -2*2048; i < 2*2048; i++ )
- h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
+ h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
}
}
return 0;
void x264_analyse_free_costs( x264_t *h )
{
- for( int i = 0; i < LAMBDA_MAX+1; i++ )
+ for( int i = 0; i < QP_MAX+1; i++ )
{
if( h->cost_mv[i] )
x264_free( h->cost_mv[i] - 2*4*2048 );
/* initialize an array of lambda*nbits for all possible mvs */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
- a->p_cost_mv = h->cost_mv[a->i_lambda];
- a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
- a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
+ a->p_cost_mv = h->cost_mv[a->i_qp]; /* MV cost table is selected by a->i_qp (previously by a->i_lambda) */
+ a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; /* list0: second index comes from i_num_ref_idx_l0_active-1 passed through x264_clip3(...,0,2) */
+ a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; /* list1: same selection using i_num_ref_idx_l1_active */
}
-static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp )
+static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
- /* conduct the analysis using this lamda and QP */
- a->i_qp = h->mb.i_qp = i_qp;
- h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];
-
- a->i_lambda = x264_lambda_tab[i_qp];
- a->i_lambda2 = x264_lambda2_tab[i_qp];
+ int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 ); /* chroma QP used for lambda lookups: table value at SPEC_QP(qp) plus however far qp exceeds QP_MAX_SPEC */
+ a->i_lambda = x264_lambda_tab[qp]; /* lambda tables are indexed by the incoming qp, before it is reduced to QP_MAX_SPEC below */
+ a->i_lambda2 = x264_lambda2_tab[qp];
h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
if( h->param.analyse.i_trellis )
{
- h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
- h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
- h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
- h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
+ h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp]; /* [0][*]: looked up with qp; table row 0 = inter, row 1 = intra */
+ h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
+ h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp]; /* [1][*]: looked up with effective_chroma_qp */
+ h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
}
h->mb.i_psy_rd_lambda = a->i_lambda;
/* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
- h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
+ int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET ); /* upper-bounded by the last valid index of x264_chroma_lambda2_offset_tab */
+ h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
+
+ if( qp > QP_MAX_SPEC ) /* qp beyond QP_MAX_SPEC: use the emergency offsets, the [1] statistics buffers, and force noise reduction on */
+ {
+ h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
+ h->nr_residual_sum = h->nr_residual_sum_buf[1];
+ h->nr_count = h->nr_count_buf[1];
+ h->mb.b_noise_reduction = 1;
+ qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
+ }
+ else /* qp within QP_MAX_SPEC: regular denoise offsets, the [0] statistics buffers, noise reduction flag cleared */
+ {
+ h->nr_offset = h->nr_offset_denoise;
+ h->nr_residual_sum = h->nr_residual_sum_buf[0];
+ h->nr_count = h->nr_count_buf[0];
+ h->mb.b_noise_reduction = 0;
+ }
+
+ a->i_qp = h->mb.i_qp = qp; /* qp is at most QP_MAX_SPEC at this point */
+ h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}
-static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
+static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
- x264_mb_analyse_init_qp( h, a, i_qp );
+ x264_mb_analyse_init_qp( h, a, qp );
h->mb.b_transform_8x8 = 0;
- h->mb.b_noise_reduction = 0;
/* I: Intra part */
a->i_satd_i16x16 =
x264_mb_analyse_qp_rd( h, &analysis );
h->mb.b_trellis = h->param.analyse.i_trellis;
- h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
+ h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
+
if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
x264_psy_trellis_init( h, 0 );
if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
+ if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
+ h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( h->mb.b_trellis )
return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
else
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
if( h->mb.b_trellis )
return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
else
for( int i = 0; i < 16; i++ )
{
/* copy dc coeff */
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[0], h->nr_offset[0], 16 );
dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0];
dct4x4[i][0] = 0;
int b_decimate = b_inter && h->mb.b_dct_decimate;
ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
h->mb.i_cbp_chroma = 0;
+ h->nr_count[2] += h->mb.b_noise_reduction * 4;
/* Early termination: check variance of chroma residual before encoding.
* Don't bother trying early termination at low QPs.
* Values are experimentally derived. */
- if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) )
+ if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
{
int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
int ssd[2];
}
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
+ if( h->mb.b_noise_reduction )
+ for( int i = 0; i < 4; i++ )
+ h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct2x2dc( dct2x2, dct4x4 );
/* calculate dct coeffs */
for( int i = 0; i < 4; i++ )
for( int idx = 0; idx < 4; idx++ )
{
- if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
if( nz )
{
int idx = i8x8 * 4 + i4x4;
- if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
/* encode one 4x4 block */
for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
continue;
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
/* The vast majority of chroma checks will terminate during the DC check or the higher
* threshold check, so we can save time by doing a DC-only DCT. */
- h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
+ if( h->mb.b_noise_reduction )
+ {
+ h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
+ for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ {
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
+ dct2x2[i4x4] = dct4x4[i4x4][0];
+ }
+ }
+ else
+ h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
return 0;
if( ssd < thresh*4 )
continue;
- h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
+ if( !h->mb.b_noise_reduction )
+ h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
/* calculate dct coeffs */
for( int i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
{
dct4x4[i4x4][0] = 0;
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
continue;
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
void x264_noise_reduction_update( x264_t *h )
{
- for( int cat = 0; cat < 2; cat++ )
+ h->nr_offset = h->nr_offset_denoise;
+ h->nr_residual_sum = h->nr_residual_sum_buf[0];
+ h->nr_count = h->nr_count_buf[0];
+ for( int cat = 0; cat < 3; cat++ )
{
- int size = cat ? 64 : 16;
- const uint16_t *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+ int dct8x8 = cat == 1;
+ int size = dct8x8 ? 64 : 16;
+ const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
- if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
+ if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
{
for( int i = 0; i < size; i++ )
h->nr_residual_sum[cat][i] >>= 1;
((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
+ h->nr_residual_sum[cat][i]/2)
/ ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
+
+ /* Don't denoise DC coefficients */
+ h->nr_offset[cat][0] = 0;
}
}
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
-
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
+ if( h->mb.b_noise_reduction ) /* no trailing ';' here: the denoise call below must only run when noise reduction is enabled for this MB */
+     h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct4x4[0] = 0;
if( h->mb.b_trellis )