X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Faaccoder.c;h=fd9785e26a46b91b076360b0867021dbb15f3dfe;hb=7bbd060324f05a32aa3cc748ea484abf499cfbd8;hp=10ea14b1410380490c9eb2c38398ea742fed7112;hpb=a2426798d6020d944985c478d08d432f8dbdbf02;p=ffmpeg diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 10ea14b1410..fd9785e26a4 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -33,7 +33,9 @@ #include "libavutil/libm.h" // brought forward to work around cygwin header breakage #include + #include "libavutil/mathematics.h" +#include "mathops.h" #include "avcodec.h" #include "put_bits.h" #include "aac.h" @@ -46,13 +48,11 @@ #include "aacenc_is.h" #include "aacenc_tns.h" +#include "aacenc_ltp.h" #include "aacenc_pred.h" #include "libavcodec/aaccoder_twoloop.h" -/** Frequency in Hz for lower limit of noise substitution **/ -#define NOISE_LOW_LIMIT 4000 - /* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread * beyond which no PNS is used (since the SFBs contain tone rather than noise) */ #define NOISE_SPREAD_THRESHOLD 0.5073f @@ -124,7 +124,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce rd += quantize_band_cost(s, &sce->coeffs[start + w*128], &s->scoefs[start + w*128], size, sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], - lambda / band->threshold, INFINITY, NULL, 0); + lambda / band->threshold, INFINITY, NULL, NULL, 0); } cost_stay_here = path[swb][cb].cost + rd; cost_get_here = minrd + rd + run_bits + 4; @@ -335,7 +335,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], - q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0); + q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0); } minrd = FFMIN(minrd, dist); @@ -499,7 +499,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, ESC_BT, lambda, INFINITY, - &b, + &b, NULL, 0); dist -= b; } @@ -588,12 +588,36 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne { FFPsyBand *band; int w, g, w2, i; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128]; float *NOR34 = &s->scoefs[3*128]; const float lambda = s->lambda; - const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; + const float freq_mult = avctx->sample_rate*0.5f/wlen; const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); - const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f); + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) + ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { @@ -602,32 +626,44 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne int noise_sfi; float dist1 = 0.0f, dist2 = 0.0f, noise_amp; float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh; - float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f; + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; const int start = wstart+sce->ics.swb_offset[g]; const float freq = (start-wstart)*freq_mult; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); - if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff) + if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) continue; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; sfb_energy += band->energy; - spread += band->spread; + spread = FFMIN(spread, band->spread); threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; + } else { + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); + } } /* Ramps down at ~8000Hz and loosens the dist threshold */ - dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f); - - /* zero and energy close to threshold usually means hole avoidance, - * we do want to remain avoiding holes with PNS + dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias; + + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + * + * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important) */ if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold || - (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) { + (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) || + min_energy < pns_transient_energy_r * max_energy ) { sce->pns_ener[w*16+g] = sfb_energy; continue; } - pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w]; + pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread); noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */ noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { @@ -648,13 +684,18 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne sce->ics.swb_sizes[g], sce->sf_idx[(w+w2)*16+g], sce->band_alt[(w+w2)*16+g], - lambda/band->threshold, INFINITY, NULL, 0); - /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */ - dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold; + lambda/band->threshold, INFINITY, NULL, NULL, 0); + /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */ + dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold; + } + if (g && sce->sf_idx[(w+w2)*16+g-1] == NOISE_BT) { + dist2 += 5; + } else { + dist2 += 9; } energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */ sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy; - if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) { + if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) { sce->band_type[w*16+g] = NOISE_BT; sce->zeroes[w*16+g] = 0; } @@ -662,62 +703,203 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne } } +static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) +{ + FFPsyBand *band; + int w, g, w2; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; + const float lambda = s->lambda; + const float freq_mult = avctx->sample_rate*0.5f/wlen; + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) + ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; + const int start = sce->ics.swb_offset[g]; + const float freq = start*freq_mult; + const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); + if (freq < NOISE_LOW_LIMIT || start >= cutoff) { + sce->can_pns[w*16+g] = 0; + continue; + } + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + sfb_energy += band->energy; + spread = FFMIN(spread, band->spread); + threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; + } else { + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); + } + } + + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + */ + sce->pns_ener[w*16+g] = sfb_energy; + if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) { + sce->can_pns[w*16+g] = 0; + } else { + sce->can_pns[w*16+g] = 1; + } + } + } +} + static void search_for_ms(AACEncContext *s, ChannelElement *cpe) { - int start = 0, i, w, w2, g; + int start = 0, i, w, w2, g, sid_sf_boost; float M[128], S[128]; float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; const float lambda = s->lambda; + const float mslambda = FFMIN(1.0f, lambda / 120.f); SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; if (!cpe->common_window) return; for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + int min_sf_idx_mid = SCALE_MAX_POS; + int min_sf_idx_side = SCALE_MAX_POS; + for (g = 0; g < sce0->ics.num_swb; g++) { + if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT) + min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]); + if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT) + min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]); + } + start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { + float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; + cpe->ms_mask[w*16+g] = 0; if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { - float dist1 = 0.0f, dist2 = 0.0f; + float Mmax = 0.0f, Smax = 0.0f; + + /* Must compute mid/side SF and book for the whole window group */ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; - float minthr = FFMIN(band0->threshold, band1->threshold); - float maxthr = FFMAX(band0->threshold, band1->threshold); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { M[i] = (sce0->coeffs[start+(w+w2)*128+i] + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; S[i] = M[i] - sce1->coeffs[start+(w+w2)*128+i]; } - abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); - abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); - dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], - L34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL, 0); - dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], - R34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, M, - M34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / maxthr, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, S, - S34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / minthr, INFINITY, NULL, 0); + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) { + Mmax = FFMAX(Mmax, M34[i]); + Smax = FFMAX(Smax, S34[i]); + } + } + + for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { + float dist1 = 0.0f, dist2 = 0.0f; + int B0 = 0, B1 = 0; + int minidx; + int mididx, sididx; + int midcb, sidcb; + + minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); + mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF); + sididx = av_clip(minidx - sid_sf_boost * 3, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF); + midcb = find_min_book(Mmax, mididx); + sidcb = find_min_book(Smax, sididx); + + if ((mididx > minidx) || (sididx > minidx)) { + /* scalefactor range violation, bad stuff, will decrease quality unacceptably */ + continue; + } + + /* No CB can be zero */ + midcb = FFMAX(1,midcb); + sidcb = FFMAX(1,sidcb); + + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; + float minthr = FFMIN(band0->threshold, band1->threshold); + int b1,b2,b3,b4; + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + M[i] = (sce0->coeffs[start+(w+w2)*128+i] + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; + S[i] = M[i] + - sce1->coeffs[start+(w+w2)*128+i]; + } + + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], + L34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[(w+w2)*16+g], + sce0->band_type[(w+w2)*16+g], + lambda / band0->threshold, INFINITY, &b1, NULL, 0); + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], + R34, + sce1->ics.swb_sizes[g], + sce1->sf_idx[(w+w2)*16+g], + sce1->band_type[(w+w2)*16+g], + lambda / band1->threshold, INFINITY, &b2, NULL, 0); + dist2 += quantize_band_cost(s, M, + M34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[(w+w2)*16+g], + sce0->band_type[(w+w2)*16+g], + lambda / minthr, INFINITY, &b3, NULL, 0); + dist2 += quantize_band_cost(s, S, + S34, + sce1->ics.swb_sizes[g], + sce1->sf_idx[(w+w2)*16+g], + sce1->band_type[(w+w2)*16+g], + mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); + B0 += b1+b2; + B1 += b3+b4; + dist1 -= B0; + dist2 -= B1; + } + cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; + if (cpe->ms_mask[w*16+g]) { + /* Setting the M/S mask is useful with I/S, but only the flag */ + if (!cpe->is_mask[w*16+g]) { + sce0->sf_idx[w*16+g] = mididx; + sce1->sf_idx[w*16+g] = sididx; + sce0->band_type[w*16+g] = midcb; + sce1->band_type[w*16+g] = sidcb; + } + break; + } else if (B1 > B0) { + /* More boost won't fix this */ + break; + } } - cpe->ms_mask[w*16+g] = dist2 < dist1; } start += sce0->ics.swb_sizes[g]; } @@ -730,13 +912,19 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { encode_window_bands_info, quantize_and_encode_band, ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, ff_aac_encode_main_pred, - ff_aac_adjust_common_prediction, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, ff_aac_apply_main_pred, ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, ff_aac_search_for_is, ff_aac_search_for_pred, @@ -746,13 +934,19 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { encode_window_bands_info, quantize_and_encode_band, ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, ff_aac_encode_main_pred, - ff_aac_adjust_common_prediction, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, ff_aac_apply_main_pred, ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, ff_aac_search_for_is, ff_aac_search_for_pred, @@ -762,13 +956,19 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { codebook_trellis_rate, quantize_and_encode_band, ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, ff_aac_encode_main_pred, - ff_aac_adjust_common_prediction, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, ff_aac_apply_main_pred, ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, ff_aac_search_for_is, ff_aac_search_for_pred, @@ -778,13 +978,19 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { encode_window_bands_info, quantize_and_encode_band, ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, ff_aac_encode_main_pred, - ff_aac_adjust_common_prediction, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, ff_aac_apply_main_pred, ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, ff_aac_search_for_is, ff_aac_search_for_pred,