2 * Copyright (c) 2018 The FFmpeg Project
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/audio_fifo.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/channel_layout.h"
26 #include "libavutil/opt.h"
27 #include "libavcodec/avfft.h"
33 #define C (M_LN10 * 0.1)
35 #define RRATIO (1.0 - RATIO)
52 typedef struct DeNoiseChannel {
54 double noise_band_auto_var[15];
55 double noise_band_sample[15];
61 double *prior_band_excit;
65 double *spread_function;
70 FFTContext *fft, *ifft;
72 double noise_band_norm[15];
73 double noise_band_avr[15];
74 double noise_band_avi[15];
75 double noise_band_var[15];
79 double sfm_results[3];
80 int sfm_fail_flags[512];
84 typedef struct AudioFFTDeNoiseContext {
87 float noise_reduction;
96 float last_residual_floor;
97 float last_noise_floor;
98 float last_noise_reduction;
99 float last_noise_balance;
105 int sample_noise_start;
106 int sample_noise_end;
123 DeNoiseChannel *dnch;
128 double window_weight;
133 int noise_band_edge[17];
134 int noise_band_count;
141 } AudioFFTDeNoiseContext;
143 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
144 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
146 static const AVOption afftdn_options[] = {
147 { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, A },
148 { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, A },
149 { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, A, "type" },
150 { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, A, "type" },
151 { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, A, "type" },
152 { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, A, "type" },
153 { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, A, "type" },
154 { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, A },
155 { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, A },
156 { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A },
157 { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A },
158 { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, A, "mode" },
159 { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, A, "mode" },
160 { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, A, "mode" },
161 { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, A, "mode" },
165 AVFILTER_DEFINE_CLASS(afftdn);
167 static int get_band_noise(AudioFFTDeNoiseContext *s,
173 d1 = a / s->band_centre[band];
174 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
175 d2 = b / s->band_centre[band];
176 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
177 d3 = s->band_centre[band] / c;
178 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
180 return lrint(-d1 + d2 - d3);
183 static void factor(double *array, int size)
185 for (int i = 0; i < size - 1; i++) {
186 for (int j = i + 1; j < size; j++) {
187 double d = array[j + i * size] / array[i + i * size];
189 array[j + i * size] = d;
190 for (int k = i + 1; k < size; k++) {
191 array[j + k * size] -= d * array[i + k * size];
197 static void solve(double *matrix, double *vector, int size)
199 for (int i = 0; i < size - 1; i++) {
200 for (int j = i + 1; j < size; j++) {
201 double d = matrix[j + i * size];
202 vector[j] -= d * vector[i];
206 vector[size - 1] /= matrix[size * size - 1];
208 for (int i = size - 2; i >= 0; i--) {
209 double d = vector[i];
210 for (int j = i + 1; j < size; j++)
211 d -= matrix[i + j * size] * vector[j];
212 vector[i] = d / matrix[i + i * size];
216 static int process_get_band_noise(AudioFFTDeNoiseContext *s,
217 DeNoiseChannel *dnch,
220 double product, sum, f;
224 return dnch->band_noise[band];
226 for (int j = 0; j < 5; j++) {
228 for (int k = 0; k < 15; k++)
229 sum += s->matrix_b[i++] * dnch->band_noise[k];
230 s->vector_b[j] = sum;
233 solve(s->matrix_a, s->vector_b, 5);
234 f = (0.5 * s->sample_rate) / s->band_centre[14];
235 f = 15.0 + log(f / 1.5) / log(1.5);
238 for (int j = 0; j < 5; j++) {
239 sum += product * s->vector_b[j];
246 static void calculate_sfm(AudioFFTDeNoiseContext *s,
247 DeNoiseChannel *dnch,
250 double d1 = 0.0, d2 = 1.0;
253 for (int k = start; k < end; k++) {
254 if (dnch->noisy_data[k] > s->sample_floor) {
256 d1 += dnch->noisy_data[k];
257 d2 *= dnch->noisy_data[k];
261 } else if (d2 < 1.0E-100) {
269 dnch->sfm_results[0] = d1;
270 d2 = log(d2) + 230.2585 * i;
273 dnch->sfm_results[1] = d1;
274 dnch->sfm_results[2] = d1 - d2;
276 dnch->sfm_results[0] = s->auto_floor;
277 dnch->sfm_results[1] = dnch->sfm_threshold;
278 dnch->sfm_results[2] = dnch->sfm_threshold;
282 static double limit_gain(double a, double b)
285 return (b * a - 1.0) / (b + a - 2.0);
287 return (b * a - 2.0 * a + 1.0) / (b - a);
291 static void process_frame(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
292 FFTComplex *fft_data,
293 double *prior, double *prior_band_excit, int track_noise)
295 double d1, d2, d3, gain;
298 d1 = fft_data[0].re * fft_data[0].re;
299 dnch->noisy_data[0] = d1;
300 d2 = d1 / dnch->abs_var[0];
301 d3 = RATIO * prior[0] + RRATIO * fmax(d2 - 1.0, 0.0);
302 gain = d3 / (1.0 + d3);
303 gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
304 prior[0] = (d2 * gain);
305 dnch->clean_data[0] = (d1 * gain);
307 dnch->gain[0] = gain;
309 for (int i = 1; i < s->fft_length2; i++) {
310 d1 = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
311 if (d1 > s->sample_floor)
314 dnch->noisy_data[i] = d1;
315 d2 = d1 / dnch->abs_var[i];
316 d3 = RATIO * prior[i] + RRATIO * fmax(d2 - 1.0, 0.0);
317 gain = d3 / (1.0 + d3);
318 gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
319 prior[i] = d2 * gain;
320 dnch->clean_data[i] = d1 * gain;
322 dnch->gain[i] = gain;
324 d1 = fft_data[0].im * fft_data[0].im;
325 if (d1 > s->sample_floor)
328 dnch->noisy_data[s->fft_length2] = d1;
329 d2 = d1 / dnch->abs_var[s->fft_length2];
330 d3 = RATIO * prior[s->fft_length2] + RRATIO * fmax(d2 - 1.0, 0.0);
331 gain = d3 / (1.0 + d3);
332 gain *= gain + M_PI_4 / fmax(d2, 1.0E-6);
333 prior[s->fft_length2] = d2 * gain;
334 dnch->clean_data[s->fft_length2] = d1 * gain;
336 dnch->gain[s->fft_length2] = gain;
337 if (n > s->fft_length2 - 2) {
339 i1 = s->noise_band_count;
342 for (int i = 0; i <= s->noise_band_count; i++) {
343 if (n > 1.1 * s->noise_band_edge[i]) {
349 if (track_noise && (i1 > s->noise_band_count / 2)) {
350 int j = FFMIN(n, s->noise_band_edge[i1]);
353 for (k = i1 - 1; k >= 0; k--) {
354 int i = s->noise_band_edge[k];
355 calculate_sfm(s, dnch, i, j);
356 dnch->noise_band_sample[k] = dnch->sfm_results[0];
357 if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) {
365 double sum = 0.0, min, max;
368 for (i = i1 - 1; i > k; i--) {
369 min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]);
375 min = 3.0E-4 * i * i;
377 min = 3.0E-4 * (8 * i - 16);
380 max = 2.0E-4 * i * i;
382 max = 2.0E-4 * (4 * i - 4);
385 if (s->track_residual) {
386 if (s->last_noise_floor > s->last_residual_floor + 9) {
389 } else if (s->last_noise_floor > s->last_residual_floor + 6) {
392 } else if (s->last_noise_floor > s->last_residual_floor + 4) {
395 } else if (s->last_noise_floor > s->last_residual_floor + 2) {
398 } else if (s->last_noise_floor > s->last_residual_floor) {
407 sum = av_clipd(sum, -min, max);
409 for (int i = 0; i < 15; i++)
410 dnch->noise_band_auto_var[i] *= sum;
411 } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) {
412 dnch->sfm_fail_flags[s->block_count & 0x1FF] = 1;
413 dnch->sfm_fail_total += 1;
417 for (int i = 0; i < s->number_of_bands; i++) {
418 dnch->band_excit[i] = 0.0;
419 dnch->band_amt[i] = 0.0;
422 for (int i = 0; i < s->bin_count; i++) {
423 dnch->band_excit[s->bin2band[i]] += dnch->clean_data[i];
426 for (int i = 0; i < s->number_of_bands; i++) {
427 dnch->band_excit[i] = fmax(dnch->band_excit[i],
428 s->band_alpha[i] * dnch->band_excit[i] +
429 s->band_beta[i] * prior_band_excit[i]);
430 prior_band_excit[i] = dnch->band_excit[i];
433 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
434 for (int k = 0; k < s->number_of_bands; k++) {
435 dnch->band_amt[j] += dnch->spread_function[i++] * dnch->band_excit[k];
439 for (int i = 0; i < s->bin_count; i++)
440 dnch->amt[i] = dnch->band_amt[s->bin2band[i]];
442 if (dnch->amt[0] > dnch->abs_var[0]) {
444 } else if (dnch->amt[0] > dnch->min_abs_var[0]) {
445 double limit = sqrt(dnch->abs_var[0] / dnch->amt[0]);
446 dnch->gain[0] = limit_gain(dnch->gain[0], limit);
448 dnch->gain[0] = limit_gain(dnch->gain[0], s->max_gain);
450 if (dnch->amt[s->fft_length2] > dnch->abs_var[s->fft_length2]) {
451 dnch->gain[s->fft_length2] = 1.0;
452 } else if (dnch->amt[s->fft_length2] > dnch->min_abs_var[s->fft_length2]) {
453 double limit = sqrt(dnch->abs_var[s->fft_length2] / dnch->amt[s->fft_length2]);
454 dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], limit);
456 dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], s->max_gain);
459 for (int i = 1; i < s->fft_length2; i++) {
460 if (dnch->amt[i] > dnch->abs_var[i]) {
462 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
463 double limit = sqrt(dnch->abs_var[i] / dnch->amt[i]);
464 dnch->gain[i] = limit_gain(dnch->gain[i], limit);
466 dnch->gain[i] = limit_gain(dnch->gain[i], s->max_gain);
470 gain = dnch->gain[0];
471 dnch->clean_data[0] = (gain * gain * dnch->noisy_data[0]);
472 fft_data[0].re *= gain;
473 gain = dnch->gain[s->fft_length2];
474 dnch->clean_data[s->fft_length2] = (gain * gain * dnch->noisy_data[s->fft_length2]);
475 fft_data[0].im *= gain;
476 for (int i = 1; i < s->fft_length2; i++) {
477 gain = dnch->gain[i];
478 dnch->clean_data[i] = (gain * gain * dnch->noisy_data[i]);
479 fft_data[i].re *= gain;
480 fft_data[i].im *= gain;
484 static double freq2bark(double x)
486 double d = x / 7500.0;
488 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
491 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
494 return lrint(s->band_centre[0] / 1.5);
496 return s->band_centre[band];
499 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
504 i = lrint(s->band_centre[14] * 1.224745);
506 i = lrint(s->band_centre[band] / 1.224745);
509 return FFMIN(i, s->sample_rate / 2);
512 static void set_band_parameters(AudioFFTDeNoiseContext *s,
513 DeNoiseChannel *dnch)
515 double band_noise, d2, d3, d4, d5;
516 int i = 0, j = 0, k = 0;
519 band_noise = process_get_band_noise(s, dnch, 0);
520 for (int m = j; m <= s->fft_length2; m++) {
527 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
530 band_noise = process_get_band_noise(s, dnch, k);
535 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
537 dnch->rel_var[s->fft_length2] = exp(band_noise * C);
539 for (i = 0; i < 15; i++)
540 dnch->noise_band_auto_var[i] = s->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
542 for (i = 0; i <= s->fft_length2; i++) {
543 dnch->abs_var[i] = fmax(s->max_var * dnch->rel_var[i], 1.0);
544 dnch->min_abs_var[i] = s->gain_scale * dnch->abs_var[i];
548 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
550 DeNoiseChannel *dnch = &s->dnch[ch];
551 char *p, *arg, *saveptr = NULL;
552 int i, ret, band_noise[15] = { 0 };
554 if (!s->band_noise_str)
557 p = av_strdup(s->band_noise_str);
561 for (i = 0; i < 15; i++) {
562 if (!(arg = av_strtok(p, "| ", &saveptr)))
567 ret = sscanf(arg, "%d", &band_noise[i]);
569 av_log(s, AV_LOG_ERROR, "Custom band noise must be integer.\n");
573 band_noise[i] = av_clip(band_noise[i], -24, 24);
577 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
580 static void set_parameters(AudioFFTDeNoiseContext *s)
582 if (s->last_noise_floor != s->noise_floor)
583 s->last_noise_floor = s->noise_floor;
585 if (s->track_residual)
586 s->last_noise_floor = fmaxf(s->last_noise_floor, s->residual_floor);
588 s->max_var = s->floor * exp((100.0 + s->last_noise_floor) * C);
590 if (s->track_residual) {
591 s->last_residual_floor = s->residual_floor;
592 s->last_noise_reduction = fmax(s->last_noise_floor - s->last_residual_floor, 0);
593 s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
594 } else if (s->noise_reduction != s->last_noise_reduction) {
595 s->last_noise_reduction = s->noise_reduction;
596 s->last_residual_floor = av_clipf(s->last_noise_floor - s->last_noise_reduction, -80, -20);
597 s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
600 s->gain_scale = 1.0 / (s->max_gain * s->max_gain);
602 for (int ch = 0; ch < s->channels; ch++) {
603 DeNoiseChannel *dnch = &s->dnch[ch];
605 set_band_parameters(s, dnch);
609 static int config_input(AVFilterLink *inlink)
611 AVFilterContext *ctx = inlink->dst;
612 AudioFFTDeNoiseContext *s = ctx->priv;
613 double wscale, sar, sum, sdiv;
616 s->dnch = av_calloc(inlink->channels, sizeof(*s->dnch));
618 return AVERROR(ENOMEM);
620 s->pts = AV_NOPTS_VALUE;
621 s->channels = inlink->channels;
622 s->sample_rate = inlink->sample_rate;
623 s->sample_advance = s->sample_rate / 80;
624 s->window_length = 3 * s->sample_advance;
625 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
626 s->fft_length = s->fft_length2 * 2;
627 s->buffer_length = s->fft_length * 2;
628 s->bin_count = s->fft_length2 + 1;
630 s->band_centre[0] = 80;
631 for (i = 1; i < 15; i++) {
632 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
633 if (s->band_centre[i] < 1000) {
634 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
635 } else if (s->band_centre[i] < 5000) {
636 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
637 } else if (s->band_centre[i] < 15000) {
638 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
640 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
644 for (j = 0; j < 5; j++) {
645 for (k = 0; k < 5; k++) {
646 s->matrix_a[j + k * 5] = 0.0;
647 for (m = 0; m < 15; m++)
648 s->matrix_a[j + k * 5] += pow(m, j + k);
652 factor(s->matrix_a, 5);
655 for (j = 0; j < 5; j++)
656 for (k = 0; k < 15; k++)
657 s->matrix_b[i++] = pow(k, j);
660 for (j = 0; j < 15; j++)
661 for (k = 0; k < 5; k++)
662 s->matrix_c[i++] = pow(j, k);
664 s->window = av_calloc(s->window_length, sizeof(*s->window));
665 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
666 if (!s->window || !s->bin2band)
667 return AVERROR(ENOMEM);
669 sdiv = s->sample_rate / 17640.0;
670 for (i = 0; i <= s->fft_length2; i++)
671 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
673 s->number_of_bands = s->bin2band[s->fft_length2] + 1;
675 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
676 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
677 if (!s->band_alpha || !s->band_beta)
678 return AVERROR(ENOMEM);
680 for (int ch = 0; ch < inlink->channels; ch++) {
681 DeNoiseChannel *dnch = &s->dnch[ch];
683 switch (s->noise_type) {
685 for (i = 0; i < 15; i++)
686 dnch->band_noise[i] = 0;
689 for (i = 0; i < 15; i++)
690 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0) + FFMAX(i - 7, 0);
693 for (i = 0; i < 15; i++)
694 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10) + FFMAX(i - 12, -5);
697 read_custom_noise(s, ch);
704 dnch->sfm_threshold = 0.8;
705 dnch->sfm_alpha = 0.05;
706 for (i = 0; i < 512; i++)
707 dnch->sfm_fail_flags[i] = 0;
709 dnch->sfm_fail_total = 0;
710 j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1);
712 for (i = 0; i < 512; i += j) {
713 dnch->sfm_fail_flags[i] = 1;
714 dnch->sfm_fail_total += 1;
717 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
718 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
719 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
720 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
721 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
722 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
723 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
724 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
725 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
726 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
727 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
728 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
729 dnch->fft_data = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_data));
730 dnch->fft = av_fft_init(av_log2(s->fft_length2), 0);
731 dnch->ifft = av_fft_init(av_log2(s->fft_length2), 1);
732 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
733 sizeof(*dnch->spread_function));
740 !dnch->prior_band_excit ||
743 !dnch->out_samples ||
747 !dnch->min_abs_var ||
748 !dnch->spread_function ||
751 return AVERROR(ENOMEM);
754 for (int ch = 0; ch < inlink->channels; ch++) {
755 DeNoiseChannel *dnch = &s->dnch[ch];
756 double *prior_band_excit = dnch->prior_band_excit;
757 double *prior = dnch->prior;
761 p1 = pow(0.1, 2.5 / sdiv);
762 p2 = pow(0.1, 1.0 / sdiv);
764 for (m = 0; m < s->number_of_bands; m++) {
765 for (n = 0; n < s->number_of_bands; n++) {
767 dnch->spread_function[j++] = pow(p2, m - n);
769 dnch->spread_function[j++] = pow(p1, n - m);
771 dnch->spread_function[j++] = 1.0;
776 for (m = 0; m < s->number_of_bands; m++) {
777 dnch->band_excit[m] = 0.0;
778 prior_band_excit[m] = 0.0;
781 for (m = 0; m <= s->fft_length2; m++)
782 dnch->band_excit[s->bin2band[m]] += 1.0;
785 for (m = 0; m < s->number_of_bands; m++) {
786 for (n = 0; n < s->number_of_bands; n++)
787 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
792 for (int i = 0; i < s->number_of_bands; i++) {
793 if (i < lrint(12.0 * sdiv)) {
794 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
796 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
798 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
801 for (int i = 0; i <= s->fft_length2; i++)
803 for (int i = 0; i < s->buffer_length; i++)
804 dnch->out_samples[i] = 0;
807 for (int i = 0; i < s->number_of_bands; i++)
808 for (int k = 0; k < s->number_of_bands; k++)
809 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
813 sar = s->sample_advance / s->sample_rate;
814 for (int i = 0; i <= s->fft_length2; i++) {
815 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
816 double d6 = (i - 1) * s->sample_rate / s->fft_length;
817 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
818 s->band_alpha[j] = exp(-sar / d7);
819 s->band_beta[j] = 1.0 - s->band_alpha[j];
824 wscale = sqrt(16.0 / (9.0 * s->fft_length));
826 for (int i = 0; i < s->window_length; i++) {
827 double d10 = sin(i * M_PI / s->window_length);
833 s->window_weight = 0.5 * sum;
834 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
835 s->sample_floor = s->floor * exp(4.144600506562284);
836 s->auto_floor = s->floor * exp(6.907667510937141);
840 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
842 for (int j = 1; j < 16; j++) {
843 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
844 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
846 s->noise_band_edge[16] = i;
848 s->noise_band_count = s->noise_band_edge[16];
850 s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->fft_length);
852 return AVERROR(ENOMEM);
857 static void preprocess(FFTComplex *in, int len)
859 double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
862 d5 = 2.0 * M_PI / len;
870 for (i = 1; i < len / 4; i++) {
872 d2 = 0.5 * (in[i].re + in[k].re);
873 d1 = 0.5 * (in[i].im - in[k].im);
874 d4 = 0.5 * (in[i].im + in[k].im);
875 d3 = 0.5 * (in[k].re - in[i].re);
876 in[i].re = d2 + d9 * d4 + d6 * d3;
877 in[i].im = d1 + d9 * d3 - d6 * d4;
878 in[k].re = d2 - d9 * d4 - d6 * d3;
879 in[k].im = -d1 + d9 * d3 - d6 * d4;
881 d9 += d9 * d8 - d6 * d7;
882 d6 += d6 * d8 + d10 * d7;
886 in[0].re = d2 + in[0].im;
887 in[0].im = d2 - in[0].im;
890 static void postprocess(FFTComplex *in, int len)
892 double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
895 d5 = 2.0 * M_PI / len;
902 for (i = 1; i < len / 4; i++) {
904 d2 = 0.5 * (in[i].re + in[k].re);
905 d1 = 0.5 * (in[i].im - in[k].im);
906 d4 = 0.5 * (in[i].re - in[k].re);
907 d3 = 0.5 * (in[i].im + in[k].im);
908 in[i].re = d2 - d9 * d3 - d6 * d4;
909 in[i].im = d1 + d9 * d4 - d6 * d3;
910 in[k].re = d2 + d9 * d3 + d6 * d4;
911 in[k].im = -d1 + d9 * d4 - d6 * d3;
913 d9 += d9 * d8 - d6 * d7;
914 d6 += d6 * d8 + d10 * d7;
917 in[0].re = 0.5 * (d2 + in[0].im);
918 in[0].im = 0.5 * (d2 - in[0].im);
921 static void init_sample_noise(DeNoiseChannel *dnch)
923 for (int i = 0; i < 15; i++) {
924 dnch->noise_band_norm[i] = 0.0;
925 dnch->noise_band_avr[i] = 0.0;
926 dnch->noise_band_avi[i] = 0.0;
927 dnch->noise_band_var[i] = 0.0;
931 static void sample_noise_block(AudioFFTDeNoiseContext *s,
932 DeNoiseChannel *dnch,
935 float *src = (float *)in->extended_data[ch];
936 double mag2, var = 0.0, avr = 0.0, avi = 0.0;
937 int edge, j, k, n, edgemax;
939 for (int i = 0; i < s->window_length; i++) {
940 dnch->fft_data[i].re = s->window[i] * src[i] * (1LL << 24);
941 dnch->fft_data[i].im = 0.0;
944 for (int i = s->window_length; i < s->fft_length2; i++) {
945 dnch->fft_data[i].re = 0.0;
946 dnch->fft_data[i].im = 0.0;
949 av_fft_permute(dnch->fft, dnch->fft_data);
950 av_fft_calc(dnch->fft, dnch->fft_data);
952 preprocess(dnch->fft_data, s->fft_length);
954 edge = s->noise_band_edge[0];
958 edgemax = fmin(s->fft_length2, s->noise_band_edge[15]);
959 dnch->fft_data[s->fft_length2].re = dnch->fft_data[0].im;
960 dnch->fft_data[0].im = 0.0;
961 dnch->fft_data[s->fft_length2].im = 0.0;
963 for (int i = j; i <= edgemax; i++) {
964 if ((i == j) && (i < edgemax)) {
966 dnch->noise_band_norm[k - 1] += j - edge;
967 dnch->noise_band_avr[k - 1] += avr;
968 dnch->noise_band_avi[k - 1] += avi;
969 dnch->noise_band_var[k - 1] += var;
973 j = s->noise_band_edge[k];
981 avr += dnch->fft_data[n].re;
982 avi += dnch->fft_data[n].im;
983 mag2 = dnch->fft_data[n].re * dnch->fft_data[n].re +
984 dnch->fft_data[n].im * dnch->fft_data[n].im;
986 mag2 = fmax(mag2, s->sample_floor);
988 dnch->noisy_data[i] = mag2;
993 dnch->noise_band_norm[k - 1] += j - edge;
994 dnch->noise_band_avr[k - 1] += avr;
995 dnch->noise_band_avi[k - 1] += avi;
996 dnch->noise_band_var[k - 1] += var;
999 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
1000 DeNoiseChannel *dnch,
1001 double *sample_noise)
1003 for (int i = 0; i < s->noise_band_count; i++) {
1004 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
1005 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
1006 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
1007 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
1008 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
1009 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
1010 sample_noise[i] = (1.0 / C) * log(dnch->noise_band_var[i] / s->floor) - 100.0;
1012 if (s->noise_band_count < 15) {
1013 for (int i = s->noise_band_count; i < 15; i++)
1014 sample_noise[i] = sample_noise[i - 1];
1018 static void set_noise_profile(AudioFFTDeNoiseContext *s,
1019 DeNoiseChannel *dnch,
1020 double *sample_noise,
1023 int new_band_noise[15];
1025 double sum = 0.0, d1;
1026 float new_noise_floor;
1029 for (int m = 0; m < 15; m++)
1030 temp[m] = sample_noise[m];
1034 for (int m = 0; m < 5; m++) {
1036 for (n = 0; n < 15; n++)
1037 sum += s->matrix_b[i++] * temp[n];
1038 s->vector_b[m] = sum;
1040 solve(s->matrix_a, s->vector_b, 5);
1042 for (int m = 0; m < 15; m++) {
1044 for (n = 0; n < 5; n++)
1045 sum += s->matrix_c[i++] * s->vector_b[n];
1051 for (int m = 0; m < 15; m++)
1054 d1 = (int)(sum / 15.0 - 0.5);
1056 i = lrint(temp[7] - d1);
1058 for (d1 -= dnch->band_noise[7] - i; d1 > -20.0; d1 -= 1.0)
1061 for (int m = 0; m < 15; m++)
1064 new_noise_floor = d1 + 2.5;
1067 av_log(s, AV_LOG_INFO, "bn=");
1068 for (int m = 0; m < 15; m++) {
1069 new_band_noise[m] = lrint(temp[m]);
1070 new_band_noise[m] = av_clip(new_band_noise[m], -24, 24);
1071 av_log(s, AV_LOG_INFO, "%d ", new_band_noise[m]);
1073 av_log(s, AV_LOG_INFO, "\n");
1074 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1078 s->noise_floor = new_noise_floor;
1081 typedef struct ThreadData {
1085 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1087 AudioFFTDeNoiseContext *s = ctx->priv;
1088 ThreadData *td = arg;
1089 AVFrame *in = td->in;
1090 const int start = (in->channels * jobnr) / nb_jobs;
1091 const int end = (in->channels * (jobnr+1)) / nb_jobs;
1093 for (int ch = start; ch < end; ch++) {
1094 DeNoiseChannel *dnch = &s->dnch[ch];
1095 const float *src = (const float *)in->extended_data[ch];
1096 double *dst = dnch->out_samples;
1098 if (s->track_noise) {
1099 int i = s->block_count & 0x1FF;
1101 if (dnch->sfm_fail_flags[i])
1102 dnch->sfm_fail_total--;
1103 dnch->sfm_fail_flags[i] = 0;
1104 dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha;
1105 dnch->sfm_threshold += dnch->sfm_alpha * (0.5 + (1.0 / 640) * dnch->sfm_fail_total);
1108 for (int m = 0; m < s->window_length; m++) {
1109 dnch->fft_data[m].re = s->window[m] * src[m] * (1LL << 24);
1110 dnch->fft_data[m].im = 0;
1113 for (int m = s->window_length; m < s->fft_length2; m++) {
1114 dnch->fft_data[m].re = 0;
1115 dnch->fft_data[m].im = 0;
1118 av_fft_permute(dnch->fft, dnch->fft_data);
1119 av_fft_calc(dnch->fft, dnch->fft_data);
1121 preprocess(dnch->fft_data, s->fft_length);
1122 process_frame(s, dnch, dnch->fft_data,
1124 dnch->prior_band_excit,
1126 postprocess(dnch->fft_data, s->fft_length);
1128 av_fft_permute(dnch->ifft, dnch->fft_data);
1129 av_fft_calc(dnch->ifft, dnch->fft_data);
1131 for (int m = 0; m < s->window_length; m++)
1132 dst[m] += s->window[m] * dnch->fft_data[m].re / (1LL << 24);
1138 static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
1139 DeNoiseChannel *dnch,
1142 if (s->noise_band_count > 0) {
1143 for (int i = 0; i < s->noise_band_count; i++) {
1144 levels[i] = (1.0 / C) * log(dnch->noise_band_auto_var[i] / s->floor) - 100.0;
1146 if (s->noise_band_count < 15) {
1147 for (int i = s->noise_band_count; i < 15; i++)
1148 levels[i] = levels[i - 1];
1151 for (int i = 0; i < 15; i++) {
1157 static int output_frame(AVFilterLink *inlink)
1159 AVFilterContext *ctx = inlink->dst;
1160 AVFilterLink *outlink = ctx->outputs[0];
1161 AudioFFTDeNoiseContext *s = ctx->priv;
1162 AVFrame *out = NULL, *in = NULL;
1167 in = ff_get_audio_buffer(outlink, s->window_length);
1169 return AVERROR(ENOMEM);
1172 ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
1176 if (s->track_noise) {
1177 for (int ch = 0; ch < inlink->channels; ch++) {
1178 DeNoiseChannel *dnch = &s->dnch[ch];
1181 get_auto_noise_levels(s, dnch, levels);
1182 set_noise_profile(s, dnch, levels, 0);
1185 if (s->noise_floor != s->last_noise_floor)
1189 if (s->sample_noise_start) {
1190 for (int ch = 0; ch < inlink->channels; ch++) {
1191 DeNoiseChannel *dnch = &s->dnch[ch];
1193 init_sample_noise(dnch);
1195 s->sample_noise_start = 0;
1196 s->sample_noise = 1;
1199 if (s->sample_noise) {
1200 for (int ch = 0; ch < inlink->channels; ch++) {
1201 DeNoiseChannel *dnch = &s->dnch[ch];
1203 sample_noise_block(s, dnch, in, ch);
1207 if (s->sample_noise_end) {
1208 for (int ch = 0; ch < inlink->channels; ch++) {
1209 DeNoiseChannel *dnch = &s->dnch[ch];
1210 double sample_noise[15];
1212 finish_sample_noise(s, dnch, sample_noise);
1213 set_noise_profile(s, dnch, sample_noise, 1);
1214 set_band_parameters(s, dnch);
1216 s->sample_noise = 0;
1217 s->sample_noise_end = 0;
1222 ctx->internal->execute(ctx, filter_channel, &td, NULL,
1223 FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
1225 out = ff_get_audio_buffer(outlink, s->sample_advance);
1227 ret = AVERROR(ENOMEM);
1231 for (int ch = 0; ch < inlink->channels; ch++) {
1232 DeNoiseChannel *dnch = &s->dnch[ch];
1233 double *src = dnch->out_samples;
1234 float *orig = (float *)in->extended_data[ch];
1235 float *dst = (float *)out->extended_data[ch];
1237 switch (s->output_mode) {
1239 for (int m = 0; m < s->sample_advance; m++)
1243 for (int m = 0; m < s->sample_advance; m++)
1247 for (int m = 0; m < s->sample_advance; m++)
1248 dst[m] = orig[m] - src[m];
1253 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1254 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1257 av_audio_fifo_drain(s->fifo, s->sample_advance);
1260 ret = ff_filter_frame(outlink, out);
1263 s->pts += s->sample_advance;
1270 static int activate(AVFilterContext *ctx)
1272 AVFilterLink *inlink = ctx->inputs[0];
1273 AVFilterLink *outlink = ctx->outputs[0];
1274 AudioFFTDeNoiseContext *s = ctx->priv;
1275 AVFrame *frame = NULL;
1278 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1280 ret = ff_inlink_consume_frame(inlink, &frame);
1285 if (s->pts == AV_NOPTS_VALUE)
1286 s->pts = frame->pts;
1288 ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
1289 av_frame_free(&frame);
1294 if (av_audio_fifo_size(s->fifo) >= s->window_length)
1295 return output_frame(inlink);
1297 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1298 if (ff_outlink_frame_wanted(outlink) &&
1299 av_audio_fifo_size(s->fifo) < s->window_length) {
1300 ff_inlink_request_frame(inlink);
1304 return FFERROR_NOT_READY;
1307 static av_cold void uninit(AVFilterContext *ctx)
1309 AudioFFTDeNoiseContext *s = ctx->priv;
1311 av_freep(&s->window);
1312 av_freep(&s->bin2band);
1313 av_freep(&s->band_alpha);
1314 av_freep(&s->band_beta);
1317 for (int ch = 0; ch < s->channels; ch++) {
1318 DeNoiseChannel *dnch = &s->dnch[ch];
1319 av_freep(&dnch->amt);
1320 av_freep(&dnch->band_amt);
1321 av_freep(&dnch->band_excit);
1322 av_freep(&dnch->gain);
1323 av_freep(&dnch->prior);
1324 av_freep(&dnch->prior_band_excit);
1325 av_freep(&dnch->clean_data);
1326 av_freep(&dnch->noisy_data);
1327 av_freep(&dnch->out_samples);
1328 av_freep(&dnch->spread_function);
1329 av_freep(&dnch->abs_var);
1330 av_freep(&dnch->rel_var);
1331 av_freep(&dnch->min_abs_var);
1332 av_freep(&dnch->fft_data);
1333 av_fft_end(dnch->fft);
1335 av_fft_end(dnch->ifft);
1341 av_audio_fifo_free(s->fifo);
1344 static int query_formats(AVFilterContext *ctx)
1346 AVFilterFormats *formats = NULL;
1347 AVFilterChannelLayouts *layouts = NULL;
1348 static const enum AVSampleFormat sample_fmts[] = {
1354 formats = ff_make_format_list(sample_fmts);
1356 return AVERROR(ENOMEM);
1357 ret = ff_set_common_formats(ctx, formats);
1361 layouts = ff_all_channel_counts();
1363 return AVERROR(ENOMEM);
1365 ret = ff_set_common_channel_layouts(ctx, layouts);
1369 formats = ff_all_samplerates();
1370 return ff_set_common_samplerates(ctx, formats);
1373 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1374 char *res, int res_len, int flags)
1376 AudioFFTDeNoiseContext *s = ctx->priv;
1379 if (!strcmp(cmd, "sample_noise") ||
1380 !strcmp(cmd, "sn")) {
1381 if (!strcmp(args, "start")) {
1382 s->sample_noise_start = 1;
1383 s->sample_noise_end = 0;
1384 } else if (!strcmp(args, "end")) {
1385 s->sample_noise_start = 0;
1386 s->sample_noise_end = 1;
1388 } else if (!strcmp(cmd, "nr") ||
1389 !strcmp(cmd, "noise_reduction")) {
1392 if (sscanf(args, "%f", &nr) == 1) {
1393 s->noise_reduction = av_clipf(nr, 0.01, 97);
1396 } else if (!strcmp(cmd, "nf") ||
1397 !strcmp(cmd, "noise_floor")) {
1400 if (sscanf(args, "%f", &nf) == 1) {
1401 s->noise_floor = av_clipf(nf, -80, -20);
1404 } else if (!strcmp(cmd, "output_mode") ||
1405 !strcmp(cmd, "om")) {
1406 if (!strcmp(args, "i")) {
1407 s->output_mode = IN_MODE;
1408 } else if (!strcmp(args, "o")) {
1409 s->output_mode = OUT_MODE;
1410 } else if (!strcmp(args, "n")) {
1411 s->output_mode = NOISE_MODE;
1421 static const AVFilterPad inputs[] = {
1424 .type = AVMEDIA_TYPE_AUDIO,
1425 .config_props = config_input,
1430 static const AVFilterPad outputs[] = {
1433 .type = AVMEDIA_TYPE_AUDIO,
1438 AVFilter ff_af_afftdn = {
1440 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1441 .query_formats = query_formats,
1442 .priv_size = sizeof(AudioFFTDeNoiseContext),
1443 .priv_class = &afftdn_class,
1444 .activate = activate,
1448 .process_command = process_command,
1449 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC |
1450 AVFILTER_FLAG_SLICE_THREADS,