2 * Copyright (c) 2018 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/audio_fifo.h"
22 #include "libavutil/opt.h"
27 typedef struct DeclickChannel {
30 double *acoefficients;
46 typedef struct AudioDeclickContext {
74 uint64_t detected_errors;
78 double *window_func_lut;
80 int (*detector)(struct AudioDeclickContext *s, DeclickChannel *c,
81 double sigmae, double *detection,
82 double *acoefficients, uint8_t *click, int *index,
83 const double *src, double *dst);
84 } AudioDeclickContext;
86 #define OFFSET(x) offsetof(AudioDeclickContext, x)
87 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
89 static const AVOption adeclick_options[] = {
90 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
91 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
92 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 25, AF },
93 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 100, AF },
94 { "b", "set burst fusion", OFFSET(burst), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 10, AF },
95 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
96 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
97 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
101 AVFILTER_DEFINE_CLASS(adeclick);
103 static int query_formats(AVFilterContext *ctx)
105 AVFilterFormats *formats = NULL;
106 AVFilterChannelLayouts *layouts = NULL;
107 static const enum AVSampleFormat sample_fmts[] = {
113 formats = ff_make_format_list(sample_fmts);
115 return AVERROR(ENOMEM);
116 ret = ff_set_common_formats(ctx, formats);
120 layouts = ff_all_channel_counts();
122 return AVERROR(ENOMEM);
124 ret = ff_set_common_channel_layouts(ctx, layouts);
128 formats = ff_all_samplerates();
129 return ff_set_common_samplerates(ctx, formats);
132 static int config_input(AVFilterLink *inlink)
134 AVFilterContext *ctx = inlink->dst;
135 AudioDeclickContext *s = ctx->priv;
138 s->pts = AV_NOPTS_VALUE;
139 s->window_size = inlink->sample_rate * s->w / 1000.;
140 if (s->window_size < 100)
141 return AVERROR(EINVAL);
142 s->ar_order = FFMAX(s->window_size * s->ar / 100., 1);
143 s->nb_burst_samples = s->window_size * s->burst / 1000.;
144 s->hop_size = s->window_size * (1. - (s->overlap / 100.));
146 return AVERROR(EINVAL);
148 s->window_func_lut = av_calloc(s->window_size, sizeof(*s->window_func_lut));
149 if (!s->window_func_lut)
150 return AVERROR(ENOMEM);
151 for (i = 0; i < s->window_size; i++)
152 s->window_func_lut[i] = sin(M_PI * i / s->window_size) *
153 (1. - (s->overlap / 100.)) * M_PI_2;
155 av_frame_free(&s->in);
156 av_frame_free(&s->out);
157 av_frame_free(&s->buffer);
158 av_frame_free(&s->is);
159 s->in = ff_get_audio_buffer(inlink, s->window_size);
160 s->out = ff_get_audio_buffer(inlink, s->window_size);
161 s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
162 s->is = ff_get_audio_buffer(inlink, s->window_size);
163 if (!s->in || !s->out || !s->buffer || !s->is)
164 return AVERROR(ENOMEM);
166 s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
168 return AVERROR(ENOMEM);
169 s->overlap_skip = s->method ? (s->window_size - s->hop_size) / 2 : 0;
170 if (s->overlap_skip > 0) {
171 av_audio_fifo_write(s->fifo, (void **)s->in->extended_data,
175 s->nb_channels = inlink->channels;
176 s->chan = av_calloc(inlink->channels, sizeof(*s->chan));
178 return AVERROR(ENOMEM);
180 for (i = 0; i < inlink->channels; i++) {
181 DeclickChannel *c = &s->chan[i];
183 c->detection = av_calloc(s->window_size, sizeof(*c->detection));
184 c->auxiliary = av_calloc(s->ar_order + 1, sizeof(*c->auxiliary));
185 c->acoefficients = av_calloc(s->ar_order + 1, sizeof(*c->acoefficients));
186 c->acorrelation = av_calloc(s->ar_order + 1, sizeof(*c->acorrelation));
187 c->tmp = av_calloc(s->ar_order, sizeof(*c->tmp));
188 c->click = av_calloc(s->window_size, sizeof(*c->click));
189 c->index = av_calloc(s->window_size, sizeof(*c->index));
190 c->interpolated = av_calloc(s->window_size, sizeof(*c->interpolated));
191 if (!c->auxiliary || !c->acoefficients || !c->detection || !c->click ||
192 !c->index || !c->interpolated || !c->acorrelation || !c->tmp)
193 return AVERROR(ENOMEM);
199 static void autocorrelation(const double *input, int order, int size,
200 double *output, double scale)
204 for (i = 0; i <= order; i++) {
207 for (j = i; j < size; j++)
208 value += input[j] * input[j - i];
210 output[i] = value * scale;
214 static double autoregression(const double *samples, int ar_order,
215 int nb_samples, double *k, double *r, double *a)
220 memset(a, 0, ar_order * sizeof(*a));
222 autocorrelation(samples, ar_order, nb_samples, r, 1. / nb_samples);
224 /* Levinson-Durbin algorithm */
225 k[0] = a[0] = -r[1] / r[0];
226 alpha = r[0] * (1. - k[0] * k[0]);
227 for (i = 1; i < ar_order; i++) {
230 for (j = 0; j < i; j++)
231 epsilon += a[j] * r[i - j];
234 k[i] = -epsilon / alpha;
235 alpha *= (1. - k[i] * k[i]);
236 for (j = i - 1; j >= 0; j--)
237 k[j] = a[j] + k[i] * a[i - j - 1];
238 for (j = 0; j <= i; j++)
243 for (i = 1; i <= ar_order; i++)
249 static int isfinite_array(double *samples, int nb_samples)
253 for (i = 0; i < nb_samples; i++)
254 if (!isfinite(samples[i]))
260 static int find_index(int *index, int value, int size)
264 if ((value < index[0]) || (value > index[size - 1]))
270 while (start <= end) {
271 i = (end + start) / 2;
272 if (index[i] == value)
274 if (value < index[i])
276 if (value > index[i])
283 static int factorization(double *matrix, int n)
287 for (i = 0; i < n; i++) {
288 const int in = i * n;
291 value = matrix[in + i];
292 for (j = 0; j < i; j++)
293 value -= matrix[j * n + j] * matrix[in + j] * matrix[in + j];
299 matrix[in + i] = value;
300 for (j = i + 1; j < n; j++) {
301 const int jn = j * n;
305 for (k = 0; k < i; k++)
306 x -= matrix[k * n + k] * matrix[in + k] * matrix[jn + k];
307 matrix[jn + i] = x / matrix[in + i];
314 static int do_interpolation(DeclickChannel *c, double *matrix,
315 double *vector, int n, double *out)
320 ret = factorization(matrix, n);
324 av_fast_malloc(&c->y, &c->y_size, n * sizeof(*c->y));
327 return AVERROR(ENOMEM);
329 for (i = 0; i < n; i++) {
330 const int in = i * n;
334 for (j = 0; j < i; j++)
335 value -= matrix[in + j] * y[j];
339 for (i = n - 1; i >= 0; i--) {
340 out[i] = y[i] / matrix[i * n + i];
341 for (j = i + 1; j < n; j++)
342 out[i] -= matrix[j * n + i] * out[j];
348 static int interpolation(DeclickChannel *c, const double *src, int ar_order,
349 double *acoefficients, int *index, int nb_errors,
350 double *auxiliary, double *interpolated)
352 double *vector, *matrix;
355 av_fast_malloc(&c->matrix, &c->matrix_size, nb_errors * nb_errors * sizeof(*c->matrix));
358 return AVERROR(ENOMEM);
360 av_fast_malloc(&c->vector, &c->vector_size, nb_errors * sizeof(*c->vector));
363 return AVERROR(ENOMEM);
365 autocorrelation(acoefficients, ar_order, ar_order + 1, auxiliary, 1.);
367 for (i = 0; i < nb_errors; i++) {
368 const int im = i * nb_errors;
370 for (j = i; j < nb_errors; j++) {
371 if (abs(index[j] - index[i]) <= ar_order) {
372 matrix[j * nb_errors + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])];
374 matrix[j * nb_errors + i] = matrix[im + j] = 0;
379 for (i = 0; i < nb_errors; i++) {
382 for (j = -ar_order; j <= ar_order; j++)
383 if (find_index(index, index[i] - j, nb_errors))
384 value -= src[index[i] - j] * auxiliary[abs(j)];
389 return do_interpolation(c, matrix, vector, nb_errors, interpolated);
392 static int detect_clips(AudioDeclickContext *s, DeclickChannel *c,
394 double *unused1, double *unused2,
395 uint8_t *clip, int *index,
396 const double *src, double *dst)
398 const double threshold = s->threshold;
399 double max_amplitude = 0;
403 av_fast_malloc(&c->histogram, &c->histogram_size, s->nb_hbins * sizeof(*c->histogram));
405 return AVERROR(ENOMEM);
406 histogram = c->histogram;
407 memset(histogram, 0, sizeof(*histogram) * s->nb_hbins);
409 for (i = 0; i < s->window_size; i++) {
410 const unsigned index = fmin(fabs(src[i]), 1) * (s->nb_hbins - 1);
417 for (i = s->nb_hbins - 1; i > 1; i--) {
419 if (histogram[i] / (double)FFMAX(histogram[i - 1], 1) > threshold) {
420 max_amplitude = i / (double)s->nb_hbins;
426 if (max_amplitude > 0.) {
427 for (i = 0; i < s->window_size; i++) {
428 clip[i] = fabs(src[i]) >= max_amplitude;
432 memset(clip, 0, s->ar_order * sizeof(*clip));
433 memset(clip + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*clip));
435 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
437 index[nb_clips++] = i;
442 static int detect_clicks(AudioDeclickContext *s, DeclickChannel *c,
444 double *detection, double *acoefficients,
445 uint8_t *click, int *index,
446 const double *src, double *dst)
448 const double threshold = s->threshold;
449 int i, j, nb_clicks = 0, prev = -1;
451 memset(detection, 0, s->window_size * sizeof(*detection));
453 for (i = s->ar_order; i < s->window_size; i++) {
454 for (j = 0; j <= s->ar_order; j++) {
455 detection[i] += acoefficients[j] * src[i - j];
459 for (i = 0; i < s->window_size; i++) {
460 click[i] = fabs(detection[i]) > sigmae * threshold;
464 for (i = 0; i < s->window_size; i++) {
468 if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev))
469 for (j = prev + 1; j < i; j++)
474 memset(click, 0, s->ar_order * sizeof(*click));
475 memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click));
477 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
479 index[nb_clicks++] = i;
484 typedef struct ThreadData {
488 static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
490 AudioDeclickContext *s = ctx->priv;
491 ThreadData *td = arg;
492 AVFrame *out = td->out;
493 const double *src = (const double *)s->in->extended_data[ch];
494 double *is = (double *)s->is->extended_data[ch];
495 double *dst = (double *)s->out->extended_data[ch];
496 double *ptr = (double *)out->extended_data[ch];
497 double *buf = (double *)s->buffer->extended_data[ch];
498 const double *w = s->window_func_lut;
499 DeclickChannel *c = &s->chan[ch];
503 sigmae = autoregression(src, s->ar_order, s->window_size, c->acoefficients, c->acorrelation, c->tmp);
505 if (isfinite_array(c->acoefficients, s->ar_order + 1)) {
506 double *interpolated = c->interpolated;
507 int *index = c->index;
510 nb_errors = s->detector(s, c, sigmae, c->detection, c->acoefficients,
511 c->click, index, src, dst);
513 ret = interpolation(c, src, s->ar_order, c->acoefficients, index,
514 nb_errors, c->auxiliary, interpolated);
518 for (j = 0; j < nb_errors; j++) {
519 dst[index[j]] = interpolated[j];
524 memcpy(dst, src, s->window_size * sizeof(*dst));
527 if (s->method == 0) {
528 for (j = 0; j < s->window_size; j++)
529 buf[j] += dst[j] * w[j];
531 const int skip = s->overlap_skip;
533 for (j = 0; j < s->hop_size; j++)
534 buf[j] = dst[skip + j];
536 for (j = 0; j < s->hop_size; j++)
539 memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf));
540 memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is));
541 memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf));
542 memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is));
547 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
549 AVFilterContext *ctx = inlink->dst;
550 AVFilterLink *outlink = ctx->outputs[0];
551 AudioDeclickContext *s = ctx->priv;
555 if (s->pts == AV_NOPTS_VALUE)
558 ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
562 while (av_audio_fifo_size(s->fifo) >= s->window_size) {
563 int j, ch, detected_errors = 0;
566 out = ff_get_audio_buffer(outlink, s->hop_size);
568 return AVERROR(ENOMEM);
570 ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data,
576 ret = ctx->internal->execute(ctx, filter_channel, &td, NULL, inlink->channels);
580 for (ch = 0; ch < s->in->channels; ch++) {
581 double *is = (double *)s->is->extended_data[ch];
583 for (j = 0; j < s->hop_size; j++) {
589 av_audio_fifo_drain(s->fifo, s->hop_size);
591 if (s->samples_left > 0)
592 out->nb_samples = FFMIN(s->hop_size, s->samples_left);
595 s->pts += s->hop_size;
597 s->detected_errors += detected_errors;
598 s->nb_samples += out->nb_samples * inlink->channels;
600 ret = ff_filter_frame(outlink, out);
604 if (s->samples_left > 0) {
605 s->samples_left -= s->hop_size;
606 if (s->samples_left <= 0)
607 av_audio_fifo_drain(s->fifo, av_audio_fifo_size(s->fifo));
617 static int request_frame(AVFilterLink *outlink)
619 AVFilterContext *ctx = outlink->src;
620 AudioDeclickContext *s = ctx->priv;
623 ret = ff_request_frame(ctx->inputs[0]);
625 if (ret == AVERROR_EOF && av_audio_fifo_size(s->fifo) > 0) {
626 if (!s->samples_left)
627 s->samples_left = av_audio_fifo_size(s->fifo) - s->overlap_skip;
629 if (s->samples_left > 0) {
630 AVFrame *in = ff_get_audio_buffer(outlink, s->window_size - s->samples_left);
632 return AVERROR(ENOMEM);
633 ret = filter_frame(ctx->inputs[0], in);
640 static av_cold int init(AVFilterContext *ctx)
642 AudioDeclickContext *s = ctx->priv;
644 s->is_declip = !strcmp(ctx->filter->name, "adeclip");
646 s->detector = detect_clips;
648 s->detector = detect_clicks;
654 static av_cold void uninit(AVFilterContext *ctx)
656 AudioDeclickContext *s = ctx->priv;
659 av_log(ctx, AV_LOG_INFO, "Detected %s in %"PRId64" of %"PRId64" samples (%g%%).\n",
660 s->is_declip ? "clips" : "clicks", s->detected_errors,
661 s->nb_samples, 100. * s->detected_errors / s->nb_samples);
663 av_audio_fifo_free(s->fifo);
664 av_freep(&s->window_func_lut);
665 av_frame_free(&s->in);
666 av_frame_free(&s->out);
667 av_frame_free(&s->buffer);
668 av_frame_free(&s->is);
671 for (i = 0; i < s->nb_channels; i++) {
672 DeclickChannel *c = &s->chan[i];
674 av_freep(&c->detection);
675 av_freep(&c->auxiliary);
676 av_freep(&c->acoefficients);
677 av_freep(&c->acorrelation);
681 av_freep(&c->interpolated);
682 av_freep(&c->matrix);
684 av_freep(&c->histogram);
685 c->histogram_size = 0;
686 av_freep(&c->vector);
696 static const AVFilterPad inputs[] = {
699 .type = AVMEDIA_TYPE_AUDIO,
700 .filter_frame = filter_frame,
701 .config_props = config_input,
706 static const AVFilterPad outputs[] = {
709 .type = AVMEDIA_TYPE_AUDIO,
710 .request_frame = request_frame,
715 AVFilter ff_af_adeclick = {
717 .description = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."),
718 .query_formats = query_formats,
719 .priv_size = sizeof(AudioDeclickContext),
720 .priv_class = &adeclick_class,
725 .flags = AVFILTER_FLAG_SLICE_THREADS,
728 static const AVOption adeclip_options[] = {
729 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
730 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
731 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=8}, 0, 25, AF },
732 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=10}, 1, 100, AF },
733 { "n", "set histogram size", OFFSET(nb_hbins), AV_OPT_TYPE_INT, {.i64=1000}, 100, 9999, AF },
734 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
735 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
736 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
740 AVFILTER_DEFINE_CLASS(adeclip);
742 AVFilter ff_af_adeclip = {
744 .description = NULL_IF_CONFIG_SMALL("Remove clipping from input audio."),
745 .query_formats = query_formats,
746 .priv_size = sizeof(AudioDeclickContext),
747 .priv_class = &adeclip_class,
752 .flags = AVFILTER_FLAG_SLICE_THREADS,