2 * Copyright (c) 2018 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/audio_fifo.h"
22 #include "libavutil/opt.h"
29 typedef struct DeclickChannel {
32 double *acoefficients;
48 typedef struct AudioDeclickContext {
76 uint64_t detected_errors;
81 double *window_func_lut;
83 int (*detector)(struct AudioDeclickContext *s, DeclickChannel *c,
84 double sigmae, double *detection,
85 double *acoefficients, uint8_t *click, int *index,
86 const double *src, double *dst);
87 } AudioDeclickContext;
89 #define OFFSET(x) offsetof(AudioDeclickContext, x)
90 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
92 static const AVOption adeclick_options[] = {
93 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
94 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
95 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 25, AF },
96 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 100, AF },
97 { "b", "set burst fusion", OFFSET(burst), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 10, AF },
98 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
99 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
100 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
104 AVFILTER_DEFINE_CLASS(adeclick);
106 static int query_formats(AVFilterContext *ctx)
108 AVFilterFormats *formats = NULL;
109 AVFilterChannelLayouts *layouts = NULL;
110 static const enum AVSampleFormat sample_fmts[] = {
116 formats = ff_make_format_list(sample_fmts);
118 return AVERROR(ENOMEM);
119 ret = ff_set_common_formats(ctx, formats);
123 layouts = ff_all_channel_counts();
125 return AVERROR(ENOMEM);
127 ret = ff_set_common_channel_layouts(ctx, layouts);
131 formats = ff_all_samplerates();
132 return ff_set_common_samplerates(ctx, formats);
135 static int config_input(AVFilterLink *inlink)
137 AVFilterContext *ctx = inlink->dst;
138 AudioDeclickContext *s = ctx->priv;
141 s->pts = AV_NOPTS_VALUE;
142 s->window_size = inlink->sample_rate * s->w / 1000.;
143 if (s->window_size < 100)
144 return AVERROR(EINVAL);
145 s->ar_order = FFMAX(s->window_size * s->ar / 100., 1);
146 s->nb_burst_samples = s->window_size * s->burst / 1000.;
147 s->hop_size = s->window_size * (1. - (s->overlap / 100.));
149 return AVERROR(EINVAL);
151 s->window_func_lut = av_calloc(s->window_size, sizeof(*s->window_func_lut));
152 if (!s->window_func_lut)
153 return AVERROR(ENOMEM);
154 for (i = 0; i < s->window_size; i++)
155 s->window_func_lut[i] = sin(M_PI * i / s->window_size) *
156 (1. - (s->overlap / 100.)) * M_PI_2;
158 av_frame_free(&s->in);
159 av_frame_free(&s->out);
160 av_frame_free(&s->buffer);
161 av_frame_free(&s->is);
162 s->in = ff_get_audio_buffer(inlink, s->window_size);
163 s->out = ff_get_audio_buffer(inlink, s->window_size);
164 s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
165 s->is = ff_get_audio_buffer(inlink, s->window_size);
166 if (!s->in || !s->out || !s->buffer || !s->is)
167 return AVERROR(ENOMEM);
169 s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
171 return AVERROR(ENOMEM);
172 s->overlap_skip = s->method ? (s->window_size - s->hop_size) / 2 : 0;
173 if (s->overlap_skip > 0) {
174 av_audio_fifo_write(s->fifo, (void **)s->in->extended_data,
178 s->nb_channels = inlink->channels;
179 s->chan = av_calloc(inlink->channels, sizeof(*s->chan));
181 return AVERROR(ENOMEM);
183 for (i = 0; i < inlink->channels; i++) {
184 DeclickChannel *c = &s->chan[i];
186 c->detection = av_calloc(s->window_size, sizeof(*c->detection));
187 c->auxiliary = av_calloc(s->ar_order + 1, sizeof(*c->auxiliary));
188 c->acoefficients = av_calloc(s->ar_order + 1, sizeof(*c->acoefficients));
189 c->acorrelation = av_calloc(s->ar_order + 1, sizeof(*c->acorrelation));
190 c->tmp = av_calloc(s->ar_order, sizeof(*c->tmp));
191 c->click = av_calloc(s->window_size, sizeof(*c->click));
192 c->index = av_calloc(s->window_size, sizeof(*c->index));
193 c->interpolated = av_calloc(s->window_size, sizeof(*c->interpolated));
194 if (!c->auxiliary || !c->acoefficients || !c->detection || !c->click ||
195 !c->index || !c->interpolated || !c->acorrelation || !c->tmp)
196 return AVERROR(ENOMEM);
202 static void autocorrelation(const double *input, int order, int size,
203 double *output, double scale)
207 for (i = 0; i <= order; i++) {
210 for (j = i; j < size; j++)
211 value += input[j] * input[j - i];
213 output[i] = value * scale;
217 static double autoregression(const double *samples, int ar_order,
218 int nb_samples, double *k, double *r, double *a)
223 memset(a, 0, ar_order * sizeof(*a));
225 autocorrelation(samples, ar_order, nb_samples, r, 1. / nb_samples);
227 /* Levinson-Durbin algorithm */
228 k[0] = a[0] = -r[1] / r[0];
229 alpha = r[0] * (1. - k[0] * k[0]);
230 for (i = 1; i < ar_order; i++) {
233 for (j = 0; j < i; j++)
234 epsilon += a[j] * r[i - j];
237 k[i] = -epsilon / alpha;
238 alpha *= (1. - k[i] * k[i]);
239 for (j = i - 1; j >= 0; j--)
240 k[j] = a[j] + k[i] * a[i - j - 1];
241 for (j = 0; j <= i; j++)
246 for (i = 1; i <= ar_order; i++)
252 static int isfinite_array(double *samples, int nb_samples)
256 for (i = 0; i < nb_samples; i++)
257 if (!isfinite(samples[i]))
263 static int find_index(int *index, int value, int size)
267 if ((value < index[0]) || (value > index[size - 1]))
273 while (start <= end) {
274 i = (end + start) / 2;
275 if (index[i] == value)
277 if (value < index[i])
279 if (value > index[i])
286 static int factorization(double *matrix, int n)
290 for (i = 0; i < n; i++) {
291 const int in = i * n;
294 value = matrix[in + i];
295 for (j = 0; j < i; j++)
296 value -= matrix[j * n + j] * matrix[in + j] * matrix[in + j];
302 matrix[in + i] = value;
303 for (j = i + 1; j < n; j++) {
304 const int jn = j * n;
308 for (k = 0; k < i; k++)
309 x -= matrix[k * n + k] * matrix[in + k] * matrix[jn + k];
310 matrix[jn + i] = x / matrix[in + i];
317 static int do_interpolation(DeclickChannel *c, double *matrix,
318 double *vector, int n, double *out)
323 ret = factorization(matrix, n);
327 av_fast_malloc(&c->y, &c->y_size, n * sizeof(*c->y));
330 return AVERROR(ENOMEM);
332 for (i = 0; i < n; i++) {
333 const int in = i * n;
337 for (j = 0; j < i; j++)
338 value -= matrix[in + j] * y[j];
342 for (i = n - 1; i >= 0; i--) {
343 out[i] = y[i] / matrix[i * n + i];
344 for (j = i + 1; j < n; j++)
345 out[i] -= matrix[j * n + i] * out[j];
351 static int interpolation(DeclickChannel *c, const double *src, int ar_order,
352 double *acoefficients, int *index, int nb_errors,
353 double *auxiliary, double *interpolated)
355 double *vector, *matrix;
358 av_fast_malloc(&c->matrix, &c->matrix_size, nb_errors * nb_errors * sizeof(*c->matrix));
361 return AVERROR(ENOMEM);
363 av_fast_malloc(&c->vector, &c->vector_size, nb_errors * sizeof(*c->vector));
366 return AVERROR(ENOMEM);
368 autocorrelation(acoefficients, ar_order, ar_order + 1, auxiliary, 1.);
370 for (i = 0; i < nb_errors; i++) {
371 const int im = i * nb_errors;
373 for (j = i; j < nb_errors; j++) {
374 if (abs(index[j] - index[i]) <= ar_order) {
375 matrix[j * nb_errors + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])];
377 matrix[j * nb_errors + i] = matrix[im + j] = 0;
382 for (i = 0; i < nb_errors; i++) {
385 for (j = -ar_order; j <= ar_order; j++)
386 if (find_index(index, index[i] - j, nb_errors))
387 value -= src[index[i] - j] * auxiliary[abs(j)];
392 return do_interpolation(c, matrix, vector, nb_errors, interpolated);
395 static int detect_clips(AudioDeclickContext *s, DeclickChannel *c,
397 double *unused1, double *unused2,
398 uint8_t *clip, int *index,
399 const double *src, double *dst)
401 const double threshold = s->threshold;
402 double max_amplitude = 0;
406 av_fast_malloc(&c->histogram, &c->histogram_size, s->nb_hbins * sizeof(*c->histogram));
408 return AVERROR(ENOMEM);
409 histogram = c->histogram;
410 memset(histogram, 0, sizeof(*histogram) * s->nb_hbins);
412 for (i = 0; i < s->window_size; i++) {
413 const unsigned index = fmin(fabs(src[i]), 1) * (s->nb_hbins - 1);
420 for (i = s->nb_hbins - 1; i > 1; i--) {
422 if (histogram[i] / (double)FFMAX(histogram[i - 1], 1) > threshold) {
423 max_amplitude = i / (double)s->nb_hbins;
429 if (max_amplitude > 0.) {
430 for (i = 0; i < s->window_size; i++) {
431 clip[i] = fabs(src[i]) >= max_amplitude;
435 memset(clip, 0, s->ar_order * sizeof(*clip));
436 memset(clip + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*clip));
438 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
440 index[nb_clips++] = i;
445 static int detect_clicks(AudioDeclickContext *s, DeclickChannel *c,
447 double *detection, double *acoefficients,
448 uint8_t *click, int *index,
449 const double *src, double *dst)
451 const double threshold = s->threshold;
452 int i, j, nb_clicks = 0, prev = -1;
454 memset(detection, 0, s->window_size * sizeof(*detection));
456 for (i = s->ar_order; i < s->window_size; i++) {
457 for (j = 0; j <= s->ar_order; j++) {
458 detection[i] += acoefficients[j] * src[i - j];
462 for (i = 0; i < s->window_size; i++) {
463 click[i] = fabs(detection[i]) > sigmae * threshold;
467 for (i = 0; i < s->window_size; i++) {
471 if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev))
472 for (j = prev + 1; j < i; j++)
477 memset(click, 0, s->ar_order * sizeof(*click));
478 memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click));
480 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
482 index[nb_clicks++] = i;
487 typedef struct ThreadData {
491 static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
493 AudioDeclickContext *s = ctx->priv;
494 ThreadData *td = arg;
495 AVFrame *out = td->out;
496 const double *src = (const double *)s->in->extended_data[ch];
497 double *is = (double *)s->is->extended_data[ch];
498 double *dst = (double *)s->out->extended_data[ch];
499 double *ptr = (double *)out->extended_data[ch];
500 double *buf = (double *)s->buffer->extended_data[ch];
501 const double *w = s->window_func_lut;
502 DeclickChannel *c = &s->chan[ch];
506 sigmae = autoregression(src, s->ar_order, s->window_size, c->acoefficients, c->acorrelation, c->tmp);
508 if (isfinite_array(c->acoefficients, s->ar_order + 1)) {
509 double *interpolated = c->interpolated;
510 int *index = c->index;
513 nb_errors = s->detector(s, c, sigmae, c->detection, c->acoefficients,
514 c->click, index, src, dst);
516 ret = interpolation(c, src, s->ar_order, c->acoefficients, index,
517 nb_errors, c->auxiliary, interpolated);
521 for (j = 0; j < nb_errors; j++) {
522 dst[index[j]] = interpolated[j];
527 memcpy(dst, src, s->window_size * sizeof(*dst));
530 if (s->method == 0) {
531 for (j = 0; j < s->window_size; j++)
532 buf[j] += dst[j] * w[j];
534 const int skip = s->overlap_skip;
536 for (j = 0; j < s->hop_size; j++)
537 buf[j] = dst[skip + j];
539 for (j = 0; j < s->hop_size; j++)
542 memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf));
543 memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is));
544 memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf));
545 memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is));
550 static int filter_frame(AVFilterLink *inlink)
552 AVFilterContext *ctx = inlink->dst;
553 AVFilterLink *outlink = ctx->outputs[0];
554 AudioDeclickContext *s = ctx->priv;
556 int ret = 0, j, ch, detected_errors = 0;
559 out = ff_get_audio_buffer(outlink, s->hop_size);
561 return AVERROR(ENOMEM);
563 ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data,
569 ret = ctx->internal->execute(ctx, filter_channel, &td, NULL, inlink->channels);
573 for (ch = 0; ch < s->in->channels; ch++) {
574 double *is = (double *)s->is->extended_data[ch];
576 for (j = 0; j < s->hop_size; j++) {
582 av_audio_fifo_drain(s->fifo, s->hop_size);
584 if (s->samples_left > 0)
585 out->nb_samples = FFMIN(s->hop_size, s->samples_left);
588 s->pts += s->hop_size;
590 s->detected_errors += detected_errors;
591 s->nb_samples += out->nb_samples * inlink->channels;
593 ret = ff_filter_frame(outlink, out);
597 if (s->samples_left > 0) {
598 s->samples_left -= s->hop_size;
599 if (s->samples_left <= 0)
600 av_audio_fifo_drain(s->fifo, av_audio_fifo_size(s->fifo));
609 static int activate(AVFilterContext *ctx)
611 AVFilterLink *inlink = ctx->inputs[0];
612 AVFilterLink *outlink = ctx->outputs[0];
613 AudioDeclickContext *s = ctx->priv;
618 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
620 ret = ff_inlink_consume_samples(inlink, s->window_size, s->window_size, &in);
624 if (s->pts == AV_NOPTS_VALUE)
627 ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
634 if (av_audio_fifo_size(s->fifo) >= s->window_size ||
636 return filter_frame(inlink);
638 if (av_audio_fifo_size(s->fifo) >= s->window_size) {
639 ff_filter_set_ready(ctx, 100);
643 if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
644 if (status == AVERROR_EOF) {
646 s->samples_left = av_audio_fifo_size(s->fifo) - s->overlap_skip;
647 ff_filter_set_ready(ctx, 100);
652 if (s->eof && s->samples_left <= 0) {
653 ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
658 FF_FILTER_FORWARD_WANTED(outlink, inlink);
660 return FFERROR_NOT_READY;
663 static av_cold int init(AVFilterContext *ctx)
665 AudioDeclickContext *s = ctx->priv;
667 s->is_declip = !strcmp(ctx->filter->name, "adeclip");
669 s->detector = detect_clips;
671 s->detector = detect_clicks;
677 static av_cold void uninit(AVFilterContext *ctx)
679 AudioDeclickContext *s = ctx->priv;
682 av_log(ctx, AV_LOG_INFO, "Detected %s in %"PRId64" of %"PRId64" samples (%g%%).\n",
683 s->is_declip ? "clips" : "clicks", s->detected_errors,
684 s->nb_samples, 100. * s->detected_errors / s->nb_samples);
686 av_audio_fifo_free(s->fifo);
687 av_freep(&s->window_func_lut);
688 av_frame_free(&s->in);
689 av_frame_free(&s->out);
690 av_frame_free(&s->buffer);
691 av_frame_free(&s->is);
694 for (i = 0; i < s->nb_channels; i++) {
695 DeclickChannel *c = &s->chan[i];
697 av_freep(&c->detection);
698 av_freep(&c->auxiliary);
699 av_freep(&c->acoefficients);
700 av_freep(&c->acorrelation);
704 av_freep(&c->interpolated);
705 av_freep(&c->matrix);
707 av_freep(&c->histogram);
708 c->histogram_size = 0;
709 av_freep(&c->vector);
719 static const AVFilterPad inputs[] = {
722 .type = AVMEDIA_TYPE_AUDIO,
723 .config_props = config_input,
728 static const AVFilterPad outputs[] = {
731 .type = AVMEDIA_TYPE_AUDIO,
736 AVFilter ff_af_adeclick = {
738 .description = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."),
739 .query_formats = query_formats,
740 .priv_size = sizeof(AudioDeclickContext),
741 .priv_class = &adeclick_class,
743 .activate = activate,
747 .flags = AVFILTER_FLAG_SLICE_THREADS,
750 static const AVOption adeclip_options[] = {
751 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
752 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
753 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=8}, 0, 25, AF },
754 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=10}, 1, 100, AF },
755 { "n", "set histogram size", OFFSET(nb_hbins), AV_OPT_TYPE_INT, {.i64=1000}, 100, 9999, AF },
756 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
757 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
758 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
762 AVFILTER_DEFINE_CLASS(adeclip);
764 AVFilter ff_af_adeclip = {
766 .description = NULL_IF_CONFIG_SMALL("Remove clipping from input audio."),
767 .query_formats = query_formats,
768 .priv_size = sizeof(AudioDeclickContext),
769 .priv_class = &adeclip_class,
771 .activate = activate,
775 .flags = AVFILTER_FLAG_SLICE_THREADS,