2 * Copyright (c) 2018 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/audio_fifo.h"
22 #include "libavutil/opt.h"
29 typedef struct DeclickChannel {
32 double *acoefficients;
48 typedef struct AudioDeclickContext {
77 uint64_t detected_errors;
83 double *window_func_lut;
85 int (*detector)(struct AudioDeclickContext *s, DeclickChannel *c,
86 double sigmae, double *detection,
87 double *acoefficients, uint8_t *click, int *index,
88 const double *src, double *dst);
89 } AudioDeclickContext;
91 #define OFFSET(x) offsetof(AudioDeclickContext, x)
92 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
94 static const AVOption adeclick_options[] = {
95 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
96 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
97 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 25, AF },
98 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 100, AF },
99 { "b", "set burst fusion", OFFSET(burst), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, 10, AF },
100 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
101 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
102 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
106 AVFILTER_DEFINE_CLASS(adeclick);
108 static int query_formats(AVFilterContext *ctx)
110 AVFilterFormats *formats = NULL;
111 AVFilterChannelLayouts *layouts = NULL;
112 static const enum AVSampleFormat sample_fmts[] = {
118 formats = ff_make_format_list(sample_fmts);
120 return AVERROR(ENOMEM);
121 ret = ff_set_common_formats(ctx, formats);
125 layouts = ff_all_channel_counts();
127 return AVERROR(ENOMEM);
129 ret = ff_set_common_channel_layouts(ctx, layouts);
133 formats = ff_all_samplerates();
134 return ff_set_common_samplerates(ctx, formats);
137 static int config_input(AVFilterLink *inlink)
139 AVFilterContext *ctx = inlink->dst;
140 AudioDeclickContext *s = ctx->priv;
143 s->pts = AV_NOPTS_VALUE;
144 s->window_size = inlink->sample_rate * s->w / 1000.;
145 if (s->window_size < 100)
146 return AVERROR(EINVAL);
147 s->ar_order = FFMAX(s->window_size * s->ar / 100., 1);
148 s->nb_burst_samples = s->window_size * s->burst / 1000.;
149 s->hop_size = s->window_size * (1. - (s->overlap / 100.));
151 return AVERROR(EINVAL);
153 s->window_func_lut = av_calloc(s->window_size, sizeof(*s->window_func_lut));
154 if (!s->window_func_lut)
155 return AVERROR(ENOMEM);
156 for (i = 0; i < s->window_size; i++)
157 s->window_func_lut[i] = sin(M_PI * i / s->window_size) *
158 (1. - (s->overlap / 100.)) * M_PI_2;
160 av_frame_free(&s->in);
161 av_frame_free(&s->out);
162 av_frame_free(&s->buffer);
163 av_frame_free(&s->is);
164 s->enabled = ff_get_audio_buffer(inlink, s->window_size);
165 s->in = ff_get_audio_buffer(inlink, s->window_size);
166 s->out = ff_get_audio_buffer(inlink, s->window_size);
167 s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
168 s->is = ff_get_audio_buffer(inlink, s->window_size);
169 if (!s->in || !s->out || !s->buffer || !s->is || !s->enabled)
170 return AVERROR(ENOMEM);
172 s->efifo = av_audio_fifo_alloc(inlink->format, 1, s->window_size);
174 return AVERROR(ENOMEM);
175 s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
177 return AVERROR(ENOMEM);
178 s->overlap_skip = s->method ? (s->window_size - s->hop_size) / 2 : 0;
179 if (s->overlap_skip > 0) {
180 av_audio_fifo_write(s->fifo, (void **)s->in->extended_data,
184 s->nb_channels = inlink->channels;
185 s->chan = av_calloc(inlink->channels, sizeof(*s->chan));
187 return AVERROR(ENOMEM);
189 for (i = 0; i < inlink->channels; i++) {
190 DeclickChannel *c = &s->chan[i];
192 c->detection = av_calloc(s->window_size, sizeof(*c->detection));
193 c->auxiliary = av_calloc(s->ar_order + 1, sizeof(*c->auxiliary));
194 c->acoefficients = av_calloc(s->ar_order + 1, sizeof(*c->acoefficients));
195 c->acorrelation = av_calloc(s->ar_order + 1, sizeof(*c->acorrelation));
196 c->tmp = av_calloc(s->ar_order, sizeof(*c->tmp));
197 c->click = av_calloc(s->window_size, sizeof(*c->click));
198 c->index = av_calloc(s->window_size, sizeof(*c->index));
199 c->interpolated = av_calloc(s->window_size, sizeof(*c->interpolated));
200 if (!c->auxiliary || !c->acoefficients || !c->detection || !c->click ||
201 !c->index || !c->interpolated || !c->acorrelation || !c->tmp)
202 return AVERROR(ENOMEM);
208 static void autocorrelation(const double *input, int order, int size,
209 double *output, double scale)
213 for (i = 0; i <= order; i++) {
216 for (j = i; j < size; j++)
217 value += input[j] * input[j - i];
219 output[i] = value * scale;
223 static double autoregression(const double *samples, int ar_order,
224 int nb_samples, double *k, double *r, double *a)
229 memset(a, 0, ar_order * sizeof(*a));
231 autocorrelation(samples, ar_order, nb_samples, r, 1. / nb_samples);
233 /* Levinson-Durbin algorithm */
234 k[0] = a[0] = -r[1] / r[0];
235 alpha = r[0] * (1. - k[0] * k[0]);
236 for (i = 1; i < ar_order; i++) {
239 for (j = 0; j < i; j++)
240 epsilon += a[j] * r[i - j];
243 k[i] = -epsilon / alpha;
244 alpha *= (1. - k[i] * k[i]);
245 for (j = i - 1; j >= 0; j--)
246 k[j] = a[j] + k[i] * a[i - j - 1];
247 for (j = 0; j <= i; j++)
252 for (i = 1; i <= ar_order; i++)
258 static int isfinite_array(double *samples, int nb_samples)
262 for (i = 0; i < nb_samples; i++)
263 if (!isfinite(samples[i]))
269 static int find_index(int *index, int value, int size)
273 if ((value < index[0]) || (value > index[size - 1]))
279 while (start <= end) {
280 i = (end + start) / 2;
281 if (index[i] == value)
283 if (value < index[i])
285 if (value > index[i])
292 static int factorization(double *matrix, int n)
296 for (i = 0; i < n; i++) {
297 const int in = i * n;
300 value = matrix[in + i];
301 for (j = 0; j < i; j++)
302 value -= matrix[j * n + j] * matrix[in + j] * matrix[in + j];
308 matrix[in + i] = value;
309 for (j = i + 1; j < n; j++) {
310 const int jn = j * n;
314 for (k = 0; k < i; k++)
315 x -= matrix[k * n + k] * matrix[in + k] * matrix[jn + k];
316 matrix[jn + i] = x / matrix[in + i];
323 static int do_interpolation(DeclickChannel *c, double *matrix,
324 double *vector, int n, double *out)
329 ret = factorization(matrix, n);
333 av_fast_malloc(&c->y, &c->y_size, n * sizeof(*c->y));
336 return AVERROR(ENOMEM);
338 for (i = 0; i < n; i++) {
339 const int in = i * n;
343 for (j = 0; j < i; j++)
344 value -= matrix[in + j] * y[j];
348 for (i = n - 1; i >= 0; i--) {
349 out[i] = y[i] / matrix[i * n + i];
350 for (j = i + 1; j < n; j++)
351 out[i] -= matrix[j * n + i] * out[j];
357 static int interpolation(DeclickChannel *c, const double *src, int ar_order,
358 double *acoefficients, int *index, int nb_errors,
359 double *auxiliary, double *interpolated)
361 double *vector, *matrix;
364 av_fast_malloc(&c->matrix, &c->matrix_size, nb_errors * nb_errors * sizeof(*c->matrix));
367 return AVERROR(ENOMEM);
369 av_fast_malloc(&c->vector, &c->vector_size, nb_errors * sizeof(*c->vector));
372 return AVERROR(ENOMEM);
374 autocorrelation(acoefficients, ar_order, ar_order + 1, auxiliary, 1.);
376 for (i = 0; i < nb_errors; i++) {
377 const int im = i * nb_errors;
379 for (j = i; j < nb_errors; j++) {
380 if (abs(index[j] - index[i]) <= ar_order) {
381 matrix[j * nb_errors + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])];
383 matrix[j * nb_errors + i] = matrix[im + j] = 0;
388 for (i = 0; i < nb_errors; i++) {
391 for (j = -ar_order; j <= ar_order; j++)
392 if (find_index(index, index[i] - j, nb_errors))
393 value -= src[index[i] - j] * auxiliary[abs(j)];
398 return do_interpolation(c, matrix, vector, nb_errors, interpolated);
401 static int detect_clips(AudioDeclickContext *s, DeclickChannel *c,
403 double *unused1, double *unused2,
404 uint8_t *clip, int *index,
405 const double *src, double *dst)
407 const double threshold = s->threshold;
408 double max_amplitude = 0;
412 av_fast_malloc(&c->histogram, &c->histogram_size, s->nb_hbins * sizeof(*c->histogram));
414 return AVERROR(ENOMEM);
415 histogram = c->histogram;
416 memset(histogram, 0, sizeof(*histogram) * s->nb_hbins);
418 for (i = 0; i < s->window_size; i++) {
419 const unsigned index = fmin(fabs(src[i]), 1) * (s->nb_hbins - 1);
426 for (i = s->nb_hbins - 1; i > 1; i--) {
428 if (histogram[i] / (double)FFMAX(histogram[i - 1], 1) > threshold) {
429 max_amplitude = i / (double)s->nb_hbins;
435 if (max_amplitude > 0.) {
436 for (i = 0; i < s->window_size; i++) {
437 clip[i] = fabs(src[i]) >= max_amplitude;
441 memset(clip, 0, s->ar_order * sizeof(*clip));
442 memset(clip + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*clip));
444 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
446 index[nb_clips++] = i;
451 static int detect_clicks(AudioDeclickContext *s, DeclickChannel *c,
453 double *detection, double *acoefficients,
454 uint8_t *click, int *index,
455 const double *src, double *dst)
457 const double threshold = s->threshold;
458 int i, j, nb_clicks = 0, prev = -1;
460 memset(detection, 0, s->window_size * sizeof(*detection));
462 for (i = s->ar_order; i < s->window_size; i++) {
463 for (j = 0; j <= s->ar_order; j++) {
464 detection[i] += acoefficients[j] * src[i - j];
468 for (i = 0; i < s->window_size; i++) {
469 click[i] = fabs(detection[i]) > sigmae * threshold;
473 for (i = 0; i < s->window_size; i++) {
477 if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev))
478 for (j = prev + 1; j < i; j++)
483 memset(click, 0, s->ar_order * sizeof(*click));
484 memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click));
486 for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
488 index[nb_clicks++] = i;
493 typedef struct ThreadData {
497 static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
499 AudioDeclickContext *s = ctx->priv;
500 ThreadData *td = arg;
501 AVFrame *out = td->out;
502 const double *src = (const double *)s->in->extended_data[ch];
503 double *is = (double *)s->is->extended_data[ch];
504 double *dst = (double *)s->out->extended_data[ch];
505 double *ptr = (double *)out->extended_data[ch];
506 double *buf = (double *)s->buffer->extended_data[ch];
507 const double *w = s->window_func_lut;
508 DeclickChannel *c = &s->chan[ch];
512 sigmae = autoregression(src, s->ar_order, s->window_size, c->acoefficients, c->acorrelation, c->tmp);
514 if (isfinite_array(c->acoefficients, s->ar_order + 1)) {
515 double *interpolated = c->interpolated;
516 int *index = c->index;
519 nb_errors = s->detector(s, c, sigmae, c->detection, c->acoefficients,
520 c->click, index, src, dst);
522 double *enabled = (double *)s->enabled->extended_data[0];
524 ret = interpolation(c, src, s->ar_order, c->acoefficients, index,
525 nb_errors, c->auxiliary, interpolated);
529 av_audio_fifo_peek(s->efifo, (void**)s->enabled->extended_data, s->window_size);
531 for (j = 0; j < nb_errors; j++) {
532 if (enabled[index[j]]) {
533 dst[index[j]] = interpolated[j];
539 memcpy(dst, src, s->window_size * sizeof(*dst));
542 if (s->method == 0) {
543 for (j = 0; j < s->window_size; j++)
544 buf[j] += dst[j] * w[j];
546 const int skip = s->overlap_skip;
548 for (j = 0; j < s->hop_size; j++)
549 buf[j] = dst[skip + j];
551 for (j = 0; j < s->hop_size; j++)
554 memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf));
555 memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is));
556 memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf));
557 memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is));
562 static int filter_frame(AVFilterLink *inlink)
564 AVFilterContext *ctx = inlink->dst;
565 AVFilterLink *outlink = ctx->outputs[0];
566 AudioDeclickContext *s = ctx->priv;
568 int ret = 0, j, ch, detected_errors = 0;
571 out = ff_get_audio_buffer(outlink, s->hop_size);
573 return AVERROR(ENOMEM);
575 ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data,
581 ret = ctx->internal->execute(ctx, filter_channel, &td, NULL, inlink->channels);
585 for (ch = 0; ch < s->in->channels; ch++) {
586 double *is = (double *)s->is->extended_data[ch];
588 for (j = 0; j < s->hop_size; j++) {
594 av_audio_fifo_drain(s->fifo, s->hop_size);
595 av_audio_fifo_drain(s->efifo, s->hop_size);
597 if (s->samples_left > 0)
598 out->nb_samples = FFMIN(s->hop_size, s->samples_left);
601 s->pts += av_rescale_q(s->hop_size, (AVRational){1, outlink->sample_rate}, outlink->time_base);
603 s->detected_errors += detected_errors;
604 s->nb_samples += out->nb_samples * inlink->channels;
606 ret = ff_filter_frame(outlink, out);
610 if (s->samples_left > 0) {
611 s->samples_left -= s->hop_size;
612 if (s->samples_left <= 0)
613 av_audio_fifo_drain(s->fifo, av_audio_fifo_size(s->fifo));
622 static int activate(AVFilterContext *ctx)
624 AVFilterLink *inlink = ctx->inputs[0];
625 AVFilterLink *outlink = ctx->outputs[0];
626 AudioDeclickContext *s = ctx->priv;
631 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
633 ret = ff_inlink_consume_samples(inlink, s->window_size, s->window_size, &in);
637 double *e = (double *)s->enabled->extended_data[0];
639 if (s->pts == AV_NOPTS_VALUE)
642 ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
644 for (int i = 0; i < in->nb_samples; i++)
645 e[i] = !ctx->is_disabled;
647 av_audio_fifo_write(s->efifo, (void**)s->enabled->extended_data, in->nb_samples);
653 if (av_audio_fifo_size(s->fifo) >= s->window_size ||
655 return filter_frame(inlink);
657 if (av_audio_fifo_size(s->fifo) >= s->window_size) {
658 ff_filter_set_ready(ctx, 100);
662 if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
663 if (status == AVERROR_EOF) {
665 s->samples_left = av_audio_fifo_size(s->fifo) - s->overlap_skip;
666 ff_filter_set_ready(ctx, 100);
671 if (s->eof && s->samples_left <= 0) {
672 ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
677 FF_FILTER_FORWARD_WANTED(outlink, inlink);
679 return FFERROR_NOT_READY;
682 static av_cold int init(AVFilterContext *ctx)
684 AudioDeclickContext *s = ctx->priv;
686 s->is_declip = !strcmp(ctx->filter->name, "adeclip");
688 s->detector = detect_clips;
690 s->detector = detect_clicks;
696 static av_cold void uninit(AVFilterContext *ctx)
698 AudioDeclickContext *s = ctx->priv;
701 av_log(ctx, AV_LOG_INFO, "Detected %s in %"PRId64" of %"PRId64" samples (%g%%).\n",
702 s->is_declip ? "clips" : "clicks", s->detected_errors,
703 s->nb_samples, 100. * s->detected_errors / s->nb_samples);
705 av_audio_fifo_free(s->fifo);
706 av_audio_fifo_free(s->efifo);
707 av_freep(&s->window_func_lut);
708 av_frame_free(&s->enabled);
709 av_frame_free(&s->in);
710 av_frame_free(&s->out);
711 av_frame_free(&s->buffer);
712 av_frame_free(&s->is);
715 for (i = 0; i < s->nb_channels; i++) {
716 DeclickChannel *c = &s->chan[i];
718 av_freep(&c->detection);
719 av_freep(&c->auxiliary);
720 av_freep(&c->acoefficients);
721 av_freep(&c->acorrelation);
725 av_freep(&c->interpolated);
726 av_freep(&c->matrix);
728 av_freep(&c->histogram);
729 c->histogram_size = 0;
730 av_freep(&c->vector);
740 static const AVFilterPad inputs[] = {
743 .type = AVMEDIA_TYPE_AUDIO,
744 .config_props = config_input,
749 static const AVFilterPad outputs[] = {
752 .type = AVMEDIA_TYPE_AUDIO,
757 AVFilter ff_af_adeclick = {
759 .description = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."),
760 .query_formats = query_formats,
761 .priv_size = sizeof(AudioDeclickContext),
762 .priv_class = &adeclick_class,
764 .activate = activate,
768 .flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
771 static const AVOption adeclip_options[] = {
772 { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10, 100, AF },
773 { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
774 { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=8}, 0, 25, AF },
775 { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=10}, 1, 100, AF },
776 { "n", "set histogram size", OFFSET(nb_hbins), AV_OPT_TYPE_INT, {.i64=1000}, 100, 9999, AF },
777 { "m", "set overlap method", OFFSET(method), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "m" },
778 { "a", "overlap-add", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "m" },
779 { "s", "overlap-save", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "m" },
783 AVFILTER_DEFINE_CLASS(adeclip);
785 AVFilter ff_af_adeclip = {
787 .description = NULL_IF_CONFIG_SMALL("Remove clipping from input audio."),
788 .query_formats = query_formats,
789 .priv_size = sizeof(AudioDeclickContext),
790 .priv_class = &adeclip_class,
792 .activate = activate,
796 .flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,