2 * Copyright (c) 2001 Heikki Leinonen
3 * Copyright (c) 2001 Chris Bagwell
4 * Copyright (c) 2003 Donnie Smith
5 * Copyright (c) 2014 Paul B Mahol
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include <float.h> /* DBL_MAX */
26 #include "libavutil/opt.h"
27 #include "libavutil/timestamp.h"
/*
 * Private per-instance state of the silenceremove filter.
 *
 * NOTE(review): other members used elsewhere in this file (start_periods,
 * stop_periods, leave_silence, stop_holdoff, window, window_end,
 * window_size, rms_sum) have no declaration in this view; presumably they
 * are declared on lines not shown here.
 */
41 typedef struct SilenceRemoveContext {
/* Current state; this file assigns SILENCE_TRIM, SILENCE_TRIM_FLUSH,
 * SILENCE_COPY, SILENCE_COPY_FLUSH and SILENCE_STOP to it. */
44 enum SilenceMode mode;
/* Set from the "start_duration" option, then rescaled with the input
 * sample rate in config_input(). */
47 int64_t start_duration;
/* Compared against compute_rms() results while in the trim phase. */
48 double start_threshold;
/* Same roles as the start_* pair above, for the "stop" side. */
51 int64_t stop_duration;
52 double stop_threshold;
/* Buffer of doubles allocated in config_input(); filter_frame() appends
 * input samples to it and later copies them into output frames. */
54 double *start_holdoff;
/* Index of the next sample to copy out of start_holdoff. */
55 size_t start_holdoff_offset;
/* Index one past the last sample stored in start_holdoff. */
56 size_t start_holdoff_end;
/* Incremented in filter_frame() and compared against start_periods. */
57 int start_found_periods;
/* Read/write indices and period counter for stop_holdoff, used the same
 * way as their start_* counterparts. */
60 size_t stop_holdoff_offset;
61 size_t stop_holdoff_end;
62 int stop_found_periods;
/* Cursor into the window buffer; dereferenced by compute_rms() and
 * update_rms(), reset to the buffer start by clear_rms(). */
65 double *window_current;
73 } SilenceRemoveContext;
/* Byte offset of member x inside SilenceRemoveContext, for the option table. */
75 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
/* Flags shared by every option below: audio filtering parameters. */
76 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
/*
 * User-visible options. Each entry lists: name, help text (NULL here),
 * target member offset, option type, default, minimum, maximum, flags.
 *
 * NOTE(review): start_duration/stop_duration are AV_OPT_TYPE_DURATION with a
 * maximum of 9000. If duration options are stored in microseconds (the usual
 * libavutil convention -- confirm), this caps them at 9 ms; verify that this
 * limit is intended.
 */
77 static const AVOption silenceremove_options[] = {
78 { "start_periods", NULL, OFFSET(start_periods), AV_OPT_TYPE_INT, {.i64=0}, 0, 9000, FLAGS },
79 { "start_duration", NULL, OFFSET(start_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
80 { "start_threshold", NULL, OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
/* stop_periods alone accepts negative values; init() replaces a negative
 * value with its absolute value. */
81 { "stop_periods", NULL, OFFSET(stop_periods), AV_OPT_TYPE_INT, {.i64=0}, -9000, 9000, FLAGS },
82 { "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
83 { "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
/* Integer option restricted to 0 or 1. */
84 { "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
/* Generates the class object referenced as silenceremove_class in the
 * filter definition. */
88 AVFILTER_DEFINE_CLASS(silenceremove);
/*
 * Filter init callback.
 *
 * A negative stop_periods is replaced by its absolute value.
 * NOTE(review): the remainder of this branch and the return statement are
 * not visible in this view.
 */
90 static av_cold int init(AVFilterContext *ctx)
92 SilenceRemoveContext *s = ctx->priv;
94 if (s->stop_periods < 0) {
95 s->stop_periods = -s->stop_periods;
/*
 * Reset the RMS window: zero all window_size entries, point window_current
 * at the first entry and window_end one past the last entry.
 * NOTE(review): any further resets (e.g. of rms_sum) are not visible in this
 * view.
 */
102 static void clear_rms(SilenceRemoveContext *s)
104 memset(s->window, 0, s->window_size * sizeof(*s->window));
106 s->window_current = s->window;
107 s->window_end = s->window + s->window_size;
/*
 * Input link configuration callback: sizes and allocates the RMS window and
 * both hold-off buffers, converts the duration options using the input
 * sample rate, and chooses the initial mode.
 *
 * Returns AVERROR(ENOMEM) when an allocation fails.
 * NOTE(review): several continuation lines, checks and the final return are
 * not visible in this view.
 */
111 static int config_input(AVFilterLink *inlink)
113 AVFilterContext *ctx = inlink->dst;
114 SilenceRemoveContext *s = ctx->priv;
/* Window length: sample_rate / 50 entries per channel. */
116 s->window_size = (inlink->sample_rate / 50) * inlink->channels;
117 s->window = av_malloc_array(s->window_size, sizeof(*s->window));
119 return AVERROR(ENOMEM);
/* Rescale both durations with the sample rate; the last av_rescale()
 * argument is on a line not visible here. */
123 s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
125 s->stop_duration = av_rescale(s->stop_duration, inlink->sample_rate,
/* Allocate at least one element even when the duration is 0. */
128 s->start_holdoff = av_malloc_array(FFMAX(s->start_duration, 1),
129 sizeof(*s->start_holdoff) *
131 if (!s->start_holdoff)
132 return AVERROR(ENOMEM);
134 s->start_holdoff_offset = 0;
135 s->start_holdoff_end = 0;
136 s->start_found_periods = 0;
/* Same allocation scheme for the stop hold-off buffer. */
138 s->stop_holdoff = av_malloc_array(FFMAX(s->stop_duration, 1),
139 sizeof(*s->stop_holdoff) *
141 if (!s->stop_holdoff)
142 return AVERROR(ENOMEM);
144 s->stop_holdoff_offset = 0;
145 s->stop_holdoff_end = 0;
146 s->stop_found_periods = 0;
/* Start in SILENCE_TRIM when start_periods is non-zero; SILENCE_COPY is
 * the other assignment (its else line is not visible here). */
148 if (s->start_periods)
149 s->mode = SILENCE_TRIM;
151 s->mode = SILENCE_COPY;
/*
 * Output link configuration callback: sets FF_LINK_FLAG_REQUEST_LOOP on the
 * output link's flags.
 */
156 static int config_output(AVFilterLink *outlink)
158 outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
/*
 * Compute the RMS value the window would have if the entry at
 * window_current were replaced by sample squared.
 *
 * Starts from rms_sum, subtracts the entry at window_current, adds
 * sample * sample, and returns sqrt(sum / window_size). The visible lines
 * only read from *s.
 */
163 static double compute_rms(SilenceRemoveContext *s, double sample)
167 new_sum = s->rms_sum;
168 new_sum -= *s->window_current;
169 new_sum += sample * sample;
171 return sqrt(new_sum / s->window_size);
/*
 * Store sample squared at window_current and keep rms_sum consistent by
 * subtracting the old entry and adding the new one.
 *
 * When window_current has reached window_end it wraps back to window.
 * NOTE(review): the statement that advances window_current is not visible in
 * this view.
 */
174 static void update_rms(SilenceRemoveContext *s, double sample)
176 s->rms_sum -= *s->window_current;
177 *s->window_current = sample * sample;
178 s->rms_sum += *s->window_current;
181 if (s->window_current >= s->window_end)
182 s->window_current = s->window;
/*
 * Send a partially filled output frame downstream.
 *
 * When *nb_samples_written is non-zero: out->nb_samples is set to that count
 * divided by the channel count, out is passed to ff_filter_frame() on
 * outlink, the result is stored in *ret, and the counter is reset to 0.
 * NOTE(review): handling of the zero-sample case is not visible in this view.
 */
185 static void flush(AVFrame *out, AVFilterLink *outlink,
186 int *nb_samples_written, int *ret)
188 if (*nb_samples_written) {
189 out->nb_samples = *nb_samples_written / outlink->channels;
190 *ret = ff_filter_frame(outlink, out);
191 *nb_samples_written = 0;
/*
 * Input pad frame callback: runs the silence-removal state machine over the
 * samples of `in`, which are read as interleaved doubles from in->data[0].
 *
 * NOTE(review): the switch skeleton, several case/goto labels, some branch
 * conditions and the cleanup/return path are on lines not visible in this
 * view; the comments below describe only the visible statements.
 */
197 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
199 AVFilterContext *ctx = inlink->dst;
200 AVFilterLink *outlink = ctx->outputs[0];
201 SilenceRemoveContext *s = ctx->priv;
202 int i, j, threshold, ret = 0;
203 int nbs, nb_samples_read, nb_samples_written;
204 double *obuf, *ibuf = (double *)in->data[0];
/* Both counters are counted in individual samples (all channels), hence
 * the divisions by the channel count below. */
207 nb_samples_read = nb_samples_written = 0;
/* Trim phase: remaining per-channel sample count in this frame. */
212 nbs = in->nb_samples - nb_samples_read / inlink->channels;
216 for (i = 0; i < nbs; i++) {
/* threshold becomes non-zero if any channel's prospective RMS exceeds
 * start_threshold. */
218 for (j = 0; j < inlink->channels; j++) {
219 threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
/* Commit the samples to the RMS window and append them to the start
 * hold-off buffer. */
223 for (j = 0; j < inlink->channels; j++) {
224 update_rms(s, *ibuf);
225 s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
/* Hold-off holds start_duration samples per channel: count one period and,
 * once start_periods is reached, switch to flushing the hold-off. */
229 if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
230 if (++s->start_found_periods >= s->start_periods) {
231 s->mode = SILENCE_TRIM_FLUSH;
232 goto silence_trim_flush;
235 s->start_holdoff_offset = 0;
236 s->start_holdoff_end = 0;
/* Alternative path: discard the hold-off contents, still feed the RMS
 * window, and skip over the samples. */
239 s->start_holdoff_end = 0;
241 for (j = 0; j < inlink->channels; j++)
242 update_rms(s, ibuf[j]);
244 ibuf += inlink->channels;
245 nb_samples_read += inlink->channels;
250 case SILENCE_TRIM_FLUSH:
/* Number of held samples, rounded down to a whole number of channel
 * groups. */
252 nbs = s->start_holdoff_end - s->start_holdoff_offset;
253 nbs -= nbs % inlink->channels;
257 out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
260 return AVERROR(ENOMEM);
/* Copy the held samples into the new frame and send it downstream. */
263 memcpy(out->data[0], &s->start_holdoff[s->start_holdoff_offset],
264 nbs * sizeof(double));
265 s->start_holdoff_offset += nbs;
267 ret = ff_filter_frame(outlink, out);
/* Hold-off fully drained: reset it and continue in copy mode. */
269 if (s->start_holdoff_offset == s->start_holdoff_end) {
270 s->start_holdoff_offset = 0;
271 s->start_holdoff_end = 0;
272 s->mode = SILENCE_COPY;
/* Copy phase: allocate an output frame for the rest of the input. */
279 nbs = in->nb_samples - nb_samples_read / inlink->channels;
283 out = ff_get_audio_buffer(inlink, nbs);
286 return AVERROR(ENOMEM);
288 obuf = (double *)out->data[0];
290 if (s->stop_periods) {
291 for (i = 0; i < nbs; i++) {
/* threshold stays non-zero only if every channel's prospective RMS
 * exceeds stop_threshold. */
293 for (j = 0; j < inlink->channels; j++)
294 threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
/* Above threshold while the stop hold-off holds data and leave_silence is
 * off: emit what has been written so far, then flush the hold-off. */
296 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
297 s->mode = SILENCE_COPY_FLUSH;
298 flush(out, outlink, &nb_samples_written, &ret);
299 goto silence_copy_flush;
300 } else if (threshold) {
301 for (j = 0; j < inlink->channels; j++) {
302 update_rms(s, *ibuf);
305 nb_samples_written++;
/* Below threshold: with leave_silence the sample is counted as written;
 * the visible store appends it to the stop hold-off buffer. */
307 } else if (!threshold) {
308 for (j = 0; j < inlink->channels; j++) {
309 update_rms(s, *ibuf);
310 if (s->leave_silence) {
312 nb_samples_written++;
315 s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
/* Stop hold-off holds stop_duration samples per channel: count one period;
 * once stop_periods is reached the hold-off is dropped. */
319 if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) {
320 if (++s->stop_found_periods >= s->stop_periods) {
321 s->stop_holdoff_offset = 0;
322 s->stop_holdoff_end = 0;
/* One visible outcome stops output entirely... */
325 s->mode = SILENCE_STOP;
326 flush(out, outlink, &nb_samples_written, &ret);
/* ...another resets all counters and returns to the trim phase
 * (the selecting condition is not visible here). */
329 s->stop_found_periods = 0;
330 s->start_found_periods = 0;
331 s->start_holdoff_offset = 0;
332 s->start_holdoff_end = 0;
334 s->mode = SILENCE_TRIM;
335 flush(out, outlink, &nb_samples_written, &ret);
339 s->mode = SILENCE_COPY_FLUSH;
340 flush(out, outlink, &nb_samples_written, &ret);
341 goto silence_copy_flush;
345 flush(out, outlink, &nb_samples_written, &ret);
/* Straight copy of the remaining input into the output frame. */
347 memcpy(obuf, ibuf, sizeof(double) * nbs * inlink->channels);
348 ret = ff_filter_frame(outlink, out);
352 case SILENCE_COPY_FLUSH:
/* Emit the stop hold-off contents, rounded down to whole channel groups. */
354 nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
355 nbs -= nbs % inlink->channels;
359 out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
362 return AVERROR(ENOMEM);
365 memcpy(out->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
366 nbs * sizeof(double));
367 s->stop_holdoff_offset += nbs;
369 ret = ff_filter_frame(outlink, out);
/* Hold-off fully drained: reset it and go back to copy mode. */
371 if (s->stop_holdoff_offset == s->stop_holdoff_end) {
372 s->stop_holdoff_offset = 0;
373 s->stop_holdoff_end = 0;
374 s->mode = SILENCE_COPY;
/*
 * Output pad request callback.
 *
 * Requests a frame from the input link. When the input reports AVERROR_EOF
 * while the filter is in SILENCE_COPY or SILENCE_COPY_FLUSH mode, the samples
 * still held in stop_holdoff are copied into a new frame and sent
 * downstream, and the mode becomes SILENCE_STOP.
 *
 * Returns AVERROR(ENOMEM) when the frame cannot be allocated.
 * NOTE(review): declarations, some guards and the final return are on lines
 * not visible in this view and are left untouched.
 */
388 static int request_frame(AVFilterLink *outlink)
390 AVFilterContext *ctx = outlink->src;
391 SilenceRemoveContext *s = ctx->priv;
394 ret = ff_request_frame(ctx->inputs[0]);
395 if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
396 s->mode == SILENCE_COPY)) {
/* Samples still waiting in the stop hold-off buffer. */
397 int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
401 frame = ff_get_audio_buffer(outlink, nbs / outlink->channels);
403 return AVERROR(ENOMEM);
405 memcpy(frame->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
406 nbs * sizeof(double));
/* Send the leftover samples on the OUTPUT link. ctx->inputs[0] is the link
 * whose destination is this filter, so using it here would feed the frame
 * back into this filter's own filter_frame() instead of delivering it. */
407 ret = ff_filter_frame(outlink, frame);
409 s->mode = SILENCE_STOP;
/*
 * Format negotiation callback: advertises all channel layouts, the single
 * sample format AV_SAMPLE_FMT_DBL, and all sample rates.
 *
 * Returns AVERROR(ENOMEM) when a list cannot be allocated; otherwise the
 * result of the last ff_set_common_*() call.
 * NOTE(review): the NULL checks and intermediate error checks are on lines
 * not visible in this view.
 */
414 static int query_formats(AVFilterContext *ctx)
416 AVFilterFormats *formats = NULL;
417 AVFilterChannelLayouts *layouts = NULL;
/* List terminated by AV_SAMPLE_FMT_NONE. */
418 static const enum AVSampleFormat sample_fmts[] = {
419 AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE
423 layouts = ff_all_channel_layouts();
425 return AVERROR(ENOMEM);
426 ret = ff_set_common_channel_layouts(ctx, layouts);
430 formats = ff_make_format_list(sample_fmts);
432 return AVERROR(ENOMEM);
433 ret = ff_set_common_formats(ctx, formats);
437 formats = ff_all_samplerates();
439 return AVERROR(ENOMEM);
440 return ff_set_common_samplerates(ctx, formats);
/*
 * Filter teardown callback: releases the two hold-off buffers and the RMS
 * window through av_freep(), which is given the address of each pointer.
 */
443 static av_cold void uninit(AVFilterContext *ctx)
445 SilenceRemoveContext *s = ctx->priv;
447 av_freep(&s->start_holdoff);
448 av_freep(&s->stop_holdoff);
449 av_freep(&s->window);
/*
 * Input pad table: one audio pad configured by config_input() and fed
 * through filter_frame().
 * NOTE(review): the pad name and the table terminator are not visible in
 * this view.
 */
452 static const AVFilterPad silenceremove_inputs[] = {
455 .type = AVMEDIA_TYPE_AUDIO,
456 .config_props = config_input,
457 .filter_frame = filter_frame,
/*
 * Output pad table: one audio pad configured by config_output() and driven
 * by request_frame().
 * NOTE(review): the pad name and the table terminator are not visible in
 * this view.
 */
462 static const AVFilterPad silenceremove_outputs[] = {
465 .type = AVMEDIA_TYPE_AUDIO,
466 .config_props = config_output,
467 .request_frame = request_frame,
/*
 * Filter definition for "silenceremove": ties together the private context
 * size, the option class, format negotiation and the pad tables.
 * NOTE(review): two initializer lines between .priv_class and
 * .query_formats are not visible in this view.
 */
472 AVFilter ff_af_silenceremove = {
473 .name = "silenceremove",
474 .description = NULL_IF_CONFIG_SMALL("Remove silence."),
475 .priv_size = sizeof(SilenceRemoveContext),
476 .priv_class = &silenceremove_class,
479 .query_formats = query_formats,
480 .inputs = silenceremove_inputs,
481 .outputs = silenceremove_outputs,