X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Faf_silenceremove.c;h=f156d1883d5d7a010ad0bb845e92a5062fb3f300;hb=40231a58a0d8bffbe2264fe980483edaa6fc422e;hp=3389d647af5894c9313d3b36916ddb4e0b9d0633;hpb=357c62657a3d856262f1454816e5f94b90df54ff;p=ffmpeg diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c index 3389d647af5..f156d1883d5 100644 --- a/libavfilter/af_silenceremove.c +++ b/libavfilter/af_silenceremove.c @@ -61,15 +61,20 @@ typedef struct SilenceRemoveContext { size_t stop_holdoff_end; int stop_found_periods; + double window_ratio; double *window; double *window_current; double *window_end; int window_size; - double rms_sum; + double sum; int leave_silence; int restart; int64_t next_pts; + + int detection; + void (*update)(struct SilenceRemoveContext *s, double sample); + double(*compute)(struct SilenceRemoveContext *s, double sample); } SilenceRemoveContext; #define OFFSET(x) offsetof(SilenceRemoveContext, x) @@ -82,11 +87,59 @@ static const AVOption silenceremove_options[] = { { "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS }, { "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS }, { "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" }, + { "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" }, + { "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" }, + { "window", NULL, OFFSET(window_ratio), AV_OPT_TYPE_DOUBLE, {.dbl=0.02}, 0, 10, FLAGS }, { NULL } }; AVFILTER_DEFINE_CLASS(silenceremove); +static double compute_peak(SilenceRemoveContext *s, double sample) +{ + double new_sum; + + new_sum = s->sum; + new_sum -= *s->window_current; + new_sum += fabs(sample); + + return new_sum / s->window_size; +} + +static void update_peak(SilenceRemoveContext *s, double sample) +{ + s->sum -= *s->window_current; + *s->window_current = fabs(sample); + s->sum += *s->window_current; + + s->window_current++; + if (s->window_current >= s->window_end) + s->window_current = s->window; +} + +static double compute_rms(SilenceRemoveContext *s, double sample) +{ + double new_sum; + + new_sum = s->sum; + new_sum -= *s->window_current; + new_sum += sample * sample; + + return sqrt(new_sum / s->window_size); +} + +static void update_rms(SilenceRemoveContext *s, double sample) +{ + s->sum -= *s->window_current; + *s->window_current = sample * sample; + s->sum += *s->window_current; + + s->window_current++; + if (s->window_current >= s->window_end) + s->window_current = s->window; +} + static av_cold int init(AVFilterContext *ctx) { SilenceRemoveContext *s = ctx->priv; @@ -96,16 +149,27 @@ static av_cold int init(AVFilterContext *ctx) s->restart = 1; } + switch (s->detection) { + case 0: + s->update = update_peak; + s->compute = compute_peak; + break; + case 1: + s->update = update_rms; + s->compute = compute_rms; + break; + }; + return 0; } -static void clear_rms(SilenceRemoveContext *s) +static void clear_window(SilenceRemoveContext *s) { memset(s->window, 0, s->window_size * sizeof(*s->window)); s->window_current = s->window; s->window_end = s->window + s->window_size; - s->rms_sum = 0; + s->sum = 0; } static int config_input(AVFilterLink *inlink) @@ -113,12 +177,12 @@ static int config_input(AVFilterLink *inlink) AVFilterContext *ctx = inlink->dst; SilenceRemoveContext *s = ctx->priv; - s->window_size = (inlink->sample_rate / 50) * inlink->channels; + s->window_size = FFMAX((inlink->sample_rate * s->window_ratio), 1) * inlink->channels; s->window = av_malloc_array(s->window_size, sizeof(*s->window)); if (!s->window) return AVERROR(ENOMEM); - clear_rms(s); + clear_window(s); s->start_duration = av_rescale(s->start_duration, inlink->sample_rate, AV_TIME_BASE); @@ -153,28 +217,6 @@ static int config_input(AVFilterLink *inlink) return 0; } -static double compute_rms(SilenceRemoveContext *s, double sample) -{ - double new_sum; - - new_sum = s->rms_sum; - new_sum -= *s->window_current; - new_sum += sample * sample; - - return sqrt(new_sum / s->window_size); -} - -static void update_rms(SilenceRemoveContext *s, double sample) -{ - s->rms_sum -= *s->window_current; - *s->window_current = sample * sample; - s->rms_sum += *s->window_current; - - s->window_current++; - if (s->window_current >= s->window_end) - s->window_current = s->window; -} - static void flush(AVFrame *out, AVFilterLink *outlink, int *nb_samples_written, int *ret) { @@ -209,15 +251,15 @@ silence_trim: for (i = 0; i < nbs; i++) { threshold = 0; for (j = 0; j < inlink->channels; j++) { - threshold |= compute_rms(s, ibuf[j]) > s->start_threshold; + threshold |= s->compute(s, ibuf[j]) > s->start_threshold; } if (threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); s->start_holdoff[s->start_holdoff_end++] = *ibuf++; - nb_samples_read++; } + nb_samples_read += inlink->channels; if (s->start_holdoff_end >= s->start_duration * inlink->channels) { if (++s->start_found_periods >= s->start_periods) { @@ -232,7 +274,7 @@ silence_trim: s->start_holdoff_end = 0; for (j = 0; j < inlink->channels; j++) - update_rms(s, ibuf[j]); + s->update(s, ibuf[j]); ibuf += inlink->channels; nb_samples_read += inlink->channels; @@ -284,7 +326,7 @@ silence_copy: for (i = 0; i < nbs; i++) { threshold = 1; for (j = 0; j < inlink->channels; j++) - threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold; + threshold &= s->compute(s, ibuf[j]) > s->stop_threshold; if (threshold && s->stop_holdoff_end && !s->leave_silence) { s->mode = SILENCE_COPY_FLUSH; @@ -292,22 +334,22 @@ silence_copy: goto silence_copy_flush; } else if (threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); *obuf++ = *ibuf++; - nb_samples_read++; - nb_samples_written++; } + nb_samples_read += inlink->channels; + nb_samples_written += inlink->channels; } else if (!threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); if (s->leave_silence) { *obuf++ = *ibuf; nb_samples_written++; } s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++; - nb_samples_read++; } + nb_samples_read += inlink->channels; if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) { if (++s->stop_found_periods >= s->stop_periods) { @@ -323,7 +365,7 @@ silence_copy: s->start_found_periods = 0; s->start_holdoff_offset = 0; s->start_holdoff_end = 0; - clear_rms(s); + clear_window(s); s->mode = SILENCE_TRIM; flush(out, outlink, &nb_samples_written, &ret); goto silence_trim;