avfilter/af_dynaudnorm: implement threshold option

author Paul B Mahol <onemda@gmail.com>

Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)

committer Paul B Mahol <onemda@gmail.com>

Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)
author Paul B Mahol <onemda@gmail.com>
Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)
committer Paul B Mahol <onemda@gmail.com>
Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)
diff --git a/doc/filters.texi b/doc/filters.texi

index ba009899871fd16b0c9a529eb31b84e69f0ce7b2..b5f136d79b35251dba2a6ec31a1d5f645be4d89a 100644 (file)
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3438,6 +3438,14 @@ value. Instead, the threshold value will be adjusted for each individual
  frame.
  In general, smaller parameters result in stronger compression, and vice versa.
  Values below 3.0 are not recommended, because audible distortion may appear.
+
+@item threshold, t
+Set the target threshold value. This specifies the loweset permissible
+magnitude level for the audio input which will be normalized.
+If input frame volume is above this value frame will be normalized.
+Otherwise frame may not be normalized at all. The default value is set
+to 0, which means all input frames will be normalized.
+This option is mostly useful if digital noise is not wanted to be amplified.
  @end table
  
  @section earwax
diff --git a/libavfilter/af_dynaudnorm.c b/libavfilter/af_dynaudnorm.c

index 09d2aeb816436c133146714bd8e5a7d863857497..1330433fbc450569e572621998a8ca2f30202a54 100644 (file)
--- a/libavfilter/af_dynaudnorm.c
+++ b/libavfilter/af_dynaudnorm.c
@@ -37,6 +37,11 @@
  #include "filters.h"
  #include "internal.h"
  
+typedef struct local_gain {
+    double max_gain;
+    double threshold;
+} local_gain;
+
  typedef struct cqueue {
      double *elements;
      int size;
@@ -60,6 +65,7 @@ typedef struct DynamicAudioNormalizerContext {
      double max_amplification;
      double target_rms;
      double compress_factor;
+    double threshold;
      double *prev_amplification_factor;
      double *dc_correction_value;
      double *compress_threshold;
@@ -73,6 +79,7 @@ typedef struct DynamicAudioNormalizerContext {
      cqueue **gain_history_original;
      cqueue **gain_history_minimum;
      cqueue **gain_history_smoothed;
+    cqueue **threshold_history;
  
      cqueue *is_enabled;
  } DynamicAudioNormalizerContext;
@@ -99,6 +106,8 @@ static const AVOption dynaudnorm_options[] = {
      { "b",           "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
      { "compress",    "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
      { "s",           "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
+    { "threshold",   "set the threshold value",          OFFSET(threshold),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
+    { "t",           "set the threshold value",          OFFSET(threshold),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
      { NULL }
  };
  
@@ -286,11 +295,14 @@ static av_cold void uninit(AVFilterContext *ctx)
              cqueue_free(s->gain_history_minimum[c]);
          if (s->gain_history_smoothed)
              cqueue_free(s->gain_history_smoothed[c]);
+        if (s->threshold_history)
+            cqueue_free(s->threshold_history[c]);
      }
  
      av_freep(&s->gain_history_original);
      av_freep(&s->gain_history_minimum);
      av_freep(&s->gain_history_smoothed);
+    av_freep(&s->threshold_history);
  
      cqueue_free(s->is_enabled);
      s->is_enabled = NULL;
@@ -320,12 +332,14 @@ static int config_input(AVFilterLink *inlink)
      s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original));
      s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
      s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
+    s->threshold_history = av_calloc(inlink->channels, sizeof(*s->threshold_history));
      s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights));
      s->is_enabled = cqueue_create(s->filter_size);
      if (!s->prev_amplification_factor || !s->dc_correction_value ||
          !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] ||
          !s->gain_history_original || !s->gain_history_minimum ||
-        !s->gain_history_smoothed || !s->is_enabled || !s->weights)
+        !s->gain_history_smoothed || !s->threshold_history ||
+        !s->is_enabled || !s->weights)
          return AVERROR(ENOMEM);
  
      for (c = 0; c < inlink->channels; c++) {
@@ -334,9 +348,10 @@ static int config_input(AVFilterLink *inlink)
          s->gain_history_original[c] = cqueue_create(s->filter_size);
          s->gain_history_minimum[c]  = cqueue_create(s->filter_size);
          s->gain_history_smoothed[c] = cqueue_create(s->filter_size);
+        s->threshold_history[c]     = cqueue_create(s->filter_size);
  
          if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
-            !s->gain_history_smoothed[c])
+            !s->gain_history_smoothed[c] || !s->threshold_history[c])
              return AVERROR(ENOMEM);
      }
  
@@ -414,12 +429,18 @@ static double compute_frame_rms(AVFrame *frame, int channel)
      return FFMAX(sqrt(rms_value), DBL_EPSILON);
  }
  
-static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
-                                 int channel)
+static local_gain get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
+                                     int channel)
  {
-    const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel);
+    const double peak_magnitude = find_peak_magnitude(frame, channel);
+    const double maximum_gain = s->peak_value / peak_magnitude;
      const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
-    return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain));
+    local_gain gain;
+
+    gain.threshold = peak_magnitude > s->threshold;
+    gain.max_gain  = bound(s->max_amplification, FFMIN(maximum_gain, rms_gain));
+
+    return gain;
  }
  
  static double minimum_filter(cqueue *q)
@@ -434,34 +455,40 @@ static double minimum_filter(cqueue *q)
      return min;
  }
  
-static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
+static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q, cqueue *tq)
  {
-    double result = 0.0;
+    double result = 0.0, tsum = 0.0;
      int i;
  
      for (i = 0; i < cqueue_size(q); i++) {
-        result += cqueue_peek(q, i) * s->weights[i];
+        tsum += cqueue_peek(tq, i) * s->weights[i];
+        result += cqueue_peek(q, i) * s->weights[i] * cqueue_peek(tq, i);
      }
  
+    if (tsum == 0.0)
+        result = 1.0;
+
      return result;
  }
  
  static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
-                                double current_gain_factor)
+                                local_gain gain)
  {
      if (cqueue_empty(s->gain_history_original[channel]) ||
          cqueue_empty(s->gain_history_minimum[channel])) {
          const int pre_fill_size = s->filter_size / 2;
-        const double initial_value = s->alt_boundary_mode ? current_gain_factor : 1.0;
+        const double initial_value = s->alt_boundary_mode ? gain.max_gain : 1.0;
  
          s->prev_amplification_factor[channel] = initial_value;
  
          while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
              cqueue_enqueue(s->gain_history_original[channel], initial_value);
+            cqueue_enqueue(s->threshold_history[channel], gain.threshold);
          }
      }
  
-    cqueue_enqueue(s->gain_history_original[channel], current_gain_factor);
+    cqueue_enqueue(s->gain_history_original[channel], gain.max_gain);
+    cqueue_enqueue(s->threshold_history[channel], gain.threshold);
  
      while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
          double minimum;
@@ -489,12 +516,13 @@ static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
      while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
          double smoothed;
          av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
-        smoothed = gaussian_filter(s, s->gain_history_minimum[channel]);
+        smoothed = gaussian_filter(s, s->gain_history_minimum[channel], s->threshold_history[channel]);
          smoothed = FFMIN(smoothed, cqueue_peek(s->gain_history_minimum[channel], s->filter_size / 2));
  
          cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);
  
          cqueue_pop(s->gain_history_minimum[channel]);
+        cqueue_pop(s->threshold_history[channel]);
      }
  }
  
@@ -632,11 +660,11 @@ static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
      }
  
      if (s->channels_coupled) {
-        const double current_gain_factor = get_max_local_gain(s, frame, -1);
+        const local_gain gain = get_max_local_gain(s, frame, -1);
          int c;
  
          for (c = 0; c < s->channels; c++)
-            update_gain_history(s, c, current_gain_factor);
+            update_gain_history(s, c, gain);
      } else {
          int c;
author	Paul B Mahol <onemda@gmail.com>
	Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)
committer	Paul B Mahol <onemda@gmail.com>
	Sat, 4 Jan 2020 17:17:32 +0000 (18:17 +0100)
doc/filters.texi		patch \| blob \| history
libavfilter/af_dynaudnorm.c		patch \| blob \| history