avfilter/af_arnndn: add mix option

author Paul B Mahol <onemda@gmail.com>

Fri, 4 Dec 2020 22:50:57 +0000 (23:50 +0100)

committer Paul B Mahol <onemda@gmail.com>

Fri, 4 Dec 2020 22:57:01 +0000 (23:57 +0100)
author Paul B Mahol <onemda@gmail.com>
Fri, 4 Dec 2020 22:50:57 +0000 (23:50 +0100)
committer Paul B Mahol <onemda@gmail.com>
Fri, 4 Dec 2020 22:57:01 +0000 (23:57 +0100)
diff --git a/doc/filters.texi b/doc/filters.texi

index b3cdff46f0fd95a25a7095214a28978e61aa50a0..537f8fee6fa730ad2b9da4358ec962db2e101488 100644 (file)
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2307,6 +2307,13 @@ This filter accepts the following options:
  @table @option
  @item model, m
  Set train model file to load. This option is always required.
+
+@item mix
+Set how much to mix filtered samples into the final output.
+Allowed range is from -1 to 1. Default value is 1.
+Negative values are special: they set how much of the filtered noise to keep
+in the final filter output. Set this option to -1 to hear the actual
+noise removed from the input signal.
  @end table
  
  @section asetnsamples
diff --git a/libavfilter/af_arnndn.c b/libavfilter/af_arnndn.c

index 00a509477c3b6aa3308012c4d0fd53096891b59d..4551ce2b8318fbc0cce8de376e079dd451e17cf8 100644 (file)
--- a/libavfilter/af_arnndn.c
+++ b/libavfilter/af_arnndn.c
@@ -127,6 +127,7 @@ typedef struct DenoiseState {
      int last_period;
      float mem_hp_x[2];
      float lastg[NB_BANDS];
+    float history[FRAME_SIZE];
      RNNState rnn;
      AVTXContext *tx, *txi;
      av_tx_fn tx_fn, txi_fn;
@@ -136,6 +137,7 @@ typedef struct AudioRNNContext {
      const AVClass *class;
  
      char *model_name;
+    float mix;
  
      int channels;
      DenoiseState *st;
@@ -496,12 +498,18 @@ static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat
  static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
  {
      LOCAL_ALIGNED_32(float, x, [WINDOW_SIZE]);
+    const float *src = st->history;
+    const float mix = s->mix;
+    const float imix = 1.f - FFMAX(mix, 0.f);
  
      inverse_transform(st, x, y);
      s->fdsp->vector_fmul(x, x, s->window, WINDOW_SIZE);
      s->fdsp->vector_fmac_scalar(x, st->synthesis_mem, 1.f, FRAME_SIZE);
      RNN_COPY(out, x, FRAME_SIZE);
      RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
+
+    for (int n = 0; n < FRAME_SIZE; n++)
+        out[n] = out[n] * mix + src[n] * imix;
  }
  
  static inline void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
@@ -1350,6 +1358,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c
      float g[NB_BANDS];
      float gf[FREQ_SIZE];
      float vad_prob = 0;
+    float *history = st->history;
      static const float a_hp[2] = {-1.99599, 0.99600};
      static const float b_hp[2] = {-2, 1};
      int silence;
@@ -1376,6 +1385,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c
      }
  
      frame_synthesis(s, st, out, X);
+    memcpy(history, in, FRAME_SIZE * sizeof(*history));
  
      return vad_prob;
  }
@@ -1526,6 +1536,7 @@ static const AVFilterPad outputs[] = {
  static const AVOption arnndn_options[] = {
      { "model", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
      { "m",     "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
+    { "mix",   "set output vs input mix", OFFSET(mix), AV_OPT_TYPE_FLOAT, {.dbl=1.0},-1, 1, AF },
      { NULL }
  };
author	Paul B Mahol <onemda@gmail.com>
	Fri, 4 Dec 2020 22:50:57 +0000 (23:50 +0100)
committer	Paul B Mahol <onemda@gmail.com>
	Fri, 4 Dec 2020 22:57:01 +0000 (23:57 +0100)
doc/filters.texi		patch \| blob \| history
libavfilter/af_arnndn.c		patch \| blob \| history