From 553eb0773763798a6b9656b621cb125e1f6edbcc Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Tue, 19 Jan 2021 14:49:45 +0100
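Subject: [PATCH] avfilter/vf_nnedi: small cleanups

Drop the NNEDI_DIMS0 and NNEDI_DIMS0_NEW constants.

Declare the prescreener bias arrays with DECLARE_ALIGNED(32, ...) like
the kernel arrays next to them.

Cache xdim * ydim in a new PredictorCoefficients.nsize field, set in
allocate_model(), and use it in filter_offset(), gather_input(),
predictor() and subtract_mean_predictor() instead of repeating the
multiplication.

Remove the scalar dot_product() helper; its callers in process_old()
and process_new() now use dot_dsp().

Const-qualify the coefficient/model pointers in the processing
functions, and use int instead of unsigned for counts and loop indices.

In gather_input(), copy each window row with memcpy() and advance the
src and buf pointers per row instead of recomputing the indices.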

---
 libavfilter/vf_nnedi.c | 83 ++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 44 deletions(-)

diff --git a/libavfilter/vf_nnedi.c b/libavfilter/vf_nnedi.c
index 5cedae104bd..5863ec478c1 100644
--- a/libavfilter/vf_nnedi.c
+++ b/libavfilter/vf_nnedi.c
@@ -37,30 +37,27 @@ static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
 static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
 static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };
 
-static const unsigned NNEDI_DIMS0 = 49 * 4 + 5 * 4 + 9 * 4;
-static const unsigned NNEDI_DIMS0_NEW = 4 * 65 + 4 * 5;
-
 typedef struct PrescreenerOldCoefficients {
     DECLARE_ALIGNED(32, float, kernel_l0)[4][14 * 4];
-    float bias_l0[4];
+    DECLARE_ALIGNED(32, float, bias_l0)[4];
 
     DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
-    float bias_l1[4];
+    DECLARE_ALIGNED(32, float, bias_l1)[4];
 
     DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
-    float bias_l2[4];
+    DECLARE_ALIGNED(32, float, bias_l2)[4];
 } PrescreenerOldCoefficients;
 
 typedef struct PrescreenerNewCoefficients {
     DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
-    float bias_l0[4];
+    DECLARE_ALIGNED(32, float, bias_l0)[4];
 
     DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
-    float bias_l1[4];
+    DECLARE_ALIGNED(32, float, bias_l1)[4];
 } PrescreenerNewCoefficients;
 
 typedef struct PredictorCoefficients {
-    int xdim, ydim, nns;
+    int xdim, ydim, nns, nsize;
     float *data;
     float *softmax_q1;
     float *elliott_q1;
@@ -226,7 +223,7 @@ static int query_formats(AVFilterContext *ctx)
 }
 
 static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input,
-                     unsigned n, float scale, float bias)
+                     int n, float scale, float bias)
 {
     float sum;
 
@@ -235,17 +232,6 @@ static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input,
     return sum * scale + bias;
 }
 
-static float dot_product(const float *kernel, const float *input,
-                         unsigned n, float scale, float bias)
-{
-    float sum = 0.0f;
-
-    for (int i = 0; i < n; i++)
-        sum += kernel[i] * input[i];
-
-    return sum * scale + bias;
-}
-
 static float elliott(float x)
 {
     return x / (1.0f + fabsf(x));
@@ -263,7 +249,7 @@ static void process_old(AVFilterContext *ctx,
                         void *data)
 {
     NNEDIContext *s = ctx->priv;
-    PrescreenerOldCoefficients *m_data = data;
+    const PrescreenerOldCoefficients *const m_data = data;
     const float *src_p = src;
 
     // Adjust source pointer to point to top-left of filter window.
@@ -283,12 +269,12 @@ static void process_old(AVFilterContext *ctx,
 
         // Layer 1.
         for (int n = 0; n < 4; n++)
-            state[n + 4] = dot_product(m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
+            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
         transform_elliott(state + 4, 3);
 
         // Layer 2.
         for (int n = 0; n < 4; n++)
-            state[n + 8] = dot_product(m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);
+            state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);
 
         prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0;
     }
@@ -300,7 +286,7 @@ static void process_new(AVFilterContext *ctx,
                         void *data)
 {
     NNEDIContext *s = ctx->priv;
-    PrescreenerNewCoefficients *m_data = data;
+    const PrescreenerNewCoefficients *const m_data = data;
     const float *src_p = src;
 
     // Adjust source pointer to point to top-left of filter window.
@@ -318,60 +304,68 @@ static void process_new(AVFilterContext *ctx,
         transform_elliott(state, 4);
 
         for (int n = 0; n < 4; n++)
-            state[n + 4] = dot_product(m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
+            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
 
         for (int n = 0; n < 4; n++)
             prescreen[j + n] = state[n + 4] > 0.f;
     }
 }
 
-static int filter_offset(unsigned nn, PredictorCoefficients *model)
+static int filter_offset(int nn, const PredictorCoefficients *const model)
 {
-    return nn * model->xdim * model->ydim;
+    return nn * model->nsize;
 }
 
-static const float *softmax_q1_filter(unsigned nn, PredictorCoefficients *model)
+static const float *softmax_q1_filter(int nn,
+                                      const PredictorCoefficients *const model)
 {
     return model->softmax_q1 + filter_offset(nn, model);
 }
 
-static const float *elliott_q1_filter(unsigned nn, PredictorCoefficients *model)
+static const float *elliott_q1_filter(int nn,
+                                      const PredictorCoefficients *const model)
 {
     return model->elliott_q1 + filter_offset(nn, model);
 }
 
-static const float *softmax_q2_filter(unsigned nn, PredictorCoefficients *model)
+static const float *softmax_q2_filter(int nn,
+                                      const PredictorCoefficients *const model)
 {
     return model->softmax_q2 + filter_offset(nn, model);
 }
 
-static const float *elliott_q2_filter(unsigned nn, PredictorCoefficients *model)
+static const float *elliott_q2_filter(int nn,
+                                      const PredictorCoefficients *const model)
 {
     return model->elliott_q2 + filter_offset(nn, model);
 }
 
 static void gather_input(const float *src, ptrdiff_t src_stride,
                          float *buf, float mstd[4],
-                         PredictorCoefficients *model)
+                         const PredictorCoefficients *const model)
 {
     float sum = 0;
     float sum_sq = 0;
     float tmp;
 
     for (int i = 0; i < model->ydim; i++) {
+        memcpy(buf, src, model->xdim * sizeof(float));
+
         for (int j = 0; j < model->xdim; j++) {
-            float val = src[i * src_stride + j];
+            const float val = src[j];
 
-            buf[i * model->xdim + j] = val;
             sum += val;
             sum_sq += val * val;
         }
+
+        src += src_stride;
+        buf += model->xdim;
     }
 
-    mstd[0] = sum / (model->xdim * model->ydim);
+    mstd[0] = sum / model->nsize;
     mstd[3] = 0.f;
 
-    tmp = sum_sq / (model->xdim * model->ydim) - mstd[0] * mstd[0];
+    tmp = sum_sq / model->nsize - mstd[0] * mstd[0];
     if (tmp < FLT_EPSILON) {
         mstd[1] = 0.0f;
         mstd[2] = 0.0f;
@@ -393,7 +387,7 @@ static void transform_softmax_exp(float *input, int size)
 }
 
 static void wae5(const float *softmax, const float *el,
-                 unsigned n, float mstd[4])
+                 int n, float mstd[4])
 {
     float vsum = 0.0f, wsum = 0.0f;
 
@@ -414,13 +408,13 @@ static void predictor(AVFilterContext *ctx,
                       void *data, int use_q2)
 {
     NNEDIContext *s = ctx->priv;
-    PredictorCoefficients *model = data;
+    const PredictorCoefficients *const model = data;
     const float *src_p = src;
     float *dst_p = dst;
 
     // Adjust source pointer to point to top-left of filter window.
     const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
-    int filter_size = model->xdim * model->ydim;
+    int filter_size = model->nsize;
     int nns = model->nns;
 
     for (int i = 0; i < N; i++) {
@@ -534,7 +528,7 @@ static void write_words(const float *src, uint8_t *dstp,
 }
 
 static void interpolation(const void *src, ptrdiff_t src_stride,
-                          void *dst, const uint8_t *prescreen, unsigned n)
+                          void *dst, const uint8_t *prescreen, int n)
 {
     const float *src_p = src;
     float *dst_p = dst;
@@ -844,6 +838,7 @@ static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int
     coeffs->data = data;
     coeffs->xdim = xdim;
     coeffs->ydim = ydim;
+    coeffs->nsize = xdim * ydim;
     coeffs->nns  = nns;
 
     coeffs->softmax_q1 = allocate(&data, filter_size);
@@ -966,7 +961,7 @@ static void subtract_mean_new(PrescreenerNewCoefficients *coeffs, float half)
 
 static void subtract_mean_predictor(PredictorCoefficients *model)
 {
-    int filter_size = model->xdim * model->ydim;
+    int filter_size = model->nsize;
     int nns = model->nns;
 
     float softmax_means[256]; // Average of individual softmax filters.
@@ -1013,8 +1008,8 @@ static void subtract_mean_predictor(PredictorCoefficients *model)
 
     mean_bias = mean(model->softmax_bias_q2, nns);
 
-    for (unsigned nn = 0; nn < nns; nn++) {
-        for (unsigned k = 0; k < filter_size; k++) {
+    for (int nn = 0; nn < nns; nn++) {
+        for (int k = 0; k < filter_size; k++) {
             model->softmax_q2[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k];
             model->elliott_q2[nn * filter_size + k] -= elliott_means[nn];
         }
-- 
2.39.5