From 553eb0773763798a6b9656b621cb125e1f6edbcc Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Tue, 19 Jan 2021 14:49:45 +0100 Subject: [PATCH] avfilter/vf_nnedi: small cleanups --- libavfilter/vf_nnedi.c | 83 ++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/libavfilter/vf_nnedi.c b/libavfilter/vf_nnedi.c index 5cedae104bd..5863ec478c1 100644 --- a/libavfilter/vf_nnedi.c +++ b/libavfilter/vf_nnedi.c @@ -37,30 +37,27 @@ static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 }; static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 }; static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 }; -static const unsigned NNEDI_DIMS0 = 49 * 4 + 5 * 4 + 9 * 4; -static const unsigned NNEDI_DIMS0_NEW = 4 * 65 + 4 * 5; - typedef struct PrescreenerOldCoefficients { DECLARE_ALIGNED(32, float, kernel_l0)[4][14 * 4]; - float bias_l0[4]; + DECLARE_ALIGNED(32, float, bias_l0)[4]; DECLARE_ALIGNED(32, float, kernel_l1)[4][4]; - float bias_l1[4]; + DECLARE_ALIGNED(32, float, bias_l1)[4]; DECLARE_ALIGNED(32, float, kernel_l2)[4][8]; - float bias_l2[4]; + DECLARE_ALIGNED(32, float, bias_l2)[4]; } PrescreenerOldCoefficients; typedef struct PrescreenerNewCoefficients { DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4]; - float bias_l0[4]; + DECLARE_ALIGNED(32, float, bias_l0)[4]; DECLARE_ALIGNED(32, float, kernel_l1)[4][4]; - float bias_l1[4]; + DECLARE_ALIGNED(32, float, bias_l1)[4]; } PrescreenerNewCoefficients; typedef struct PredictorCoefficients { - int xdim, ydim, nns; + int xdim, ydim, nns, nsize; float *data; float *softmax_q1; float *elliott_q1; @@ -226,7 +223,7 @@ static int query_formats(AVFilterContext *ctx) } static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input, - unsigned n, float scale, float bias) + int n, float scale, float bias) { float sum; @@ -235,17 +232,6 @@ static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input, return sum * scale + bias; } -static float dot_product(const float *kernel, const float *input, - unsigned n, float scale, float bias) -{ - float sum = 0.0f; - - for (int i = 0; i < n; i++) - sum += kernel[i] * input[i]; - - return sum * scale + bias; -} - static float elliott(float x) { return x / (1.0f + fabsf(x)); @@ -263,7 +249,7 @@ static void process_old(AVFilterContext *ctx, void *data) { NNEDIContext *s = ctx->priv; - PrescreenerOldCoefficients *m_data = data; + const PrescreenerOldCoefficients *const m_data = data; const float *src_p = src; // Adjust source pointer to point to top-left of filter window. @@ -283,12 +269,12 @@ static void process_old(AVFilterContext *ctx, // Layer 1. for (int n = 0; n < 4; n++) - state[n + 4] = dot_product(m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]); + state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]); transform_elliott(state + 4, 3); // Layer 2. for (int n = 0; n < 4; n++) - state[n + 8] = dot_product(m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]); + state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]); prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0; } @@ -300,7 +286,7 @@ static void process_new(AVFilterContext *ctx, void *data) { NNEDIContext *s = ctx->priv; - PrescreenerNewCoefficients *m_data = data; + const PrescreenerNewCoefficients *const m_data = data; const float *src_p = src; // Adjust source pointer to point to top-left of filter window. @@ -318,60 +304,68 @@ static void process_new(AVFilterContext *ctx, transform_elliott(state, 4); for (int n = 0; n < 4; n++) - state[n + 4] = dot_product(m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]); + state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]); for (int n = 0; n < 4; n++) prescreen[j + n] = state[n + 4] > 0.f; } } -static int filter_offset(unsigned nn, PredictorCoefficients *model) +static int filter_offset(int nn, const PredictorCoefficients *const model) { - return nn * model->xdim * model->ydim; + return nn * model->nsize; } -static const float *softmax_q1_filter(unsigned nn, PredictorCoefficients *model) +static const float *softmax_q1_filter(int nn, + const PredictorCoefficients *const model) { return model->softmax_q1 + filter_offset(nn, model); } -static const float *elliott_q1_filter(unsigned nn, PredictorCoefficients *model) +static const float *elliott_q1_filter(int nn, + const PredictorCoefficients *const model) { return model->elliott_q1 + filter_offset(nn, model); } -static const float *softmax_q2_filter(unsigned nn, PredictorCoefficients *model) +static const float *softmax_q2_filter(int nn, + const PredictorCoefficients *const model) { return model->softmax_q2 + filter_offset(nn, model); } -static const float *elliott_q2_filter(unsigned nn, PredictorCoefficients *model) +static const float *elliott_q2_filter(int nn, + const PredictorCoefficients *const model) { return model->elliott_q2 + filter_offset(nn, model); } static void gather_input(const float *src, ptrdiff_t src_stride, float *buf, float mstd[4], - PredictorCoefficients *model) + const PredictorCoefficients *const model) { float sum = 0; float sum_sq = 0; float tmp; for (int i = 0; i < model->ydim; i++) { + memcpy(buf, src, model->xdim * sizeof(float)); + for (int j = 0; j < model->xdim; j++) { - float val = src[i * src_stride + j]; + const float val = src[j]; - buf[i * model->xdim + j] = val; sum += val; sum_sq += val * val; } + + src += src_stride; + buf += model->xdim; } - mstd[0] = sum / (model->xdim * model->ydim); + mstd[0] = sum / model->nsize; mstd[3] = 0.f; - tmp = sum_sq / (model->xdim * model->ydim) - mstd[0] * mstd[0]; + tmp = sum_sq / model->nsize - mstd[0] * mstd[0]; if (tmp < FLT_EPSILON) { mstd[1] = 0.0f; mstd[2] = 0.0f; @@ -393,7 +387,7 @@ static void transform_softmax_exp(float *input, int size) } static void wae5(const float *softmax, const float *el, - unsigned n, float mstd[4]) + int n, float mstd[4]) { float vsum = 0.0f, wsum = 0.0f; @@ -414,13 +408,13 @@ static void predictor(AVFilterContext *ctx, void *data, int use_q2) { NNEDIContext *s = ctx->priv; - PredictorCoefficients *model = data; + const PredictorCoefficients *const model = data; const float *src_p = src; float *dst_p = dst; // Adjust source pointer to point to top-left of filter window. const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1); - int filter_size = model->xdim * model->ydim; + int filter_size = model->nsize; int nns = model->nns; for (int i = 0; i < N; i++) { @@ -534,7 +528,7 @@ static void write_words(const float *src, uint8_t *dstp, } static void interpolation(const void *src, ptrdiff_t src_stride, - void *dst, const uint8_t *prescreen, unsigned n) + void *dst, const uint8_t *prescreen, int n) { const float *src_p = src; float *dst_p = dst; @@ -844,6 +838,7 @@ static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int coeffs->data = data; coeffs->xdim = xdim; coeffs->ydim = ydim; + coeffs->nsize = xdim * ydim; coeffs->nns = nns; coeffs->softmax_q1 = allocate(&data, filter_size); @@ -966,7 +961,7 @@ static void subtract_mean_new(PrescreenerNewCoefficients *coeffs, float half) static void subtract_mean_predictor(PredictorCoefficients *model) { - int filter_size = model->xdim * model->ydim; + int filter_size = model->nsize; int nns = model->nns; float softmax_means[256]; // Average of individual softmax filters. @@ -1013,8 +1008,8 @@ static void subtract_mean_predictor(PredictorCoefficients *model) mean_bias = mean(model->softmax_bias_q2, nns); - for (unsigned nn = 0; nn < nns; nn++) { - for (unsigned k = 0; k < filter_size; k++) { + for (int nn = 0; nn < nns; nn++) { + for (int k = 0; k < filter_size; k++) { model->softmax_q2[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k]; model->elliott_q2[nn * filter_size + k] -= elliott_means[nn]; } -- 2.39.5