git.sesse.net Git - ffmpeg/commitdiff
checkasm: add av_tx FFT SIMD testing code
author Lynne <dev@lynne.ee>
Sat, 10 Apr 2021 01:53:38 +0000 (03:53 +0200)
committer Lynne <dev@lynne.ee>
Sat, 24 Apr 2021 15:19:17 +0000 (17:19 +0200)
This sadly required making changes to the code itself,
due to the same context needing to be reused for both versions.
The lookup table had to be duplicated for both versions.

libavutil/tx.c
libavutil/tx_priv.h
libavutil/tx_template.c
tests/checkasm/Makefile
tests/checkasm/av_tx.c [new file with mode: 0644]
tests/checkasm/checkasm.c
tests/checkasm/checkasm.h
tests/fate/checkasm.mak

index 6d0e854084de3987fe7e6a3ff363ffb426c65a4d..dcfb257899b95f441146dd92f83a46a60c510987 100644 (file)
@@ -106,22 +106,24 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
 {
     const int m = s->m, inv = s->inv;
 
-    if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
+    if (!(s->revtab = av_malloc(s->m*sizeof(*s->revtab))))
+        return AVERROR(ENOMEM);
+    if (!(s->revtab_c = av_malloc(m*sizeof(*s->revtab_c))))
         return AVERROR(ENOMEM);
 
     /* Default */
     for (int i = 0; i < m; i++) {
         int k = -split_radix_permutation(i, m, inv) & (m - 1);
         if (invert_lookup)
-            s->revtab[i] = k;
+            s->revtab[i] = s->revtab_c[i] = k;
         else
-            s->revtab[k] = i;
+            s->revtab[i] = s->revtab_c[k] = i;
     }
 
     return 0;
 }
 
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab)
 {
     int nb_inplace_idx = 0;
 
@@ -130,7 +132,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
 
     /* The first coefficient is always already in-place */
     for (int src = 1; src < s->m; src++) {
-        int dst = s->revtab[src];
+        int dst = revtab[src];
         int found = 0;
 
         if (dst <= src)
@@ -146,7 +148,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
                     break;
                 }
             }
-            dst = s->revtab[dst];
+            dst = revtab[dst];
         } while (dst != src && !found);
 
         if (!found)
@@ -215,6 +217,7 @@ av_cold void av_tx_uninit(AVTXContext **ctx)
     av_free((*ctx)->pfatab);
     av_free((*ctx)->exptab);
     av_free((*ctx)->revtab);
+    av_free((*ctx)->revtab_c);
     av_free((*ctx)->inplace_idx);
     av_free((*ctx)->tmp);
 
index b889f6d3b46d5f0fbbd332cf7300e0c24f3b9353..88589fcbb47e293022578208829b53be47f89f09 100644 (file)
@@ -122,6 +122,9 @@ struct AVTXContext {
     int        *revtab; /* Input mapping for power of two transforms */
     int   *inplace_idx; /* Required indices to revtab for in-place transforms */
 
+    int      *revtab_c; /* Revtab for only the C transforms, needed because
+                         * checkasm makes us reuse the same context. */
+
     av_tx_fn    top_tx; /* Used for computing transforms derived from other
                          * transforms, like full-length iMDCTs and RDFTs.
                          * NOTE: Do NOT use this to mix assembly with C code. */
@@ -147,7 +150,7 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
  * specific order,  allows the revtab to be done in-place. AVTXContext->revtab
  * must already exist.
  */
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab);
 
 /*
  * This generates a parity-based revtab of length len and direction inv.
index a68a84dcd574517a854366e5d6c1fffc6a65d227..cad66a8bc0932ab2e180b412c1f41ce212a78305 100644 (file)
@@ -593,7 +593,7 @@ static void compound_fft_##N##xM(AVTXContext *s, void *_out,                   \
     for (int i = 0; i < m; i++) {                                              \
         for (int j = 0; j < N; j++)                                            \
             fft##N##in[j] = in[in_map[i*N + j]];                               \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -624,16 +624,16 @@ static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
 
         do {
             tmp = out[src];
-            dst = s->revtab[src];
+            dst = s->revtab_c[src];
             do {
                 FFSWAP(FFTComplex, tmp, out[dst]);
-                dst = s->revtab[dst];
+                dst = s->revtab_c[dst];
             } while (dst != src); /* Can be > as well, but is less predictable */
             out[dst] = tmp;
         } while ((src = *inplace_idx++));
     } else {
         for (int i = 0; i < m; i++)
-            out[i] = in[s->revtab[i]];
+            out[i] = in[s->revtab_c[i]];
     }
 
     fft_dispatch[mb](out);
@@ -685,7 +685,7 @@ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src,     \
             FFTComplex tmp = { in2[-k*stride], in1[k*stride] };                \
             CMUL3(fft##N##in[j], tmp, exp[k >> 1]);                            \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -733,7 +733,7 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src,      \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \
                  exp[k >> 1].re, exp[k >> 1].im);                              \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -772,7 +772,7 @@ static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
 
     for (int i = 0; i < m; i++) {
         FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
-        CMUL3(z[s->revtab[i]], tmp, exp[i]);
+        CMUL3(z[s->revtab_c[i]], tmp, exp[i]);
     }
 
     fftp(z);
@@ -806,7 +806,7 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
             tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
             tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
-        CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
+        CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
     }
 
@@ -1005,7 +1005,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
         if (flags & AV_TX_INPLACE) {
             if (is_mdct) /* In-place MDCTs are not supported yet */
                 return AVERROR(ENOSYS);
-            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s)))
+            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c)))
                 return err;
         }
         for (int i = 4; i <= av_log2(m); i++)
index 1827a4e134707a7c8d66550b9ad4b997e7aa1e65..4ef5fa87da4b4c96bb03229b8037ed107fe874b3 100644 (file)
@@ -50,6 +50,7 @@ SWSCALEOBJS                             += sw_rgb.o sw_scale.o
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
 # libavutil tests
+AVUTILOBJS                              += av_tx.o
 AVUTILOBJS                              += fixed_dsp.o
 AVUTILOBJS                              += float_dsp.o
 
diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c
new file mode 100644 (file)
index 0000000..178fb61
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/mem_internal.h"
+#include "libavutil/tx.h"
+#include "libavutil/error.h"
+
+#include "checkasm.h"
+
+#define EPS 0.00005 /* tolerance argument of the *_near_abs_eps_array() comparisons below */
+/*
+ * Value converters usable as the SCALE argument of randomize_complex().
+ * SCALE_NOOP hands the value through unchanged; it is the one the calls in
+ * this file use. SCALE_INT20 multiplies by 2147483648.0 (2^31), rounds with
+ * lrintf(), clips to [INT32_MIN, INT32_MAX] and shifts right by 12 bits; it
+ * is not referenced in the visible part of this file.
+ */
+#define SCALE_NOOP(x) (x)
+#define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
+
+/*
+ * Fills the first LEN elements of BUF, viewed as an array of TYPE, with
+ * pseudo-random pairs: each component is rnd() / UINT_MAX passed through
+ * SCALE(). TYPE must be initialisable from two values ({ first, second }).
+ * BUF and LEN are parenthesised so that expression arguments expand safely.
+ */
+#define randomize_complex(BUF, LEN, TYPE, SCALE)                \
+    do {                                                        \
+        TYPE *buf = (TYPE *)(BUF);                              \
+        for (int i = 0; i < (LEN); i++) {                       \
+            double fre = (double)rnd() / UINT_MAX;              \
+            double fim = (double)rnd() / UINT_MAX;              \
+            buf[i] = (TYPE){ SCALE(fre), SCALE(fim) };          \
+        }                                                       \
+    } while (0)
+
+/*
+ * Transform lengths exercised by every test in this file. The largest entry
+ * (16384) equals the element count the test buffers are randomized with.
+ */
+static const int check_lens[] = {
+    2, 4, 8, 16,
+    32, 64, 1024, 16384,
+};
+
+/*
+ * Creates a transform of the given TYPE for every length in LENGTHS and, when
+ * check_func() selects it, compares call_ref() against call_new() and then
+ * benchmarks it.
+ *
+ * PREFIX           - string literal naming the test; functions are reported
+ *                    as "<PREFIX>_<len>"
+ * TYPE             - transform type handed to av_tx_init()
+ * DATA_TYPE        - element type; sizeof(DATA_TYPE) is passed as the stride
+ * SCALE            - object whose address is handed to av_tx_init() as scale
+ * LENGTHS          - array of lengths (FF_ARRAY_ELEMS() is applied to it)
+ * CHECK_EXPRESSION - expression that is true when the outputs do NOT match;
+ *                    it may use the local 'len'
+ *
+ * 'in', 'out_ref' and 'out_new' must be in scope at the expansion site.
+ * The av_tx_uninit() after the loop releases the context that is still alive
+ * when the loop is left through the 'break' of a failed check.
+ * NOTE: when av_tx_init() fails the macro returns from the enclosing
+ * function, so nothing placed after the expansion runs on that path.
+ */
+#define CHECK_TEMPLATE(PREFIX, TYPE, DATA_TYPE, SCALE, LENGTHS, CHECK_EXPRESSION) \
+    do {                                                                          \
+        int err;                                                                  \
+        AVTXContext *tx;                                                          \
+        av_tx_fn fn;                                                              \
+        int num_checks = 0;                                                       \
+        int last_check = 0;                                                       \
+        const void *scale = &SCALE;                                               \
+                                                                                  \
+        for (int i = 0; i < FF_ARRAY_ELEMS(LENGTHS); i++) {                       \
+            int len = LENGTHS[i];                                                 \
+                                                                                  \
+            /* 'scale' already points at SCALE: pass it, not its own address */   \
+            if ((err = av_tx_init(&tx, &fn, TYPE, 0, len, scale, 0x0)) < 0) {     \
+                fprintf(stderr, "av_tx: %s\n", av_err2str(err));                  \
+                return;                                                           \
+            }                                                                     \
+                                                                                  \
+            if (check_func(fn, PREFIX "_%i", len)) {                              \
+                num_checks++;                                                     \
+                last_check = len;                                                 \
+                call_ref(tx, out_ref, in, sizeof(DATA_TYPE));                     \
+                call_new(tx, out_new, in, sizeof(DATA_TYPE));                     \
+                if (CHECK_EXPRESSION) {                                           \
+                    fail();                                                       \
+                    break;                                                        \
+                }                                                                 \
+                bench_new(tx, out_new, in, sizeof(DATA_TYPE));                    \
+            }                                                                     \
+                                                                                  \
+            av_tx_uninit(&tx);                                                    \
+            fn = NULL;                                                            \
+        }                                                                         \
+                                                                                  \
+        av_tx_uninit(&tx);                                                        \
+        fn = NULL;                                                                \
+                                                                                  \
+        if (num_checks == 1)                                                      \
+            report(PREFIX "_%i", last_check);                                     \
+        else if (num_checks)                                                      \
+            report(PREFIX);                                                       \
+    } while (0)
+
+/*
+ * checkasm entry point for av_tx: runs the float and the double FFT through
+ * CHECK_TEMPLATE() for every length in check_lens, using freshly randomized
+ * input for each of the two sample types.
+ */
+void checkasm_check_av_tx(void)
+{
+    const float scale_float = 1.0f;
+    const double scale_double = 1.0f;
+
+    declare_func(void, AVTXContext *tx, void *out, void *in, ptrdiff_t stride);
+
+    /* 16384 elements (the largest tested length) of 2*8 bytes each.
+     * NOTE(review): presumably sized for AVComplexDouble - confirm. */
+    void *in      = av_malloc(16384*2*8);
+    void *out_ref = av_malloc(16384*2*8);
+    void *out_new = av_malloc(16384*2*8);
+
+    /* randomize_complex() writes through 'in' unconditionally and the
+     * transforms write to both outputs, so stop before using a NULL buffer. */
+    if (!in || !out_ref || !out_new) {
+        fprintf(stderr, "av_tx: %s\n", av_err2str(AVERROR(ENOMEM)));
+        goto end;
+    }
+
+    randomize_complex(in, 16384, AVComplexFloat, SCALE_NOOP);
+    CHECK_TEMPLATE("float_fft", AV_TX_FLOAT_FFT, AVComplexFloat, scale_float, check_lens,
+                   !float_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+
+    randomize_complex(in, 16384, AVComplexDouble, SCALE_NOOP);
+    CHECK_TEMPLATE("double_fft", AV_TX_DOUBLE_FFT, AVComplexDouble, scale_double, check_lens,
+                   !double_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+
+end:
+    /* av_free() is called on every pointer, including ones that are NULL. */
+    av_free(in);
+    av_free(out_ref);
+    av_free(out_new);
+}
index 8338e8ff5839c7c9ba916ae23c369d96f76f5432..e2e17d2b115a0444fb017b1374e9805964797b18 100644 (file)
@@ -198,6 +198,7 @@ static const struct {
 #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
         { "float_dsp", checkasm_check_float_dsp },
+        { "av_tx",     checkasm_check_av_tx },
 #endif
     { NULL }
 };
index ef6645e3a2b0024a1200d90b704e337dbfde16ea..0593d0edac5ac8fa9cdf1f819ab31a25087034c3 100644 (file)
@@ -43,6 +43,7 @@ void checkasm_check_aacpsdsp(void);
 void checkasm_check_afir(void);
 void checkasm_check_alacdsp(void);
 void checkasm_check_audiodsp(void);
+void checkasm_check_av_tx(void);
 void checkasm_check_blend(void);
 void checkasm_check_blockdsp(void);
 void checkasm_check_bswapdsp(void);
index 07f1d8238ec195f48b1d587bac82e4d608fad224..3108fcd5107c52742f11524700a7c91958928d31 100644 (file)
@@ -2,6 +2,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
                 fate-checkasm-af_afir                                   \
                 fate-checkasm-alacdsp                                   \
                 fate-checkasm-audiodsp                                  \
+                fate-checkasm-av_tx                                     \
                 fate-checkasm-blockdsp                                  \
                 fate-checkasm-bswapdsp                                  \
                 fate-checkasm-exrdsp                                    \