git.sesse.net Git - ffmpeg/blob - tests/checkasm/float_dsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include "config.h"
  20
  21 #include <float.h>
  22 #include <stdint.h>
  23
  24 #include "libavutil/float_dsp.h"
  25 #include "libavutil/internal.h"
  26 #include "checkasm.h"
  27
  28 #define LEN 256
  29
  30 #define randomize_buffer(buf)                 \
  31 do {                                          \
  32     int i;                                    \
  33     double bmg[2], stddev = 10.0, mean = 0.0; \
  34                                               \
  35     for (i = 0; i < LEN; i += 2) {            \
  36         av_bmg_get(&checkasm_lfg, bmg);       \
  37         buf[i]     = bmg[0] * stddev + mean;  \
  38         buf[i + 1] = bmg[1] * stddev + mean;  \
  39     }                                         \
  40 } while(0);
  41
  42 static void test_vector_fmul(const float *src0, const float *src1)
  43 {
  44     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  45     LOCAL_ALIGNED_32(float, odst, [LEN]);
  46     int i;
  47
  48     declare_func(void, float *dst, const float *src0, const float *src1,
  49                  int len);
  50
  51     call_ref(cdst, src0, src1, LEN);
  52     call_new(odst, src0, src1, LEN);
  53     for (i = 0; i < LEN; i++) {
  54         if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
  55             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  56                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  57             fail();
  58             break;
  59         }
  60     }
  61     bench_new(odst, src0, src1, LEN);
  62 }
  63
  64 #define ARBITRARY_FMUL_ADD_CONST 0.005
  65 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
  66 {
  67     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  68     LOCAL_ALIGNED_32(float, odst, [LEN]);
  69     int i;
  70
  71     declare_func(void, float *dst, const float *src0, const float *src1,
  72                      const float *src2, int len);
  73
  74     call_ref(cdst, src0, src1, src2, LEN);
  75     call_new(odst, src0, src1, src2, LEN);
  76     for (i = 0; i < LEN; i++) {
  77         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
  78             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  79                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  80             fail();
  81             break;
  82         }
  83     }
  84     bench_new(odst, src0, src1, src2, LEN);
  85 }
  86
  87 static void test_vector_fmul_scalar(const float *src0, const float *src1)
  88 {
  89     LOCAL_ALIGNED_16(float, cdst, [LEN]);
  90     LOCAL_ALIGNED_16(float, odst, [LEN]);
  91     int i;
  92
  93     declare_func(void, float *dst, const float *src, float mul, int len);
  94
  95     call_ref(cdst, src0, src1[0], LEN);
  96     call_new(odst, src0, src1[0], LEN);
  97         for (i = 0; i < LEN; i++) {
  98             if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
  99                 fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 100                         i, cdst[i], odst[i], cdst[i] - odst[i]);
 101                 fail();
 102                 break;
 103             }
 104         }
 105     bench_new(odst, src0, src1[0], LEN);
 106 }
 107
 108 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 109 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
 110 {
 111     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 112     LOCAL_ALIGNED_16(float, odst, [LEN]);
 113     int i;
 114
 115     declare_func(void, float *dst, const float *src0, const float *src1,
 116                  const float *win, int len);
 117
 118     call_ref(cdst, src0, src1, win, LEN / 2);
 119     call_new(odst, src0, src1, win, LEN / 2);
 120     for (i = 0; i < LEN; i++) {
 121         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
 122             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 123                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 124             fail();
 125             break;
 126         }
 127     }
 128     bench_new(odst, src0, src1, win, LEN / 2);
 129 }
 130
 131 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 132 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
 133 {
 134     LOCAL_ALIGNED_32(float, cdst, [LEN]);
 135     LOCAL_ALIGNED_32(float, odst, [LEN]);
 136     int i;
 137
 138     declare_func(void, float *dst, const float *src, float mul, int len);
 139
 140     memcpy(cdst, src2, LEN * sizeof(*src2));
 141     memcpy(odst, src2, LEN * sizeof(*src2));
 142
 143     call_ref(cdst, src0, src1[0], LEN);
 144     call_new(odst, src0, src1[0], LEN);
 145     for (i = 0; i < LEN; i++) {
 146         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
 147             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 148                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 149             fail();
 150             break;
 151         }
 152     }
 153     memcpy(odst, src2, LEN * sizeof(*src2));
 154     bench_new(odst, src0, src1[0], LEN);
 155 }
 156
 157 static void test_vector_dmul_scalar(const double *src0, const double *src1)
 158 {
 159     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 160     LOCAL_ALIGNED_32(double, odst, [LEN]);
 161     int i;
 162
 163     declare_func(void, double *dst, const double *src, double mul, int len);
 164
 165     call_ref(cdst, src0, src1[0], LEN);
 166     call_new(odst, src0, src1[0], LEN);
 167     for (i = 0; i < LEN; i++) {
 168         double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
 169         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
 170             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
 171                     cdst[i], odst[i], cdst[i] - odst[i]);
 172             fail();
 173             break;
 174         }
 175     }
 176     bench_new(odst, src0, src1[0], LEN);
 177 }
 178
 179 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
 180 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
 181 {
 182     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 183     LOCAL_ALIGNED_32(double, odst, [LEN]);
 184     int i;
 185
 186     declare_func(void, double *dst, const double *src, double mul, int len);
 187
 188     memcpy(cdst, src2, LEN * sizeof(*src2));
 189     memcpy(odst, src2, LEN * sizeof(*src2));
 190     call_ref(cdst, src0, src1[0], LEN);
 191     call_new(odst, src0, src1[0], LEN);
 192     for (i = 0; i < LEN; i++) {
 193         if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
 194             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 195                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 196             fail();
 197             break;
 198         }
 199     }
 200     memcpy(odst, src2, LEN * sizeof(*src2));
 201     bench_new(odst, src0, src1[0], LEN);
 202 }
 203
 204 static void test_butterflies_float(const float *src0, const float *src1)
 205 {
 206     LOCAL_ALIGNED_16(float,  cdst,  [LEN]);
 207     LOCAL_ALIGNED_16(float,  odst,  [LEN]);
 208     LOCAL_ALIGNED_16(float,  cdst1, [LEN]);
 209     LOCAL_ALIGNED_16(float,  odst1, [LEN]);
 210     int i;
 211
 212     declare_func(void, float *av_restrict src0, float *av_restrict src1,
 213     int len);
 214
 215     memcpy(cdst,  src0, LEN * sizeof(*src0));
 216     memcpy(cdst1, src1, LEN * sizeof(*src1));
 217     memcpy(odst,  src0, LEN * sizeof(*src0));
 218     memcpy(odst1, src1, LEN * sizeof(*src1));
 219
 220     call_ref(cdst, cdst1, LEN);
 221     call_new(odst, odst1, LEN);
 222     for (i = 0; i < LEN; i++) {
 223         if (!float_near_abs_eps(cdst[i],  odst[i],  FLT_EPSILON) ||
 224             !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
 225             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 226                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 227             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 228                     i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
 229             fail();
 230             break;
 231         }
 232     }
 233     memcpy(odst,  src0, LEN * sizeof(*src0));
 234     memcpy(odst1, src1, LEN * sizeof(*src1));
 235     bench_new(odst, odst1, LEN);
 236 }
 237
 238 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 239 static void test_scalarproduct_float(const float *src0, const float *src1)
 240 {
 241     float cprod, oprod;
 242
 243     declare_func_float(float, const float *src0, const float *src1, int len);
 244
 245     cprod = call_ref(src0, src1, LEN);
 246     oprod = call_new(src0, src1, LEN);
 247     if (!float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
 248         fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
 249                 cprod, oprod, cprod - oprod);
 250         fail();
 251     }
 252     bench_new(src0, src1, LEN);
 253 }
 254
 255 void checkasm_check_float_dsp(void)
 256 {
 257     LOCAL_ALIGNED_32(float,  src0,     [LEN]);
 258     LOCAL_ALIGNED_32(float,  src1,     [LEN]);
 259     LOCAL_ALIGNED_32(float,  src2,     [LEN]);
 260     LOCAL_ALIGNED_16(float,  src3,     [LEN]);
 261     LOCAL_ALIGNED_16(float,  src4,     [LEN]);
 262     LOCAL_ALIGNED_16(float,  src5,     [LEN]);
 263     LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
 264     LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
 265     LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
 266     AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
 267
 268     if (!fdsp) {
 269         fprintf(stderr, "floatdsp: Out of memory error\n");
 270         return;
 271     }
 272
 273     randomize_buffer(src0);
 274     randomize_buffer(src1);
 275     randomize_buffer(src2);
 276     randomize_buffer(src3);
 277     randomize_buffer(src4);
 278     randomize_buffer(src5);
 279     randomize_buffer(dbl_src0);
 280     randomize_buffer(dbl_src1);
 281     randomize_buffer(dbl_src2);
 282
 283     if (check_func(fdsp->vector_fmul, "vector_fmul"))
 284         test_vector_fmul(src0, src1);
 285     if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
 286         test_vector_fmul_add(src0, src1, src2);
 287     if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
 288         test_vector_fmul_scalar(src3, src4);
 289     if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
 290         test_vector_fmul(src0, src1);
 291     if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
 292         test_vector_fmul_window(src3, src4, src5);
 293     report("vector_fmul");
 294     if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
 295         test_vector_fmac_scalar(src0, src1, src2);
 296     report("vector_fmac");
 297     if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
 298         test_vector_dmul_scalar(dbl_src0, dbl_src1);
 299     report("vector_dmul");
 300     if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
 301         test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
 302     report("vector_dmac");
 303     if (check_func(fdsp->butterflies_float, "butterflies_float"))
 304         test_butterflies_float(src3, src4);
 305     report("butterflies_float");
 306     if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
 307         test_scalarproduct_float(src3, src4);
 308     report("scalarproduct_float");
 309
 310     av_freep(&fdsp);
 311 }