git.sesse.net Git - ffmpeg/blob - tests/checkasm/float_dsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include "config.h"
  20
  21 #include <float.h>
  22 #include <stdint.h>
  23
  24 #include "libavutil/float_dsp.h"
  25 #include "libavutil/internal.h"
  26 #include "checkasm.h"
  27
  28 #define LEN 256
  29
  30 #define randomize_buffer(buf)                 \
  31 do {                                          \
  32     int i;                                    \
  33     double bmg[2], stddev = 10.0, mean = 0.0; \
  34                                               \
  35     for (i = 0; i < LEN; i += 2) {            \
  36         av_bmg_get(&checkasm_lfg, bmg);       \
  37         buf[i]     = bmg[0] * stddev + mean;  \
  38         buf[i + 1] = bmg[1] * stddev + mean;  \
  39     }                                         \
  40 } while(0);
  41
  42 static void test_vector_fmul(const float *src0, const float *src1)
  43 {
  44     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  45     LOCAL_ALIGNED_32(float, odst, [LEN]);
  46     int i;
  47
  48     declare_func(void, float *dst, const float *src0, const float *src1,
  49                  int len);
  50
  51     call_ref(cdst, src0, src1, LEN);
  52     call_new(odst, src0, src1, LEN);
  53     for (i = 0; i < LEN; i++) {
  54         double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
  55         if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
  56             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  57                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  58             fail();
  59             break;
  60         }
  61     }
  62     bench_new(odst, src0, src1, LEN);
  63 }
  64
  65 static void test_vector_dmul(const double *src0, const double *src1)
  66 {
  67     LOCAL_ALIGNED_32(double, cdst, [LEN]);
  68     LOCAL_ALIGNED_32(double, odst, [LEN]);
  69     int i;
  70
  71     declare_func(void, double *dst, const double *src0, const double *src1,
  72                  int len);
  73
  74     call_ref(cdst, src0, src1, LEN);
  75     call_new(odst, src0, src1, LEN);
  76     for (i = 0; i < LEN; i++) {
  77         double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
  78         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
  79             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  80                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  81             fail();
  82             break;
  83         }
  84     }
  85     bench_new(odst, src0, src1, LEN);
  86 }
  87
  88 #define ARBITRARY_FMUL_ADD_CONST 0.005
  89 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
  90 {
  91     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  92     LOCAL_ALIGNED_32(float, odst, [LEN]);
  93     int i;
  94
  95     declare_func(void, float *dst, const float *src0, const float *src1,
  96                      const float *src2, int len);
  97
  98     call_ref(cdst, src0, src1, src2, LEN);
  99     call_new(odst, src0, src1, src2, LEN);
 100     for (i = 0; i < LEN; i++) {
 101         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
 102             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 103                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 104             fail();
 105             break;
 106         }
 107     }
 108     bench_new(odst, src0, src1, src2, LEN);
 109 }
 110
 111 static void test_vector_fmul_scalar(const float *src0, const float *src1)
 112 {
 113     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 114     LOCAL_ALIGNED_16(float, odst, [LEN]);
 115     int i;
 116
 117     declare_func(void, float *dst, const float *src, float mul, int len);
 118
 119     call_ref(cdst, src0, src1[0], LEN);
 120     call_new(odst, src0, src1[0], LEN);
 121         for (i = 0; i < LEN; i++) {
 122             double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
 123             if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
 124                 fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 125                         i, cdst[i], odst[i], cdst[i] - odst[i]);
 126                 fail();
 127                 break;
 128             }
 129         }
 130     bench_new(odst, src0, src1[0], LEN);
 131 }
 132
 133 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 134 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
 135 {
 136     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 137     LOCAL_ALIGNED_16(float, odst, [LEN]);
 138     int i;
 139
 140     declare_func(void, float *dst, const float *src0, const float *src1,
 141                  const float *win, int len);
 142
 143     call_ref(cdst, src0, src1, win, LEN / 2);
 144     call_new(odst, src0, src1, win, LEN / 2);
 145     for (i = 0; i < LEN; i++) {
 146         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
 147             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 148                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 149             fail();
 150             break;
 151         }
 152     }
 153     bench_new(odst, src0, src1, win, LEN / 2);
 154 }
 155
 156 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 157 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
 158 {
 159     LOCAL_ALIGNED_32(float, cdst, [LEN]);
 160     LOCAL_ALIGNED_32(float, odst, [LEN]);
 161     int i;
 162
 163     declare_func(void, float *dst, const float *src, float mul, int len);
 164
 165     memcpy(cdst, src2, LEN * sizeof(*src2));
 166     memcpy(odst, src2, LEN * sizeof(*src2));
 167
 168     call_ref(cdst, src0, src1[0], LEN);
 169     call_new(odst, src0, src1[0], LEN);
 170     for (i = 0; i < LEN; i++) {
 171         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
 172             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 173                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 174             fail();
 175             break;
 176         }
 177     }
 178     memcpy(odst, src2, LEN * sizeof(*src2));
 179     bench_new(odst, src0, src1[0], LEN);
 180 }
 181
 182 static void test_vector_dmul_scalar(const double *src0, const double *src1)
 183 {
 184     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 185     LOCAL_ALIGNED_32(double, odst, [LEN]);
 186     int i;
 187
 188     declare_func(void, double *dst, const double *src, double mul, int len);
 189
 190     call_ref(cdst, src0, src1[0], LEN);
 191     call_new(odst, src0, src1[0], LEN);
 192     for (i = 0; i < LEN; i++) {
 193         double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
 194         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
 195             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
 196                     cdst[i], odst[i], cdst[i] - odst[i]);
 197             fail();
 198             break;
 199         }
 200     }
 201     bench_new(odst, src0, src1[0], LEN);
 202 }
 203
 204 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
 205 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
 206 {
 207     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 208     LOCAL_ALIGNED_32(double, odst, [LEN]);
 209     int i;
 210
 211     declare_func(void, double *dst, const double *src, double mul, int len);
 212
 213     memcpy(cdst, src2, LEN * sizeof(*src2));
 214     memcpy(odst, src2, LEN * sizeof(*src2));
 215     call_ref(cdst, src0, src1[0], LEN);
 216     call_new(odst, src0, src1[0], LEN);
 217     for (i = 0; i < LEN; i++) {
 218         if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
 219             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 220                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 221             fail();
 222             break;
 223         }
 224     }
 225     memcpy(odst, src2, LEN * sizeof(*src2));
 226     bench_new(odst, src0, src1[0], LEN);
 227 }
 228
 229 static void test_butterflies_float(const float *src0, const float *src1)
 230 {
 231     LOCAL_ALIGNED_16(float,  cdst,  [LEN]);
 232     LOCAL_ALIGNED_16(float,  odst,  [LEN]);
 233     LOCAL_ALIGNED_16(float,  cdst1, [LEN]);
 234     LOCAL_ALIGNED_16(float,  odst1, [LEN]);
 235     int i;
 236
 237     declare_func(void, float *av_restrict src0, float *av_restrict src1,
 238     int len);
 239
 240     memcpy(cdst,  src0, LEN * sizeof(*src0));
 241     memcpy(cdst1, src1, LEN * sizeof(*src1));
 242     memcpy(odst,  src0, LEN * sizeof(*src0));
 243     memcpy(odst1, src1, LEN * sizeof(*src1));
 244
 245     call_ref(cdst, cdst1, LEN);
 246     call_new(odst, odst1, LEN);
 247     for (i = 0; i < LEN; i++) {
 248         if (!float_near_abs_eps(cdst[i],  odst[i],  FLT_EPSILON) ||
 249             !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
 250             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 251                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 252             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 253                     i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
 254             fail();
 255             break;
 256         }
 257     }
 258     memcpy(odst,  src0, LEN * sizeof(*src0));
 259     memcpy(odst1, src1, LEN * sizeof(*src1));
 260     bench_new(odst, odst1, LEN);
 261 }
 262
 263 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 264 static void test_scalarproduct_float(const float *src0, const float *src1)
 265 {
 266     float cprod, oprod;
 267
 268     declare_func_float(float, const float *src0, const float *src1, int len);
 269
 270     cprod = call_ref(src0, src1, LEN);
 271     oprod = call_new(src0, src1, LEN);
 272     if (!float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
 273         fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
 274                 cprod, oprod, cprod - oprod);
 275         fail();
 276     }
 277     bench_new(src0, src1, LEN);
 278 }
 279
 280 void checkasm_check_float_dsp(void)
 281 {
 282     LOCAL_ALIGNED_32(float,  src0,     [LEN]);
 283     LOCAL_ALIGNED_32(float,  src1,     [LEN]);
 284     LOCAL_ALIGNED_32(float,  src2,     [LEN]);
 285     LOCAL_ALIGNED_16(float,  src3,     [LEN]);
 286     LOCAL_ALIGNED_16(float,  src4,     [LEN]);
 287     LOCAL_ALIGNED_16(float,  src5,     [LEN]);
 288     LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
 289     LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
 290     LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
 291     AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
 292
 293     if (!fdsp) {
 294         fprintf(stderr, "floatdsp: Out of memory error\n");
 295         return;
 296     }
 297
 298     randomize_buffer(src0);
 299     randomize_buffer(src1);
 300     randomize_buffer(src2);
 301     randomize_buffer(src3);
 302     randomize_buffer(src4);
 303     randomize_buffer(src5);
 304     randomize_buffer(dbl_src0);
 305     randomize_buffer(dbl_src1);
 306     randomize_buffer(dbl_src2);
 307
 308     if (check_func(fdsp->vector_fmul, "vector_fmul"))
 309         test_vector_fmul(src0, src1);
 310     if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
 311         test_vector_fmul_add(src0, src1, src2);
 312     if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
 313         test_vector_fmul_scalar(src3, src4);
 314     if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
 315         test_vector_fmul(src0, src1);
 316     if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
 317         test_vector_fmul_window(src3, src4, src5);
 318     report("vector_fmul");
 319     if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
 320         test_vector_fmac_scalar(src0, src1, src2);
 321     report("vector_fmac");
 322     if (check_func(fdsp->vector_dmul, "vector_dmul"))
 323         test_vector_dmul(dbl_src0, dbl_src1);
 324     if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
 325         test_vector_dmul_scalar(dbl_src0, dbl_src1);
 326     report("vector_dmul");
 327     if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
 328         test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
 329     report("vector_dmac");
 330     if (check_func(fdsp->butterflies_float, "butterflies_float"))
 331         test_butterflies_float(src3, src4);
 332     report("butterflies_float");
 333     if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
 334         test_scalarproduct_float(src3, src4);
 335     report("scalarproduct_float");
 336
 337     av_freep(&fdsp);
 338 }