git.sesse.net Git - ffmpeg/blob - tests/checkasm/float_dsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include "config.h"
  20
  21 #include <float.h>
  22 #include <stdint.h>
  23
  24 #include "libavutil/float_dsp.h"
  25 #include "libavutil/internal.h"
  26 #include "checkasm.h"
  27
  28 #define LEN 256
  29
  30 #define randomize_buffer(buf)                 \
  31 do {                                          \
  32     int i;                                    \
  33     double bmg[2], stddev = 10.0, mean = 0.0; \
  34                                               \
  35     for (i = 0; i < LEN; i += 2) {            \
  36         av_bmg_get(&checkasm_lfg, bmg);       \
  37         buf[i]     = bmg[0] * stddev + mean;  \
  38         buf[i + 1] = bmg[1] * stddev + mean;  \
  39     }                                         \
  40 } while(0);
  41
  42 static void test_vector_fmul(const float *src0, const float *src1)
  43 {
  44     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  45     LOCAL_ALIGNED_32(float, odst, [LEN]);
  46     int i;
  47
  48     declare_func(void, float *dst, const float *src0, const float *src1,
  49                  int len);
  50
  51     call_ref(cdst, src0, src1, LEN);
  52     call_new(odst, src0, src1, LEN);
  53     for (i = 0; i < LEN; i++) {
  54         if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
  55             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  56                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  57             fail();
  58             break;
  59         }
  60     }
  61     bench_new(odst, src0, src1, LEN);
  62 }
  63
  64 static void test_vector_dmul(const double *src0, const double *src1)
  65 {
  66     LOCAL_ALIGNED_32(double, cdst, [LEN]);
  67     LOCAL_ALIGNED_32(double, odst, [LEN]);
  68     int i;
  69
  70     declare_func(void, double *dst, const double *src0, const double *src1,
  71                  int len);
  72
  73     call_ref(cdst, src0, src1, LEN);
  74     call_new(odst, src0, src1, LEN);
  75     for (i = 0; i < LEN; i++) {
  76         if (!double_near_abs_eps(cdst[i], odst[i], DBL_EPSILON)) {
  77             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  78                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  79             fail();
  80             break;
  81         }
  82     }
  83     bench_new(odst, src0, src1, LEN);
  84 }
  85
  86 #define ARBITRARY_FMUL_ADD_CONST 0.005
  87 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
  88 {
  89     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  90     LOCAL_ALIGNED_32(float, odst, [LEN]);
  91     int i;
  92
  93     declare_func(void, float *dst, const float *src0, const float *src1,
  94                      const float *src2, int len);
  95
  96     call_ref(cdst, src0, src1, src2, LEN);
  97     call_new(odst, src0, src1, src2, LEN);
  98     for (i = 0; i < LEN; i++) {
  99         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
 100             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 101                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 102             fail();
 103             break;
 104         }
 105     }
 106     bench_new(odst, src0, src1, src2, LEN);
 107 }
 108
 109 static void test_vector_fmul_scalar(const float *src0, const float *src1)
 110 {
 111     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 112     LOCAL_ALIGNED_16(float, odst, [LEN]);
 113     int i;
 114
 115     declare_func(void, float *dst, const float *src, float mul, int len);
 116
 117     call_ref(cdst, src0, src1[0], LEN);
 118     call_new(odst, src0, src1[0], LEN);
 119         for (i = 0; i < LEN; i++) {
 120             if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
 121                 fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 122                         i, cdst[i], odst[i], cdst[i] - odst[i]);
 123                 fail();
 124                 break;
 125             }
 126         }
 127     bench_new(odst, src0, src1[0], LEN);
 128 }
 129
 130 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 131 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
 132 {
 133     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 134     LOCAL_ALIGNED_16(float, odst, [LEN]);
 135     int i;
 136
 137     declare_func(void, float *dst, const float *src0, const float *src1,
 138                  const float *win, int len);
 139
 140     call_ref(cdst, src0, src1, win, LEN / 2);
 141     call_new(odst, src0, src1, win, LEN / 2);
 142     for (i = 0; i < LEN; i++) {
 143         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
 144             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 145                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 146             fail();
 147             break;
 148         }
 149     }
 150     bench_new(odst, src0, src1, win, LEN / 2);
 151 }
 152
 153 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 154 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
 155 {
 156     LOCAL_ALIGNED_32(float, cdst, [LEN]);
 157     LOCAL_ALIGNED_32(float, odst, [LEN]);
 158     int i;
 159
 160     declare_func(void, float *dst, const float *src, float mul, int len);
 161
 162     memcpy(cdst, src2, LEN * sizeof(*src2));
 163     memcpy(odst, src2, LEN * sizeof(*src2));
 164
 165     call_ref(cdst, src0, src1[0], LEN);
 166     call_new(odst, src0, src1[0], LEN);
 167     for (i = 0; i < LEN; i++) {
 168         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
 169             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 170                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 171             fail();
 172             break;
 173         }
 174     }
 175     memcpy(odst, src2, LEN * sizeof(*src2));
 176     bench_new(odst, src0, src1[0], LEN);
 177 }
 178
 179 static void test_vector_dmul_scalar(const double *src0, const double *src1)
 180 {
 181     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 182     LOCAL_ALIGNED_32(double, odst, [LEN]);
 183     int i;
 184
 185     declare_func(void, double *dst, const double *src, double mul, int len);
 186
 187     call_ref(cdst, src0, src1[0], LEN);
 188     call_new(odst, src0, src1[0], LEN);
 189     for (i = 0; i < LEN; i++) {
 190         double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
 191         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
 192             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
 193                     cdst[i], odst[i], cdst[i] - odst[i]);
 194             fail();
 195             break;
 196         }
 197     }
 198     bench_new(odst, src0, src1[0], LEN);
 199 }
 200
 201 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
 202 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
 203 {
 204     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 205     LOCAL_ALIGNED_32(double, odst, [LEN]);
 206     int i;
 207
 208     declare_func(void, double *dst, const double *src, double mul, int len);
 209
 210     memcpy(cdst, src2, LEN * sizeof(*src2));
 211     memcpy(odst, src2, LEN * sizeof(*src2));
 212     call_ref(cdst, src0, src1[0], LEN);
 213     call_new(odst, src0, src1[0], LEN);
 214     for (i = 0; i < LEN; i++) {
 215         if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
 216             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 217                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 218             fail();
 219             break;
 220         }
 221     }
 222     memcpy(odst, src2, LEN * sizeof(*src2));
 223     bench_new(odst, src0, src1[0], LEN);
 224 }
 225
 226 static void test_butterflies_float(const float *src0, const float *src1)
 227 {
 228     LOCAL_ALIGNED_16(float,  cdst,  [LEN]);
 229     LOCAL_ALIGNED_16(float,  odst,  [LEN]);
 230     LOCAL_ALIGNED_16(float,  cdst1, [LEN]);
 231     LOCAL_ALIGNED_16(float,  odst1, [LEN]);
 232     int i;
 233
 234     declare_func(void, float *av_restrict src0, float *av_restrict src1,
 235     int len);
 236
 237     memcpy(cdst,  src0, LEN * sizeof(*src0));
 238     memcpy(cdst1, src1, LEN * sizeof(*src1));
 239     memcpy(odst,  src0, LEN * sizeof(*src0));
 240     memcpy(odst1, src1, LEN * sizeof(*src1));
 241
 242     call_ref(cdst, cdst1, LEN);
 243     call_new(odst, odst1, LEN);
 244     for (i = 0; i < LEN; i++) {
 245         if (!float_near_abs_eps(cdst[i],  odst[i],  FLT_EPSILON) ||
 246             !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
 247             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 248                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 249             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 250                     i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
 251             fail();
 252             break;
 253         }
 254     }
 255     memcpy(odst,  src0, LEN * sizeof(*src0));
 256     memcpy(odst1, src1, LEN * sizeof(*src1));
 257     bench_new(odst, odst1, LEN);
 258 }
 259
 260 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 261 static void test_scalarproduct_float(const float *src0, const float *src1)
 262 {
 263     float cprod, oprod;
 264
 265     declare_func_float(float, const float *src0, const float *src1, int len);
 266
 267     cprod = call_ref(src0, src1, LEN);
 268     oprod = call_new(src0, src1, LEN);
 269     if (!float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
 270         fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
 271                 cprod, oprod, cprod - oprod);
 272         fail();
 273     }
 274     bench_new(src0, src1, LEN);
 275 }
 276
 277 void checkasm_check_float_dsp(void)
 278 {
 279     LOCAL_ALIGNED_32(float,  src0,     [LEN]);
 280     LOCAL_ALIGNED_32(float,  src1,     [LEN]);
 281     LOCAL_ALIGNED_32(float,  src2,     [LEN]);
 282     LOCAL_ALIGNED_16(float,  src3,     [LEN]);
 283     LOCAL_ALIGNED_16(float,  src4,     [LEN]);
 284     LOCAL_ALIGNED_16(float,  src5,     [LEN]);
 285     LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
 286     LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
 287     LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
 288     AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
 289
 290     if (!fdsp) {
 291         fprintf(stderr, "floatdsp: Out of memory error\n");
 292         return;
 293     }
 294
 295     randomize_buffer(src0);
 296     randomize_buffer(src1);
 297     randomize_buffer(src2);
 298     randomize_buffer(src3);
 299     randomize_buffer(src4);
 300     randomize_buffer(src5);
 301     randomize_buffer(dbl_src0);
 302     randomize_buffer(dbl_src1);
 303     randomize_buffer(dbl_src2);
 304
 305     if (check_func(fdsp->vector_fmul, "vector_fmul"))
 306         test_vector_fmul(src0, src1);
 307     if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
 308         test_vector_fmul_add(src0, src1, src2);
 309     if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
 310         test_vector_fmul_scalar(src3, src4);
 311     if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
 312         test_vector_fmul(src0, src1);
 313     if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
 314         test_vector_fmul_window(src3, src4, src5);
 315     report("vector_fmul");
 316     if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
 317         test_vector_fmac_scalar(src0, src1, src2);
 318     report("vector_fmac");
 319     if (check_func(fdsp->vector_dmul, "vector_dmul"))
 320         test_vector_dmul(dbl_src0, dbl_src1);
 321     if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
 322         test_vector_dmul_scalar(dbl_src0, dbl_src1);
 323     report("vector_dmul");
 324     if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
 325         test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
 326     report("vector_dmac");
 327     if (check_func(fdsp->butterflies_float, "butterflies_float"))
 328         test_butterflies_float(src3, src4);
 329     report("butterflies_float");
 330     if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
 331         test_scalarproduct_float(src3, src4);
 332     report("scalarproduct_float");
 333
 334     av_freep(&fdsp);
 335 }