git.sesse.net Git - ffmpeg/blob - libavutil/float_dsp.c

   1 /*
   2  * Copyright 2005 Balatoni Denes
   3  * Copyright 2006 Loren Merritt
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "config.h"
  23 #include "attributes.h"
  24 #include "float_dsp.h"
  25 #include "mem.h"
  26
  27 static void vector_fmul_c(float *dst, const float *src0, const float *src1,
  28                           int len)
  29 {
  30     int i;
  31     for (i = 0; i < len; i++)
  32         dst[i] = src0[i] * src1[i];
  33 }
  34
  35 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
  36                                  int len)
  37 {
  38     int i;
  39     for (i = 0; i < len; i++)
  40         dst[i] += src[i] * mul;
  41 }
  42
  43 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
  44                                  int len)
  45 {
  46     int i;
  47     for (i = 0; i < len; i++)
  48         dst[i] = src[i] * mul;
  49 }
  50
  51 static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
  52                                  int len)
  53 {
  54     int i;
  55     for (i = 0; i < len; i++)
  56         dst[i] = src[i] * mul;
  57 }
  58
  59 static void vector_fmul_window_c(float *dst, const float *src0,
  60                                  const float *src1, const float *win, int len)
  61 {
  62     int i, j;
  63
  64     dst  += len;
  65     win  += len;
  66     src0 += len;
  67
  68     for (i = -len, j = len - 1; i < 0; i++, j--) {
  69         float s0 = src0[i];
  70         float s1 = src1[j];
  71         float wi = win[i];
  72         float wj = win[j];
  73         dst[i] = s0 * wj - s1 * wi;
  74         dst[j] = s0 * wi + s1 * wj;
  75     }
  76 }
  77
  78 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
  79                               const float *src2, int len){
  80     int i;
  81
  82     for (i = 0; i < len; i++)
  83         dst[i] = src0[i] * src1[i] + src2[i];
  84 }
  85
  86 static void vector_fmul_reverse_c(float *dst, const float *src0,
  87                                   const float *src1, int len)
  88 {
  89     int i;
  90
  91     src1 += len-1;
  92     for (i = 0; i < len; i++)
  93         dst[i] = src0[i] * src1[-i];
  94 }
  95
  96 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
  97                                 int len)
  98 {
  99     int i;
 100
 101     for (i = 0; i < len; i++) {
 102         float t = v1[i] - v2[i];
 103         v1[i] += v2[i];
 104         v2[i] = t;
 105     }
 106 }
 107
 108 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
 109 {
 110     float p = 0.0;
 111     int i;
 112
 113     for (i = 0; i < len; i++)
 114         p += v1[i] * v2[i];
 115
 116     return p;
 117 }
 118
 119 av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
 120 {
 121     AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
 122     if (!fdsp)
 123         return NULL;
 124
 125     fdsp->vector_fmul = vector_fmul_c;
 126     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 127     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
 128     fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
 129     fdsp->vector_fmul_window = vector_fmul_window_c;
 130     fdsp->vector_fmul_add = vector_fmul_add_c;
 131     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
 132     fdsp->butterflies_float = butterflies_float_c;
 133     fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
 134
 135     if (ARCH_AARCH64)
 136         ff_float_dsp_init_aarch64(fdsp);
 137     if (ARCH_ARM)
 138         ff_float_dsp_init_arm(fdsp);
 139     if (ARCH_PPC)
 140         ff_float_dsp_init_ppc(fdsp, bit_exact);
 141     if (ARCH_X86)
 142         ff_float_dsp_init_x86(fdsp);
 143     if (ARCH_MIPS)
 144         ff_float_dsp_init_mips(fdsp);
 145     return fdsp;
 146 }
 147
 148
 149 #ifdef TEST
 150
 151 #include <float.h>
 152 #include <math.h>
 153 #include <stdint.h>
 154 #include <stdlib.h>
 155 #include <string.h>
 156 #if HAVE_UNISTD_H
 157 #include <unistd.h> /* for getopt */
 158 #endif
 159 #if !HAVE_GETOPT
 160 #include "compat/getopt.c"
 161 #endif
 162
 163 #include "common.h"
 164 #include "cpu.h"
 165 #include "internal.h"
 166 #include "lfg.h"
 167 #include "log.h"
 168 #include "random_seed.h"
 169
 170 #define LEN 240
 171
 172 static void fill_float_array(AVLFG *lfg, float *a, int len)
 173 {
 174     int i;
 175     double bmg[2], stddev = 10.0, mean = 0.0;
 176
 177     for (i = 0; i < len; i += 2) {
 178         av_bmg_get(lfg, bmg);
 179         a[i]     = bmg[0] * stddev + mean;
 180         a[i + 1] = bmg[1] * stddev + mean;
 181     }
 182 }
 183 static int compare_floats(const float *a, const float *b, int len,
 184                           float max_diff)
 185 {
 186     int i;
 187     for (i = 0; i < len; i++) {
 188         if (fabsf(a[i] - b[i]) > max_diff) {
 189             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 190                    i, a[i], b[i], a[i] - b[i]);
 191             return -1;
 192         }
 193     }
 194     return 0;
 195 }
 196
 197 static void fill_double_array(AVLFG *lfg, double *a, int len)
 198 {
 199     int i;
 200     double bmg[2], stddev = 10.0, mean = 0.0;
 201
 202     for (i = 0; i < len; i += 2) {
 203         av_bmg_get(lfg, bmg);
 204         a[i]     = bmg[0] * stddev + mean;
 205         a[i + 1] = bmg[1] * stddev + mean;
 206     }
 207 }
 208
 209 static int compare_doubles(const double *a, const double *b, int len,
 210                            double max_diff)
 211 {
 212     int i;
 213
 214     for (i = 0; i < len; i++) {
 215         if (fabs(a[i] - b[i]) > max_diff) {
 216             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 217                    i, a[i], b[i], a[i] - b[i]);
 218             return -1;
 219         }
 220     }
 221     return 0;
 222 }
 223
 224 static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 225                             const float *v1, const float *v2)
 226 {
 227     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 228     LOCAL_ALIGNED(32, float, odst, [LEN]);
 229     int ret;
 230
 231     cdsp->vector_fmul(cdst, v1, v2, LEN);
 232     fdsp->vector_fmul(odst, v1, v2, LEN);
 233
 234     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 235         av_log(NULL, AV_LOG_ERROR, "vector_fmul failed\n");
 236
 237     return ret;
 238 }
 239
 240 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 241 static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 242                                    const float *v1, const float *src0, float scale)
 243 {
 244     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 245     LOCAL_ALIGNED(32, float, odst, [LEN]);
 246     int ret;
 247
 248     memcpy(cdst, v1, LEN * sizeof(*v1));
 249     memcpy(odst, v1, LEN * sizeof(*v1));
 250
 251     cdsp->vector_fmac_scalar(cdst, src0, scale, LEN);
 252     fdsp->vector_fmac_scalar(odst, src0, scale, LEN);
 253
 254     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST))
 255         av_log(NULL, AV_LOG_ERROR, "vector_fmac_scalar failed\n");
 256
 257     return ret;
 258 }
 259
 260 static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 261                                    const float *v1, float scale)
 262 {
 263     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 264     LOCAL_ALIGNED(32, float, odst, [LEN]);
 265     int ret;
 266
 267     cdsp->vector_fmul_scalar(cdst, v1, scale, LEN);
 268     fdsp->vector_fmul_scalar(odst, v1, scale, LEN);
 269
 270     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 271         av_log(NULL, AV_LOG_ERROR, "vector_fmul_scalar failed\n");
 272
 273     return ret;
 274 }
 275
 276 static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 277                                    const double *v1, double scale)
 278 {
 279     LOCAL_ALIGNED(32, double, cdst, [LEN]);
 280     LOCAL_ALIGNED(32, double, odst, [LEN]);
 281     int ret;
 282
 283     cdsp->vector_dmul_scalar(cdst, v1, scale, LEN);
 284     fdsp->vector_dmul_scalar(odst, v1, scale, LEN);
 285
 286     if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))
 287         av_log(NULL, AV_LOG_ERROR, "vector_dmul_scalar failed\n");
 288
 289     return ret;
 290 }
 291
 292 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 293 static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 294                                    const float *v1, const float *v2, const float *v3)
 295 {
 296     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 297     LOCAL_ALIGNED(32, float, odst, [LEN]);
 298     int ret;
 299
 300     cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2);
 301     fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2);
 302
 303     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST))
 304         av_log(NULL, AV_LOG_ERROR, "vector_fmul_window failed\n");
 305
 306     return ret;
 307 }
 308
 309 #define ARBITRARY_FMUL_ADD_CONST 0.005
 310 static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 311                                 const float *v1, const float *v2, const float *v3)
 312 {
 313     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 314     LOCAL_ALIGNED(32, float, odst, [LEN]);
 315     int ret;
 316
 317     cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN);
 318     fdsp->vector_fmul_add(odst, v1, v2, v3, LEN);
 319
 320     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST))
 321         av_log(NULL, AV_LOG_ERROR, "vector_fmul_add failed\n");
 322
 323     return ret;
 324 }
 325
 326 static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 327                                     const float *v1, const float *v2)
 328 {
 329     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 330     LOCAL_ALIGNED(32, float, odst, [LEN]);
 331     int ret;
 332
 333     cdsp->vector_fmul_reverse(cdst, v1, v2, LEN);
 334     fdsp->vector_fmul_reverse(odst, v1, v2, LEN);
 335
 336     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 337         av_log(NULL, AV_LOG_ERROR, "vector_fmul_reverse failed\n");
 338
 339     return ret;
 340 }
 341
 342 static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 343                                   const float *v1, const float *v2)
 344 {
 345     LOCAL_ALIGNED(32, float, cv1, [LEN]);
 346     LOCAL_ALIGNED(32, float, cv2, [LEN]);
 347     LOCAL_ALIGNED(32, float, ov1, [LEN]);
 348     LOCAL_ALIGNED(32, float, ov2, [LEN]);
 349     int ret;
 350
 351     memcpy(cv1, v1, LEN * sizeof(*v1));
 352     memcpy(cv2, v2, LEN * sizeof(*v2));
 353     memcpy(ov1, v1, LEN * sizeof(*v1));
 354     memcpy(ov2, v2, LEN * sizeof(*v2));
 355
 356     cdsp->butterflies_float(cv1, cv2, LEN);
 357     fdsp->butterflies_float(ov1, ov2, LEN);
 358
 359     if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) ||
 360         (ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON)))
 361         av_log(NULL, AV_LOG_ERROR, "butterflies_float failed\n");
 362
 363     return ret;
 364 }
 365
 366 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 367 static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 368                                     const float *v1, const float *v2)
 369 {
 370     float cprod, oprod;
 371     int ret;
 372
 373     cprod = cdsp->scalarproduct_float(v1, v2, LEN);
 374     oprod = fdsp->scalarproduct_float(v1, v2, LEN);
 375
 376     if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST))
 377         av_log(NULL, AV_LOG_ERROR, "scalarproduct_float failed\n");
 378
 379     return ret;
 380 }
 381
 382 int main(int argc, char **argv)
 383 {
 384     int ret = 0, seeded = 0;
 385     uint32_t seed;
 386     AVFloatDSPContext *fdsp, *cdsp;
 387     AVLFG lfg;
 388
 389     LOCAL_ALIGNED(32, float, src0, [LEN]);
 390     LOCAL_ALIGNED(32, float, src1, [LEN]);
 391     LOCAL_ALIGNED(32, float, src2, [LEN]);
 392     LOCAL_ALIGNED(32, double, dbl_src0, [LEN]);
 393     LOCAL_ALIGNED(32, double, dbl_src1, [LEN]);
 394
 395     for (;;) {
 396         int arg = getopt(argc, argv, "s:c:");
 397         if (arg == -1)
 398             break;
 399         switch (arg) {
 400         case 's':
 401             seed = strtoul(optarg, NULL, 10);
 402             seeded = 1;
 403             break;
 404         case 'c':
 405         {
 406             int cpuflags = av_get_cpu_flags();
 407
 408             if (av_parse_cpu_caps(&cpuflags, optarg) < 0)
 409                 return 1;
 410
 411             av_force_cpu_flags(cpuflags);
 412             break;
 413         }
 414         }
 415     }
 416     if (!seeded)
 417         seed = av_get_random_seed();
 418
 419     av_log(NULL, AV_LOG_INFO, "float_dsp-test: %s %u\n", seeded ? "seed" : "random seed", seed);
 420
 421     fdsp = avpriv_float_dsp_alloc(1);
 422     av_force_cpu_flags(0);
 423     cdsp = avpriv_float_dsp_alloc(1);
 424
 425     if (!fdsp || !cdsp) {
 426         ret = 1;
 427         goto end;
 428     }
 429
 430     av_lfg_init(&lfg, seed);
 431
 432     fill_float_array(&lfg, src0, LEN);
 433     fill_float_array(&lfg, src1, LEN);
 434     fill_float_array(&lfg, src2, LEN);
 435
 436     fill_double_array(&lfg, dbl_src0, LEN);
 437     fill_double_array(&lfg, dbl_src1, LEN);
 438
 439     if (test_vector_fmul(fdsp, cdsp, src0, src1))
 440         ret -= 1 << 0;
 441     if (test_vector_fmac_scalar(fdsp, cdsp, src2, src0, src1[0]))
 442         ret -= 1 << 1;
 443     if (test_vector_fmul_scalar(fdsp, cdsp, src0, src1[0]))
 444         ret -= 1 << 2;
 445     if (test_vector_fmul_window(fdsp, cdsp, src0, src1, src2))
 446         ret -= 1 << 3;
 447     if (test_vector_fmul_add(fdsp, cdsp, src0, src1, src2))
 448         ret -= 1 << 4;
 449     if (test_vector_fmul_reverse(fdsp, cdsp, src0, src1))
 450         ret -= 1 << 5;
 451     if (test_butterflies_float(fdsp, cdsp, src0, src1))
 452         ret -= 1 << 6;
 453     if (test_scalarproduct_float(fdsp, cdsp, src0, src1))
 454         ret -= 1 << 7;
 455     if (test_vector_dmul_scalar(fdsp, cdsp, dbl_src0, dbl_src1[0]))
 456         ret -= 1 << 8;
 457
 458 end:
 459     av_freep(&fdsp);
 460     av_freep(&cdsp);
 461     return ret;
 462 }
 463
 464 #endif /* TEST */