git.sesse.net Git - ffmpeg/blob - libavutil/float_dsp.c

   1 /*
   2  * Copyright 2005 Balatoni Denes
   3  * Copyright 2006 Loren Merritt
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "config.h"
  23 #include "attributes.h"
  24 #include "float_dsp.h"
  25
  26 static void vector_fmul_c(float *dst, const float *src0, const float *src1,
  27                           int len)
  28 {
  29     int i;
  30     for (i = 0; i < len; i++)
  31         dst[i] = src0[i] * src1[i];
  32 }
  33
  34 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
  35                                  int len)
  36 {
  37     int i;
  38     for (i = 0; i < len; i++)
  39         dst[i] += src[i] * mul;
  40 }
  41
  42 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
  43                                  int len)
  44 {
  45     int i;
  46     for (i = 0; i < len; i++)
  47         dst[i] = src[i] * mul;
  48 }
  49
  50 static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
  51                                  int len)
  52 {
  53     int i;
  54     for (i = 0; i < len; i++)
  55         dst[i] = src[i] * mul;
  56 }
  57
  58 static void vector_fmul_window_c(float *dst, const float *src0,
  59                                  const float *src1, const float *win, int len)
  60 {
  61     int i, j;
  62
  63     dst  += len;
  64     win  += len;
  65     src0 += len;
  66
  67     for (i = -len, j = len - 1; i < 0; i++, j--) {
  68         float s0 = src0[i];
  69         float s1 = src1[j];
  70         float wi = win[i];
  71         float wj = win[j];
  72         dst[i] = s0 * wj - s1 * wi;
  73         dst[j] = s0 * wi + s1 * wj;
  74     }
  75 }
  76
  77 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
  78                               const float *src2, int len){
  79     int i;
  80
  81     for (i = 0; i < len; i++)
  82         dst[i] = src0[i] * src1[i] + src2[i];
  83 }
  84
  85 static void vector_fmul_reverse_c(float *dst, const float *src0,
  86                                   const float *src1, int len)
  87 {
  88     int i;
  89
  90     src1 += len-1;
  91     for (i = 0; i < len; i++)
  92         dst[i] = src0[i] * src1[-i];
  93 }
  94
  95 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
  96                                 int len)
  97 {
  98     int i;
  99
 100     for (i = 0; i < len; i++) {
 101         float t = v1[i] - v2[i];
 102         v1[i] += v2[i];
 103         v2[i] = t;
 104     }
 105 }
 106
 107 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
 108 {
 109     float p = 0.0;
 110     int i;
 111
 112     for (i = 0; i < len; i++)
 113         p += v1[i] * v2[i];
 114
 115     return p;
 116 }
 117
 118 av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 119 {
 120     fdsp->vector_fmul = vector_fmul_c;
 121     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 122     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
 123     fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
 124     fdsp->vector_fmul_window = vector_fmul_window_c;
 125     fdsp->vector_fmul_add = vector_fmul_add_c;
 126     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
 127     fdsp->butterflies_float = butterflies_float_c;
 128     fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
 129
 130     if (ARCH_AARCH64)
 131         ff_float_dsp_init_aarch64(fdsp);
 132     if (ARCH_ARM)
 133         ff_float_dsp_init_arm(fdsp);
 134     if (ARCH_PPC)
 135         ff_float_dsp_init_ppc(fdsp, bit_exact);
 136     if (ARCH_X86)
 137         ff_float_dsp_init_x86(fdsp);
 138     if (ARCH_MIPS)
 139         ff_float_dsp_init_mips(fdsp);
 140 }
 141
 142 #ifdef TEST
 143
 144 #include <float.h>
 145 #include <math.h>
 146 #include <stdint.h>
 147 #include <stdlib.h>
 148 #include <string.h>
 149 #if HAVE_UNISTD_H
 150 #include <unistd.h> /* for getopt */
 151 #endif
 152 #if !HAVE_GETOPT
 153 #include "compat/getopt.c"
 154 #endif
 155
 156 #include "common.h"
 157 #include "cpu.h"
 158 #include "internal.h"
 159 #include "lfg.h"
 160 #include "log.h"
 161 #include "mem.h"
 162 #include "random_seed.h"
 163
 164 #define LEN 240
 165
 166 static void fill_float_array(AVLFG *lfg, float *a, int len)
 167 {
 168     int i;
 169     double bmg[2], stddev = 10.0, mean = 0.0;
 170
 171     for (i = 0; i < len; i += 2) {
 172         av_bmg_get(lfg, bmg);
 173         a[i]     = bmg[0] * stddev + mean;
 174         a[i + 1] = bmg[1] * stddev + mean;
 175     }
 176 }
 177 static int compare_floats(const float *a, const float *b, int len,
 178                           float max_diff)
 179 {
 180     int i;
 181     for (i = 0; i < len; i++) {
 182         if (fabsf(a[i] - b[i]) > max_diff) {
 183             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 184                    i, a[i], b[i], a[i] - b[i]);
 185             return -1;
 186         }
 187     }
 188     return 0;
 189 }
 190
 191 static void fill_double_array(AVLFG *lfg, double *a, int len)
 192 {
 193     int i;
 194     double bmg[2], stddev = 10.0, mean = 0.0;
 195
 196     for (i = 0; i < len; i += 2) {
 197         av_bmg_get(lfg, bmg);
 198         a[i]     = bmg[0] * stddev + mean;
 199         a[i + 1] = bmg[1] * stddev + mean;
 200     }
 201 }
 202
 203 static int compare_doubles(const double *a, const double *b, int len,
 204                            double max_diff)
 205 {
 206     int i;
 207
 208     for (i = 0; i < len; i++) {
 209         if (fabs(a[i] - b[i]) > max_diff) {
 210             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 211                    i, a[i], b[i], a[i] - b[i]);
 212             return -1;
 213         }
 214     }
 215     return 0;
 216 }
 217
 218 static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 219                             const float *v1, const float *v2)
 220 {
 221     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 222     LOCAL_ALIGNED(32, float, odst, [LEN]);
 223     int ret;
 224
 225     cdsp->vector_fmul(cdst, v1, v2, LEN);
 226     fdsp->vector_fmul(odst, v1, v2, LEN);
 227
 228     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 229         av_log(NULL, AV_LOG_ERROR, "vector_fmul failed\n");
 230
 231     return ret;
 232 }
 233
 234 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 235 static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 236                                    const float *v1, const float *src0, float scale)
 237 {
 238     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 239     LOCAL_ALIGNED(32, float, odst, [LEN]);
 240     int ret;
 241
 242     memcpy(cdst, v1, LEN * sizeof(*v1));
 243     memcpy(odst, v1, LEN * sizeof(*v1));
 244
 245     cdsp->vector_fmac_scalar(cdst, src0, scale, LEN);
 246     fdsp->vector_fmac_scalar(odst, src0, scale, LEN);
 247
 248     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST))
 249         av_log(NULL, AV_LOG_ERROR, "vector_fmac_scalar failed\n");
 250
 251     return ret;
 252 }
 253
 254 static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 255                                    const float *v1, float scale)
 256 {
 257     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 258     LOCAL_ALIGNED(32, float, odst, [LEN]);
 259     int ret;
 260
 261     cdsp->vector_fmul_scalar(cdst, v1, scale, LEN);
 262     fdsp->vector_fmul_scalar(odst, v1, scale, LEN);
 263
 264     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 265         av_log(NULL, AV_LOG_ERROR, "vector_fmul_scalar failed\n");
 266
 267     return ret;
 268 }
 269
 270 static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 271                                    const double *v1, double scale)
 272 {
 273     LOCAL_ALIGNED(32, double, cdst, [LEN]);
 274     LOCAL_ALIGNED(32, double, odst, [LEN]);
 275     int ret;
 276
 277     cdsp->vector_dmul_scalar(cdst, v1, scale, LEN);
 278     fdsp->vector_dmul_scalar(odst, v1, scale, LEN);
 279
 280     if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))
 281         av_log(NULL, AV_LOG_ERROR, "vector_dmul_scalar failed\n");
 282
 283     return ret;
 284 }
 285
 286 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 287 static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 288                                    const float *v1, const float *v2, const float *v3)
 289 {
 290     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 291     LOCAL_ALIGNED(32, float, odst, [LEN]);
 292     int ret;
 293
 294     cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2);
 295     fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2);
 296
 297     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST))
 298         av_log(NULL, AV_LOG_ERROR, "vector_fmul_window failed\n");
 299
 300     return ret;
 301 }
 302
 303 #define ARBITRARY_FMUL_ADD_CONST 0.005
 304 static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 305                                 const float *v1, const float *v2, const float *v3)
 306 {
 307     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 308     LOCAL_ALIGNED(32, float, odst, [LEN]);
 309     int ret;
 310
 311     cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN);
 312     fdsp->vector_fmul_add(odst, v1, v2, v3, LEN);
 313
 314     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST))
 315         av_log(NULL, AV_LOG_ERROR, "vector_fmul_add failed\n");
 316
 317     return ret;
 318 }
 319
 320 static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 321                                     const float *v1, const float *v2)
 322 {
 323     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 324     LOCAL_ALIGNED(32, float, odst, [LEN]);
 325     int ret;
 326
 327     cdsp->vector_fmul_reverse(cdst, v1, v2, LEN);
 328     fdsp->vector_fmul_reverse(odst, v1, v2, LEN);
 329
 330     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 331         av_log(NULL, AV_LOG_ERROR, "vector_fmul_reverse failed\n");
 332
 333     return ret;
 334 }
 335
 336 static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 337                                   const float *v1, const float *v2)
 338 {
 339     LOCAL_ALIGNED(32, float, cv1, [LEN]);
 340     LOCAL_ALIGNED(32, float, cv2, [LEN]);
 341     LOCAL_ALIGNED(32, float, ov1, [LEN]);
 342     LOCAL_ALIGNED(32, float, ov2, [LEN]);
 343     int ret;
 344
 345     memcpy(cv1, v1, LEN * sizeof(*v1));
 346     memcpy(cv2, v2, LEN * sizeof(*v2));
 347     memcpy(ov1, v1, LEN * sizeof(*v1));
 348     memcpy(ov2, v2, LEN * sizeof(*v2));
 349
 350     cdsp->butterflies_float(cv1, cv2, LEN);
 351     fdsp->butterflies_float(ov1, ov2, LEN);
 352
 353     if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) ||
 354         (ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON)))
 355         av_log(NULL, AV_LOG_ERROR, "butterflies_float failed\n");
 356
 357     return ret;
 358 }
 359
 360 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 361 static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 362                                     const float *v1, const float *v2)
 363 {
 364     float cprod, oprod;
 365     int ret;
 366
 367     cprod = cdsp->scalarproduct_float(v1, v2, LEN);
 368     oprod = fdsp->scalarproduct_float(v1, v2, LEN);
 369
 370     if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST))
 371         av_log(NULL, AV_LOG_ERROR, "scalarproduct_float failed\n");
 372
 373     return ret;
 374 }
 375
 376 int main(int argc, char **argv)
 377 {
 378     int ret = 0, seeded = 0;
 379     uint32_t seed;
 380     AVFloatDSPContext fdsp, cdsp;
 381     AVLFG lfg;
 382
 383     LOCAL_ALIGNED(32, float, src0, [LEN]);
 384     LOCAL_ALIGNED(32, float, src1, [LEN]);
 385     LOCAL_ALIGNED(32, float, src2, [LEN]);
 386     LOCAL_ALIGNED(32, double, dbl_src0, [LEN]);
 387     LOCAL_ALIGNED(32, double, dbl_src1, [LEN]);
 388
 389     for (;;) {
 390         int arg = getopt(argc, argv, "s:c:");
 391         if (arg == -1)
 392             break;
 393         switch (arg) {
 394         case 's':
 395             seed = strtoul(optarg, NULL, 10);
 396             seeded = 1;
 397             break;
 398         case 'c':
 399         {
 400             int cpuflags = av_get_cpu_flags();
 401
 402             if (av_parse_cpu_caps(&cpuflags, optarg) < 0)
 403                 return 1;
 404
 405             av_force_cpu_flags(cpuflags);
 406             break;
 407         }
 408         }
 409     }
 410     if (!seeded)
 411         seed = av_get_random_seed();
 412
 413     av_log(NULL, AV_LOG_INFO, "float_dsp-test: %s %u\n", seeded ? "seed" : "random seed", seed);
 414
 415     av_lfg_init(&lfg, seed);
 416
 417     fill_float_array(&lfg, src0, LEN);
 418     fill_float_array(&lfg, src1, LEN);
 419     fill_float_array(&lfg, src2, LEN);
 420
 421     fill_double_array(&lfg, dbl_src0, LEN);
 422     fill_double_array(&lfg, dbl_src1, LEN);
 423
 424     avpriv_float_dsp_init(&fdsp, 1);
 425     av_set_cpu_flags_mask(0);
 426     avpriv_float_dsp_init(&cdsp, 1);
 427
 428     if (test_vector_fmul(&fdsp, &cdsp, src0, src1))
 429         ret -= 1 << 0;
 430     if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))
 431         ret -= 1 << 1;
 432     if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))
 433         ret -= 1 << 2;
 434     if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))
 435         ret -= 1 << 3;
 436     if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))
 437         ret -= 1 << 4;
 438     if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))
 439         ret -= 1 << 5;
 440     if (test_butterflies_float(&fdsp, &cdsp, src0, src1))
 441         ret -= 1 << 6;
 442     if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))
 443         ret -= 1 << 7;
 444     if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))
 445         ret -= 1 << 8;
 446
 447     return ret;
 448 }
 449
 450 #endif /* TEST */