git.sesse.net Git - ffmpeg/blob - libavutil/float_dsp.c

   1 /*
   2  * Copyright 2005 Balatoni Denes
   3  * Copyright 2006 Loren Merritt
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "config.h"
  23 #include "attributes.h"
  24 #include "float_dsp.h"
  25 #include "mem.h"
  26
  27 static void vector_fmul_c(float *dst, const float *src0, const float *src1,
  28                           int len)
  29 {
  30     int i;
  31     for (i = 0; i < len; i++)
  32         dst[i] = src0[i] * src1[i];
  33 }
  34
  35 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
  36                                  int len)
  37 {
  38     int i;
  39     for (i = 0; i < len; i++)
  40         dst[i] += src[i] * mul;
  41 }
  42
  43 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
  44                                  int len)
  45 {
  46     int i;
  47     for (i = 0; i < len; i++)
  48         dst[i] = src[i] * mul;
  49 }
  50
  51 static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
  52                                  int len)
  53 {
  54     int i;
  55     for (i = 0; i < len; i++)
  56         dst[i] = src[i] * mul;
  57 }
  58
  59 static void vector_fmul_window_c(float *dst, const float *src0,
  60                                  const float *src1, const float *win, int len)
  61 {
  62     int i, j;
  63
  64     dst  += len;
  65     win  += len;
  66     src0 += len;
  67
  68     for (i = -len, j = len - 1; i < 0; i++, j--) {
  69         float s0 = src0[i];
  70         float s1 = src1[j];
  71         float wi = win[i];
  72         float wj = win[j];
  73         dst[i] = s0 * wj - s1 * wi;
  74         dst[j] = s0 * wi + s1 * wj;
  75     }
  76 }
  77
  78 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
  79                               const float *src2, int len){
  80     int i;
  81
  82     for (i = 0; i < len; i++)
  83         dst[i] = src0[i] * src1[i] + src2[i];
  84 }
  85
  86 static void vector_fmul_reverse_c(float *dst, const float *src0,
  87                                   const float *src1, int len)
  88 {
  89     int i;
  90
  91     src1 += len-1;
  92     for (i = 0; i < len; i++)
  93         dst[i] = src0[i] * src1[-i];
  94 }
  95
  96 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
  97                                 int len)
  98 {
  99     int i;
 100
 101     for (i = 0; i < len; i++) {
 102         float t = v1[i] - v2[i];
 103         v1[i] += v2[i];
 104         v2[i] = t;
 105     }
 106 }
 107
 108 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
 109 {
 110     float p = 0.0;
 111     int i;
 112
 113     for (i = 0; i < len; i++)
 114         p += v1[i] * v2[i];
 115
 116     return p;
 117 }
 118
 119 av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 120 {
 121     fdsp->vector_fmul = vector_fmul_c;
 122     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 123     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
 124     fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
 125     fdsp->vector_fmul_window = vector_fmul_window_c;
 126     fdsp->vector_fmul_add = vector_fmul_add_c;
 127     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
 128     fdsp->butterflies_float = butterflies_float_c;
 129     fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
 130
 131     if (ARCH_AARCH64)
 132         ff_float_dsp_init_aarch64(fdsp);
 133     if (ARCH_ARM)
 134         ff_float_dsp_init_arm(fdsp);
 135     if (ARCH_PPC)
 136         ff_float_dsp_init_ppc(fdsp, bit_exact);
 137     if (ARCH_X86)
 138         ff_float_dsp_init_x86(fdsp);
 139     if (ARCH_MIPS)
 140         ff_float_dsp_init_mips(fdsp);
 141 }
 142
 143 av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
 144 {
 145     AVFloatDSPContext *ret = av_mallocz(sizeof(AVFloatDSPContext));
 146     if (ret)
 147         avpriv_float_dsp_init(ret, bit_exact);
 148     return ret;
 149 }
 150
 151
 152 #ifdef TEST
 153
 154 #include <float.h>
 155 #include <math.h>
 156 #include <stdint.h>
 157 #include <stdlib.h>
 158 #include <string.h>
 159 #if HAVE_UNISTD_H
 160 #include <unistd.h> /* for getopt */
 161 #endif
 162 #if !HAVE_GETOPT
 163 #include "compat/getopt.c"
 164 #endif
 165
 166 #include "common.h"
 167 #include "cpu.h"
 168 #include "internal.h"
 169 #include "lfg.h"
 170 #include "log.h"
 171 #include "random_seed.h"
 172
 173 #define LEN 240
 174
 175 static void fill_float_array(AVLFG *lfg, float *a, int len)
 176 {
 177     int i;
 178     double bmg[2], stddev = 10.0, mean = 0.0;
 179
 180     for (i = 0; i < len; i += 2) {
 181         av_bmg_get(lfg, bmg);
 182         a[i]     = bmg[0] * stddev + mean;
 183         a[i + 1] = bmg[1] * stddev + mean;
 184     }
 185 }
 186 static int compare_floats(const float *a, const float *b, int len,
 187                           float max_diff)
 188 {
 189     int i;
 190     for (i = 0; i < len; i++) {
 191         if (fabsf(a[i] - b[i]) > max_diff) {
 192             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 193                    i, a[i], b[i], a[i] - b[i]);
 194             return -1;
 195         }
 196     }
 197     return 0;
 198 }
 199
 200 static void fill_double_array(AVLFG *lfg, double *a, int len)
 201 {
 202     int i;
 203     double bmg[2], stddev = 10.0, mean = 0.0;
 204
 205     for (i = 0; i < len; i += 2) {
 206         av_bmg_get(lfg, bmg);
 207         a[i]     = bmg[0] * stddev + mean;
 208         a[i + 1] = bmg[1] * stddev + mean;
 209     }
 210 }
 211
 212 static int compare_doubles(const double *a, const double *b, int len,
 213                            double max_diff)
 214 {
 215     int i;
 216
 217     for (i = 0; i < len; i++) {
 218         if (fabs(a[i] - b[i]) > max_diff) {
 219             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 220                    i, a[i], b[i], a[i] - b[i]);
 221             return -1;
 222         }
 223     }
 224     return 0;
 225 }
 226
 227 static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 228                             const float *v1, const float *v2)
 229 {
 230     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 231     LOCAL_ALIGNED(32, float, odst, [LEN]);
 232     int ret;
 233
 234     cdsp->vector_fmul(cdst, v1, v2, LEN);
 235     fdsp->vector_fmul(odst, v1, v2, LEN);
 236
 237     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 238         av_log(NULL, AV_LOG_ERROR, "vector_fmul failed\n");
 239
 240     return ret;
 241 }
 242
 243 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 244 static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 245                                    const float *v1, const float *src0, float scale)
 246 {
 247     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 248     LOCAL_ALIGNED(32, float, odst, [LEN]);
 249     int ret;
 250
 251     memcpy(cdst, v1, LEN * sizeof(*v1));
 252     memcpy(odst, v1, LEN * sizeof(*v1));
 253
 254     cdsp->vector_fmac_scalar(cdst, src0, scale, LEN);
 255     fdsp->vector_fmac_scalar(odst, src0, scale, LEN);
 256
 257     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST))
 258         av_log(NULL, AV_LOG_ERROR, "vector_fmac_scalar failed\n");
 259
 260     return ret;
 261 }
 262
 263 static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 264                                    const float *v1, float scale)
 265 {
 266     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 267     LOCAL_ALIGNED(32, float, odst, [LEN]);
 268     int ret;
 269
 270     cdsp->vector_fmul_scalar(cdst, v1, scale, LEN);
 271     fdsp->vector_fmul_scalar(odst, v1, scale, LEN);
 272
 273     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 274         av_log(NULL, AV_LOG_ERROR, "vector_fmul_scalar failed\n");
 275
 276     return ret;
 277 }
 278
 279 static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 280                                    const double *v1, double scale)
 281 {
 282     LOCAL_ALIGNED(32, double, cdst, [LEN]);
 283     LOCAL_ALIGNED(32, double, odst, [LEN]);
 284     int ret;
 285
 286     cdsp->vector_dmul_scalar(cdst, v1, scale, LEN);
 287     fdsp->vector_dmul_scalar(odst, v1, scale, LEN);
 288
 289     if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))
 290         av_log(NULL, AV_LOG_ERROR, "vector_dmul_scalar failed\n");
 291
 292     return ret;
 293 }
 294
 295 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 296 static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 297                                    const float *v1, const float *v2, const float *v3)
 298 {
 299     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 300     LOCAL_ALIGNED(32, float, odst, [LEN]);
 301     int ret;
 302
 303     cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2);
 304     fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2);
 305
 306     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST))
 307         av_log(NULL, AV_LOG_ERROR, "vector_fmul_window failed\n");
 308
 309     return ret;
 310 }
 311
 312 #define ARBITRARY_FMUL_ADD_CONST 0.005
 313 static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 314                                 const float *v1, const float *v2, const float *v3)
 315 {
 316     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 317     LOCAL_ALIGNED(32, float, odst, [LEN]);
 318     int ret;
 319
 320     cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN);
 321     fdsp->vector_fmul_add(odst, v1, v2, v3, LEN);
 322
 323     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST))
 324         av_log(NULL, AV_LOG_ERROR, "vector_fmul_add failed\n");
 325
 326     return ret;
 327 }
 328
 329 static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 330                                     const float *v1, const float *v2)
 331 {
 332     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 333     LOCAL_ALIGNED(32, float, odst, [LEN]);
 334     int ret;
 335
 336     cdsp->vector_fmul_reverse(cdst, v1, v2, LEN);
 337     fdsp->vector_fmul_reverse(odst, v1, v2, LEN);
 338
 339     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 340         av_log(NULL, AV_LOG_ERROR, "vector_fmul_reverse failed\n");
 341
 342     return ret;
 343 }
 344
 345 static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 346                                   const float *v1, const float *v2)
 347 {
 348     LOCAL_ALIGNED(32, float, cv1, [LEN]);
 349     LOCAL_ALIGNED(32, float, cv2, [LEN]);
 350     LOCAL_ALIGNED(32, float, ov1, [LEN]);
 351     LOCAL_ALIGNED(32, float, ov2, [LEN]);
 352     int ret;
 353
 354     memcpy(cv1, v1, LEN * sizeof(*v1));
 355     memcpy(cv2, v2, LEN * sizeof(*v2));
 356     memcpy(ov1, v1, LEN * sizeof(*v1));
 357     memcpy(ov2, v2, LEN * sizeof(*v2));
 358
 359     cdsp->butterflies_float(cv1, cv2, LEN);
 360     fdsp->butterflies_float(ov1, ov2, LEN);
 361
 362     if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) ||
 363         (ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON)))
 364         av_log(NULL, AV_LOG_ERROR, "butterflies_float failed\n");
 365
 366     return ret;
 367 }
 368
 369 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 370 static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 371                                     const float *v1, const float *v2)
 372 {
 373     float cprod, oprod;
 374     int ret;
 375
 376     cprod = cdsp->scalarproduct_float(v1, v2, LEN);
 377     oprod = fdsp->scalarproduct_float(v1, v2, LEN);
 378
 379     if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST))
 380         av_log(NULL, AV_LOG_ERROR, "scalarproduct_float failed\n");
 381
 382     return ret;
 383 }
 384
 385 int main(int argc, char **argv)
 386 {
 387     int ret = 0, seeded = 0;
 388     uint32_t seed;
 389     AVFloatDSPContext fdsp, cdsp;
 390     AVLFG lfg;
 391
 392     LOCAL_ALIGNED(32, float, src0, [LEN]);
 393     LOCAL_ALIGNED(32, float, src1, [LEN]);
 394     LOCAL_ALIGNED(32, float, src2, [LEN]);
 395     LOCAL_ALIGNED(32, double, dbl_src0, [LEN]);
 396     LOCAL_ALIGNED(32, double, dbl_src1, [LEN]);
 397
 398     for (;;) {
 399         int arg = getopt(argc, argv, "s:c:");
 400         if (arg == -1)
 401             break;
 402         switch (arg) {
 403         case 's':
 404             seed = strtoul(optarg, NULL, 10);
 405             seeded = 1;
 406             break;
 407         case 'c':
 408         {
 409             int cpuflags = av_get_cpu_flags();
 410
 411             if (av_parse_cpu_caps(&cpuflags, optarg) < 0)
 412                 return 1;
 413
 414             av_force_cpu_flags(cpuflags);
 415             break;
 416         }
 417         }
 418     }
 419     if (!seeded)
 420         seed = av_get_random_seed();
 421
 422     av_log(NULL, AV_LOG_INFO, "float_dsp-test: %s %u\n", seeded ? "seed" : "random seed", seed);
 423
 424     av_lfg_init(&lfg, seed);
 425
 426     fill_float_array(&lfg, src0, LEN);
 427     fill_float_array(&lfg, src1, LEN);
 428     fill_float_array(&lfg, src2, LEN);
 429
 430     fill_double_array(&lfg, dbl_src0, LEN);
 431     fill_double_array(&lfg, dbl_src1, LEN);
 432
 433     avpriv_float_dsp_init(&fdsp, 1);
 434     av_set_cpu_flags_mask(0);
 435     avpriv_float_dsp_init(&cdsp, 1);
 436
 437     if (test_vector_fmul(&fdsp, &cdsp, src0, src1))
 438         ret -= 1 << 0;
 439     if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))
 440         ret -= 1 << 1;
 441     if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))
 442         ret -= 1 << 2;
 443     if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))
 444         ret -= 1 << 3;
 445     if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))
 446         ret -= 1 << 4;
 447     if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))
 448         ret -= 1 << 5;
 449     if (test_butterflies_float(&fdsp, &cdsp, src0, src1))
 450         ret -= 1 << 6;
 451     if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))
 452         ret -= 1 << 7;
 453     if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))
 454         ret -= 1 << 8;
 455
 456     return ret;
 457 }
 458
 459 #endif /* TEST */