git.sesse.net Git - ffmpeg/blob - libavutil/float_dsp.c

   1 /*
   2  * Copyright 2005 Balatoni Denes
   3  * Copyright 2006 Loren Merritt
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "config.h"
  23 #include "attributes.h"
  24 #include "float_dsp.h"
  25
  26 static void vector_fmul_c(float *dst, const float *src0, const float *src1,
  27                           int len)
  28 {
  29     int i;
  30     for (i = 0; i < len; i++)
  31         dst[i] = src0[i] * src1[i];
  32 }
  33
  34 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
  35                                  int len)
  36 {
  37     int i;
  38     for (i = 0; i < len; i++)
  39         dst[i] += src[i] * mul;
  40 }
  41
  42 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
  43                                  int len)
  44 {
  45     int i;
  46     for (i = 0; i < len; i++)
  47         dst[i] = src[i] * mul;
  48 }
  49
  50 static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
  51                                  int len)
  52 {
  53     int i;
  54     for (i = 0; i < len; i++)
  55         dst[i] = src[i] * mul;
  56 }
  57
  58 static void vector_fmul_window_c(float *dst, const float *src0,
  59                                  const float *src1, const float *win, int len)
  60 {
  61     int i, j;
  62
  63     dst  += len;
  64     win  += len;
  65     src0 += len;
  66
  67     for (i = -len, j = len - 1; i < 0; i++, j--) {
  68         float s0 = src0[i];
  69         float s1 = src1[j];
  70         float wi = win[i];
  71         float wj = win[j];
  72         dst[i] = s0 * wj - s1 * wi;
  73         dst[j] = s0 * wi + s1 * wj;
  74     }
  75 }
  76
  77 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
  78                               const float *src2, int len){
  79     int i;
  80
  81     for (i = 0; i < len; i++)
  82         dst[i] = src0[i] * src1[i] + src2[i];
  83 }
  84
  85 static void vector_fmul_reverse_c(float *dst, const float *src0,
  86                                   const float *src1, int len)
  87 {
  88     int i;
  89
  90     src1 += len-1;
  91     for (i = 0; i < len; i++)
  92         dst[i] = src0[i] * src1[-i];
  93 }
  94
  95 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
  96                                 int len)
  97 {
  98     int i;
  99
 100     for (i = 0; i < len; i++) {
 101         float t = v1[i] - v2[i];
 102         v1[i] += v2[i];
 103         v2[i] = t;
 104     }
 105 }
 106
 107 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
 108 {
 109     float p = 0.0;
 110     int i;
 111
 112     for (i = 0; i < len; i++)
 113         p += v1[i] * v2[i];
 114
 115     return p;
 116 }
 117
 118 av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 119 {
 120     fdsp->vector_fmul = vector_fmul_c;
 121     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 122     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
 123     fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
 124     fdsp->vector_fmul_window = vector_fmul_window_c;
 125     fdsp->vector_fmul_add = vector_fmul_add_c;
 126     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
 127     fdsp->butterflies_float = butterflies_float_c;
 128     fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
 129
 130     if (ARCH_AARCH64)
 131         ff_float_dsp_init_aarch64(fdsp);
 132     if (ARCH_ARM)
 133         ff_float_dsp_init_arm(fdsp);
 134     if (ARCH_PPC)
 135         ff_float_dsp_init_ppc(fdsp, bit_exact);
 136     if (ARCH_X86)
 137         ff_float_dsp_init_x86(fdsp);
 138     if (ARCH_MIPS)
 139         ff_float_dsp_init_mips(fdsp);
 140 }
 141
 142 #ifdef TEST
 143
 144 #include <float.h>
 145 #include <math.h>
 146 #include <stdint.h>
 147 #include <stdlib.h>
 148 #include <string.h>
 149
 150 #include "common.h"
 151 #include "cpu.h"
 152 #include "internal.h"
 153 #include "lfg.h"
 154 #include "log.h"
 155 #include "mem.h"
 156 #include "random_seed.h"
 157
 158 #define LEN 240
 159
 160 static void fill_float_array(AVLFG *lfg, float *a, int len)
 161 {
 162     int i;
 163     double bmg[2], stddev = 10.0, mean = 0.0;
 164
 165     for (i = 0; i < len; i += 2) {
 166         av_bmg_get(lfg, bmg);
 167         a[i]     = bmg[0] * stddev + mean;
 168         a[i + 1] = bmg[1] * stddev + mean;
 169     }
 170 }
 171 static int compare_floats(const float *a, const float *b, int len,
 172                           float max_diff)
 173 {
 174     int i;
 175     for (i = 0; i < len; i++) {
 176         if (fabsf(a[i] - b[i]) > max_diff) {
 177             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 178                    i, a[i], b[i], a[i] - b[i]);
 179             return -1;
 180         }
 181     }
 182     return 0;
 183 }
 184
 185 static void fill_double_array(AVLFG *lfg, double *a, int len)
 186 {
 187     int i;
 188     double bmg[2], stddev = 10.0, mean = 0.0;
 189
 190     for (i = 0; i < len; i += 2) {
 191         av_bmg_get(lfg, bmg);
 192         a[i]     = bmg[0] * stddev + mean;
 193         a[i + 1] = bmg[1] * stddev + mean;
 194     }
 195 }
 196
 197 static int compare_doubles(const double *a, const double *b, int len,
 198                            double max_diff)
 199 {
 200     int i;
 201
 202     for (i = 0; i < len; i++) {
 203         if (fabs(a[i] - b[i]) > max_diff) {
 204             av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
 205                    i, a[i], b[i], a[i] - b[i]);
 206             return -1;
 207         }
 208     }
 209     return 0;
 210 }
 211
 212 static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 213                             const float *v1, const float *v2)
 214 {
 215     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 216     LOCAL_ALIGNED(32, float, odst, [LEN]);
 217     int ret;
 218
 219     cdsp->vector_fmul(cdst, v1, v2, LEN);
 220     fdsp->vector_fmul(odst, v1, v2, LEN);
 221
 222     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 223         av_log(NULL, AV_LOG_ERROR, "vector_fmul failed\n");
 224
 225     return ret;
 226 }
 227
 228 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 229 static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 230                                    const float *v1, const float *src0, float scale)
 231 {
 232     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 233     LOCAL_ALIGNED(32, float, odst, [LEN]);
 234     int ret;
 235
 236     memcpy(cdst, v1, LEN * sizeof(*v1));
 237     memcpy(odst, v1, LEN * sizeof(*v1));
 238
 239     cdsp->vector_fmac_scalar(cdst, src0, scale, LEN);
 240     fdsp->vector_fmac_scalar(odst, src0, scale, LEN);
 241
 242     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST))
 243         av_log(NULL, AV_LOG_ERROR, "vector_fmac_scalar failed\n");
 244
 245     return ret;
 246 }
 247
 248 static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 249                                    const float *v1, float scale)
 250 {
 251     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 252     LOCAL_ALIGNED(32, float, odst, [LEN]);
 253     int ret;
 254
 255     cdsp->vector_fmul_scalar(cdst, v1, scale, LEN);
 256     fdsp->vector_fmul_scalar(odst, v1, scale, LEN);
 257
 258     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 259         av_log(NULL, AV_LOG_ERROR, "vector_fmul_scalar failed\n");
 260
 261     return ret;
 262 }
 263
 264 static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 265                                    const double *v1, double scale)
 266 {
 267     LOCAL_ALIGNED(32, double, cdst, [LEN]);
 268     LOCAL_ALIGNED(32, double, odst, [LEN]);
 269     int ret;
 270
 271     cdsp->vector_dmul_scalar(cdst, v1, scale, LEN);
 272     fdsp->vector_dmul_scalar(odst, v1, scale, LEN);
 273
 274     if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))
 275         av_log(NULL, AV_LOG_ERROR, "vector_dmul_scalar failed\n");
 276
 277     return ret;
 278 }
 279
 280 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 281 static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 282                                    const float *v1, const float *v2, const float *v3)
 283 {
 284     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 285     LOCAL_ALIGNED(32, float, odst, [LEN]);
 286     int ret;
 287
 288     cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2);
 289     fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2);
 290
 291     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST))
 292         av_log(NULL, AV_LOG_ERROR, "vector_fmul_window failed\n");
 293
 294     return ret;
 295 }
 296
 297 #define ARBITRARY_FMUL_ADD_CONST 0.005
 298 static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 299                                 const float *v1, const float *v2, const float *v3)
 300 {
 301     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 302     LOCAL_ALIGNED(32, float, odst, [LEN]);
 303     int ret;
 304
 305     cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN);
 306     fdsp->vector_fmul_add(odst, v1, v2, v3, LEN);
 307
 308     if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST))
 309         av_log(NULL, AV_LOG_ERROR, "vector_fmul_add failed\n");
 310
 311     return ret;
 312 }
 313
 314 static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 315                                     const float *v1, const float *v2)
 316 {
 317     LOCAL_ALIGNED(32, float, cdst, [LEN]);
 318     LOCAL_ALIGNED(32, float, odst, [LEN]);
 319     int ret;
 320
 321     cdsp->vector_fmul_reverse(cdst, v1, v2, LEN);
 322     fdsp->vector_fmul_reverse(odst, v1, v2, LEN);
 323
 324     if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
 325         av_log(NULL, AV_LOG_ERROR, "vector_fmul_reverse failed\n");
 326
 327     return ret;
 328 }
 329
 330 static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 331                                   const float *v1, const float *v2)
 332 {
 333     LOCAL_ALIGNED(32, float, cv1, [LEN]);
 334     LOCAL_ALIGNED(32, float, cv2, [LEN]);
 335     LOCAL_ALIGNED(32, float, ov1, [LEN]);
 336     LOCAL_ALIGNED(32, float, ov2, [LEN]);
 337     int ret;
 338
 339     memcpy(cv1, v1, LEN * sizeof(*v1));
 340     memcpy(cv2, v2, LEN * sizeof(*v2));
 341     memcpy(ov1, v1, LEN * sizeof(*v1));
 342     memcpy(ov2, v2, LEN * sizeof(*v2));
 343
 344     cdsp->butterflies_float(cv1, cv2, LEN);
 345     fdsp->butterflies_float(ov1, ov2, LEN);
 346
 347     if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) ||
 348         (ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON)))
 349         av_log(NULL, AV_LOG_ERROR, "butterflies_float failed\n");
 350
 351     return ret;
 352 }
 353
 354 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 355 static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
 356                                     const float *v1, const float *v2)
 357 {
 358     float cprod, oprod;
 359     int ret;
 360
 361     cprod = cdsp->scalarproduct_float(v1, v2, LEN);
 362     oprod = fdsp->scalarproduct_float(v1, v2, LEN);
 363
 364     if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST))
 365         av_log(NULL, AV_LOG_ERROR, "scalarproduct_float failed\n");
 366
 367     return ret;
 368 }
 369
 370 int main(int argc, char **argv)
 371 {
 372     int ret = 0;
 373     uint32_t seed;
 374     AVFloatDSPContext fdsp, cdsp;
 375     AVLFG lfg;
 376
 377     LOCAL_ALIGNED(32, float, src0, [LEN]);
 378     LOCAL_ALIGNED(32, float, src1, [LEN]);
 379     LOCAL_ALIGNED(32, float, src2, [LEN]);
 380     LOCAL_ALIGNED(32, double, dbl_src0, [LEN]);
 381     LOCAL_ALIGNED(32, double, dbl_src1, [LEN]);
 382
 383     if (argc > 2 && !strcmp(argv[1], "-s"))
 384         seed = strtoul(argv[2], NULL, 10);
 385     else
 386         seed = av_get_random_seed();
 387
 388     av_log(NULL, AV_LOG_INFO, "float_dsp-test: random seed %u\n", seed);
 389
 390     av_lfg_init(&lfg, seed);
 391
 392     fill_float_array(&lfg, src0, LEN);
 393     fill_float_array(&lfg, src1, LEN);
 394     fill_float_array(&lfg, src2, LEN);
 395
 396     fill_double_array(&lfg, dbl_src0, LEN);
 397     fill_double_array(&lfg, dbl_src1, LEN);
 398
 399     avpriv_float_dsp_init(&fdsp, 1);
 400     av_set_cpu_flags_mask(0);
 401     avpriv_float_dsp_init(&cdsp, 1);
 402
 403     if (test_vector_fmul(&fdsp, &cdsp, src0, src1))
 404         ret -= 1 << 0;
 405     if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))
 406         ret -= 1 << 1;
 407     if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))
 408         ret -= 1 << 2;
 409     if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))
 410         ret -= 1 << 3;
 411     if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))
 412         ret -= 1 << 4;
 413     if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))
 414         ret -= 1 << 5;
 415     if (test_butterflies_float(&fdsp, &cdsp, src0, src1))
 416         ret -= 1 << 6;
 417     if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))
 418         ret -= 1 << 7;
 419     if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))
 420         ret -= 1 << 8;
 421
 422     return ret;
 423 }
 424
 425 #endif /* TEST */