/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <string.h>

#include "libavutil/intfloat.h"
#include "libavutil/mem_internal.h"

#include "libavcodec/aacpsdsp.h"

#include "checkasm.h"
/* Buffer geometry shared by all tests: N elements, STRIDE apart. */
#define N 32
#define STRIDE 128
#define BUF_SIZE (N * STRIDE)

/* Fill a float buffer with pseudo-random values in [0, 1].
 * NOTE(review): restored the truncated macro body — the store into
 * (buf)[i] and the do/while(0) terminator were missing. */
#define randomize(buf, len) do {                                \
        int i;                                                  \
        for (i = 0; i < len; i++) {                             \
            const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;      \
            (buf)[i] = f;                                       \
        }                                                       \
    } while (0)

/* Maximum absolute error tolerated between the C reference and the
 * optimized implementation for float comparisons. */
#define EPS 0.005
38 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
41 for (i = 0; i < len; i++) {
42 union av_intfloat32 u = { .f = buf[i] };
43 u.i &= (0xffffffff << bits);
48 static void test_add_squares(void)
50 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
51 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
52 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54 declare_func(void, INTFLOAT *dst,
55 const INTFLOAT (*src)[2], int n);
57 randomize((INTFLOAT *)src, BUF_SIZE * 2);
58 randomize(dst0, BUF_SIZE);
59 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
60 call_ref(dst0, src, BUF_SIZE);
61 call_new(dst1, src, BUF_SIZE);
62 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64 bench_new(dst1, src, BUF_SIZE);
67 static void test_mul_pair_single(void)
69 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
70 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
71 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
72 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74 declare_func(void, INTFLOAT (*dst)[2],
75 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
78 randomize(src1, BUF_SIZE);
79 call_ref(dst0, src0, src1, BUF_SIZE);
80 call_new(dst1, src0, src1, BUF_SIZE);
81 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83 bench_new(dst1, src0, src1, BUF_SIZE);
86 static void test_hybrid_analysis(void)
88 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
89 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
90 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
91 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
94 const INTFLOAT (*filter)[8][2],
95 ptrdiff_t stride, int n);
97 randomize((INTFLOAT *)in, 13 * 2);
98 randomize((INTFLOAT *)filter, N * 8 * 2);
100 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
101 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103 call_ref(dst0, in, filter, STRIDE, N);
104 call_new(dst1, in, filter, STRIDE, N);
106 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108 bench_new(dst1, in, filter, STRIDE, N);
111 static void test_hybrid_analysis_ileave(void)
113 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
114 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
115 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
120 randomize((INTFLOAT *)out0, 91 * 32 * 2);
121 randomize((INTFLOAT *)in, 2 * 38 * 64);
122 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124 /* len is hardcoded to 32 as that's the only value used in
125 libavcodec. asm functions are likely to be optimized
126 hardcoding this value in their loops and could fail with
128 i is hardcoded to the two values currently used by the
129 aac decoder because the arm neon implementation is
130 micro-optimized for them and will fail for almost every
132 call_ref(out0, in, 3, 32);
133 call_new(out1, in, 3, 32);
135 /* the function just moves data around, so memcmp is enough */
136 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
139 call_ref(out0, in, 5, 32);
140 call_new(out1, in, 5, 32);
142 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
145 bench_new(out1, in, 3, 32);
148 static void test_hybrid_synthesis_deint(void)
150 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
151 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
152 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
154 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
157 randomize((INTFLOAT *)in, 91 * 32 * 2);
158 randomize((INTFLOAT *)out0, 2 * 38 * 64);
159 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161 /* len is hardcoded to 32 as that's the only value used in
162 libavcodec. asm functions are likely to be optimized
163 hardcoding this value in their loops and could fail with
165 i is hardcoded to the two values currently used by the
166 aac decoder because the arm neon implementation is
167 micro-optimized for them and will fail for almost every
169 call_ref(out0, in, 3, 32);
170 call_new(out1, in, 3, 32);
172 /* the function just moves data around, so memcmp is enough */
173 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
176 call_ref(out0, in, 5, 32);
177 call_new(out1, in, 5, 32);
179 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
182 bench_new(out1, in, 3, 32);
185 static void test_stereo_interpolate(PSDSPContext *psdsp)
188 LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
189 LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
190 LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
191 LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
192 LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
193 LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
194 LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
195 LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197 declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
198 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200 randomize((INTFLOAT *)l, BUF_SIZE * 2);
201 randomize((INTFLOAT *)r, BUF_SIZE * 2);
203 for (i = 0; i < 2; i++) {
204 if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
205 memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
206 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207 memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
208 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210 randomize((INTFLOAT *)h, 2 * 4);
211 randomize((INTFLOAT *)h_step, 2 * 4);
212 // Clear the least significant 14 bits of h_step, to avoid
213 // divergence when accumulating h_step BUF_SIZE times into
214 // a float variable which may or may not have extra intermediate
215 // precision. Therefore clear roughly log2(BUF_SIZE) less
216 // significant bits, to get the same result regardless of any
217 // extra precision in the accumulator.
218 clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220 call_ref(l0, r0, h, h_step, BUF_SIZE);
221 call_new(l1, r1, h, h_step, BUF_SIZE);
222 if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
223 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
226 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
227 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
228 bench_new(l1, r1, h, h_step, BUF_SIZE);
233 void checkasm_check_aacpsdsp(void)
237 ff_psdsp_init(&psdsp);
239 if (check_func(psdsp.add_squares, "ps_add_squares"))
241 report("add_squares");
243 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
244 test_mul_pair_single();
245 report("mul_pair_single");
247 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
248 test_hybrid_analysis();
249 report("hybrid_analysis");
251 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
252 test_hybrid_analysis_ileave();
253 report("hybrid_analysis_ileave");
255 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
256 test_hybrid_synthesis_deint();
257 report("hybrid_synthesis_deint");
259 test_stereo_interpolate(&psdsp);
260 report("stereo_interpolate");