git.sesse.net Git - ffmpeg/blob - tests/checkasm/aacpsdsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include "libavcodec/aacpsdsp.h"
  20 #include "libavutil/intfloat.h"
  21 #include "libavutil/mem_internal.h"
  22
  23 #include "checkasm.h"
  24
  25 #define N 32
  26 #define STRIDE 128
  27 #define BUF_SIZE (N * STRIDE)
  28
  29 #define randomize(buf, len) do {                                \
  30     int i;                                                      \
  31     for (i = 0; i < len; i++) {                                 \
  32         const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
  33         (buf)[i] = f;                                           \
  34     }                                                           \
  35 } while (0)
  36
  37 #define EPS 0.005
  38
  39 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
  40 {
  41     int i;
  42     for (i = 0; i < len; i++) {
  43         union av_intfloat32 u = { .f = buf[i] };
  44         u.i &= (0xffffffff << bits);
  45         buf[i] = u.f;
  46     }
  47 }
  48
  49 static void test_add_squares(void)
  50 {
  51     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
  52     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
  53     LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
  54
  55     declare_func(void, INTFLOAT *dst,
  56                  const INTFLOAT (*src)[2], int n);
  57
  58     randomize((INTFLOAT *)src, BUF_SIZE * 2);
  59     randomize(dst0, BUF_SIZE);
  60     memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
  61     call_ref(dst0, src, BUF_SIZE);
  62     call_new(dst1, src, BUF_SIZE);
  63     if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
  64         fail();
  65     bench_new(dst1, src, BUF_SIZE);
  66 }
  67
  68 static void test_mul_pair_single(void)
  69 {
  70     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
  71     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
  72     LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
  73     LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
  74
  75     declare_func(void, INTFLOAT (*dst)[2],
  76                        INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
  77
  78     randomize((INTFLOAT *)src0, BUF_SIZE * 2);
  79     randomize(src1, BUF_SIZE);
  80     call_ref(dst0, src0, src1, BUF_SIZE);
  81     call_new(dst1, src0, src1, BUF_SIZE);
  82     if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
  83         fail();
  84     bench_new(dst1, src0, src1, BUF_SIZE);
  85 }
  86
  87 static void test_hybrid_analysis(void)
  88 {
  89     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
  90     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
  91     LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
  92     LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
  93
  94     declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
  95                  const INTFLOAT (*filter)[8][2],
  96                  ptrdiff_t stride, int n);
  97
  98     randomize((INTFLOAT *)in, 13 * 2);
  99     randomize((INTFLOAT *)filter, N * 8 * 2);
 100
 101     randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
 102     memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
 103
 104     call_ref(dst0, in, filter, STRIDE, N);
 105     call_new(dst1, in, filter, STRIDE, N);
 106
 107     if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
 108         fail();
 109     bench_new(dst1, in, filter, STRIDE, N);
 110 }
 111
 112 static void test_hybrid_analysis_ileave(void)
 113 {
 114     LOCAL_ALIGNED_16(INTFLOAT, in,   [2], [38][64]);
 115     LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
 116     LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
 117
 118     declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
 119                        int i, int len);
 120
 121     randomize((INTFLOAT *)out0, 91 * 32 * 2);
 122     randomize((INTFLOAT *)in,    2 * 38 * 64);
 123     memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
 124
 125     /* len is hardcoded to 32 as that's the only value used in
 126        libavcodec. asm functions are likely to be optimized
 127        hardcoding this value in their loops and could fail with
 128        anything else.
 129        i is hardcoded to the two values currently used by the
 130        aac decoder because the arm neon implementation is
 131        micro-optimized for them and will fail for almost every
 132        other value. */
 133     call_ref(out0, in, 3, 32);
 134     call_new(out1, in, 3, 32);
 135
 136     /* the function just moves data around, so memcmp is enough */
 137     if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
 138         fail();
 139
 140     call_ref(out0, in, 5, 32);
 141     call_new(out1, in, 5, 32);
 142
 143     if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
 144         fail();
 145
 146     bench_new(out1, in, 3, 32);
 147 }
 148
 149 static void test_hybrid_synthesis_deint(void)
 150 {
 151     LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
 152     LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
 153     LOCAL_ALIGNED_16(INTFLOAT, in,  [91], [32][2]);
 154
 155     declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
 156                        int i, int len);
 157
 158     randomize((INTFLOAT *)in,  91 * 32 * 2);
 159     randomize((INTFLOAT *)out0, 2 * 38 * 64);
 160     memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
 161
 162     /* len is hardcoded to 32 as that's the only value used in
 163        libavcodec. asm functions are likely to be optimized
 164        hardcoding this value in their loops and could fail with
 165        anything else.
 166        i is hardcoded to the two values currently used by the
 167        aac decoder because the arm neon implementation is
 168        micro-optimized for them and will fail for almost every
 169        other value. */
 170     call_ref(out0, in, 3, 32);
 171     call_new(out1, in, 3, 32);
 172
 173     /* the function just moves data around, so memcmp is enough */
 174     if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
 175         fail();
 176
 177     call_ref(out0, in, 5, 32);
 178     call_new(out1, in, 5, 32);
 179
 180     if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
 181         fail();
 182
 183     bench_new(out1, in, 3, 32);
 184 }
 185
 186 static void test_stereo_interpolate(PSDSPContext *psdsp)
 187 {
 188     int i;
 189     LOCAL_ALIGNED_16(INTFLOAT, l,  [BUF_SIZE], [2]);
 190     LOCAL_ALIGNED_16(INTFLOAT, r,  [BUF_SIZE], [2]);
 191     LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
 192     LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
 193     LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
 194     LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
 195     LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
 196     LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
 197
 198     declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
 199                        INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
 200
 201     randomize((INTFLOAT *)l, BUF_SIZE * 2);
 202     randomize((INTFLOAT *)r, BUF_SIZE * 2);
 203
 204     for (i = 0; i < 2; i++) {
 205         if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
 206             memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
 207             memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
 208             memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
 209             memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
 210
 211             randomize((INTFLOAT *)h, 2 * 4);
 212             randomize((INTFLOAT *)h_step, 2 * 4);
 213             // Clear the least significant 14 bits of h_step, to avoid
 214             // divergence when accumulating h_step BUF_SIZE times into
 215             // a float variable which may or may not have extra intermediate
 216             // precision. Therefore clear roughly log2(BUF_SIZE) less
 217             // significant bits, to get the same result regardless of any
 218             // extra precision in the accumulator.
 219             clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
 220
 221             call_ref(l0, r0, h, h_step, BUF_SIZE);
 222             call_new(l1, r1, h, h_step, BUF_SIZE);
 223             if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
 224                 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
 225                 fail();
 226
 227             memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
 228             memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
 229             bench_new(l1, r1, h, h_step, BUF_SIZE);
 230         }
 231     }
 232 }
 233
 234 void checkasm_check_aacpsdsp(void)
 235 {
 236     PSDSPContext psdsp;
 237
 238     ff_psdsp_init(&psdsp);
 239
 240     if (check_func(psdsp.add_squares, "ps_add_squares"))
 241         test_add_squares();
 242     report("add_squares");
 243
 244     if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
 245         test_mul_pair_single();
 246     report("mul_pair_single");
 247
 248     if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
 249         test_hybrid_analysis();
 250     report("hybrid_analysis");
 251
 252     if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
 253         test_hybrid_analysis_ileave();
 254     report("hybrid_analysis_ileave");
 255
 256     if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
 257         test_hybrid_synthesis_deint();
 258     report("hybrid_synthesis_deint");
 259
 260     test_stereo_interpolate(&psdsp);
 261     report("stereo_interpolate");
 262 }