/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <string.h>

#include "libavutil/intfloat.h"
#include "libavutil/mem_internal.h"

#include "libavcodec/aacpsdsp.h"

#include "checkasm.h"
/* Buffer geometry shared by all tests: N elements, STRIDE apart. */
#define N 32
#define STRIDE 128
#define BUF_SIZE (N * STRIDE)

/* Fill a float buffer with pseudo-random values in [0, 1].
 * NOTE(review): restored the truncated macro body — the store into
 * (buf)[i] and the do/while(0) terminator were missing. */
#define randomize(buf, len) do {                                \
        int i;                                                  \
        for (i = 0; i < len; i++) {                             \
            const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;      \
            (buf)[i] = f;                                       \
        }                                                       \
    } while (0)

/* Maximum absolute error tolerated between the C reference and the
 * optimized implementation for float comparisons. */
#define EPS 0.005
38 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
41 for (i = 0; i < len; i++) {
42 union av_intfloat32 u = { .f = buf[i] };
43 u.i &= (0xffffffff << bits);
48 static void test_add_squares(void)
50 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
51 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
52 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54 declare_func(void, INTFLOAT *dst,
55 const INTFLOAT (*src)[2], int n);
57 randomize((INTFLOAT *)src, BUF_SIZE * 2);
58 randomize(dst0, BUF_SIZE);
59 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
60 call_ref(dst0, src, BUF_SIZE);
61 call_new(dst1, src, BUF_SIZE);
62 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64 bench_new(dst1, src, BUF_SIZE);
67 static void test_mul_pair_single(void)
69 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
70 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
71 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
72 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74 declare_func(void, INTFLOAT (*dst)[2],
75 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
78 randomize(src1, BUF_SIZE);
79 call_ref(dst0, src0, src1, BUF_SIZE);
80 call_new(dst1, src0, src1, BUF_SIZE);
81 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83 bench_new(dst1, src0, src1, BUF_SIZE);
86 static void test_hybrid_analysis(void)
88 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
89 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
90 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
91 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
94 const INTFLOAT (*filter)[8][2],
95 ptrdiff_t stride, int n);
97 randomize((INTFLOAT *)in, 13 * 2);
98 randomize((INTFLOAT *)filter, N * 8 * 2);
100 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
101 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103 call_ref(dst0, in, filter, STRIDE, N);
104 call_new(dst1, in, filter, STRIDE, N);
106 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108 bench_new(dst1, in, filter, STRIDE, N);
111 static void test_hybrid_analysis_ileave(void)
113 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
114 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
115 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
120 randomize((INTFLOAT *)out0, 91 * 32 * 2);
121 randomize((INTFLOAT *)in, 2 * 38 * 64);
122 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124 /* len is hardcoded to 32 as that's the only value used in
125 libavcodec. asm functions are likely to be optimized
126 hardcoding this value in their loops and could fail with
128 i is hardcoded to the two values currently used by the
129 aac decoder because the arm neon implementation is
130 micro-optimized for them and will fail for almost every
132 call_ref(out0, in, 3, 32);
133 call_new(out1, in, 3, 32);
135 /* the function just moves data around, so memcmp is enough */
136 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
139 call_ref(out0, in, 5, 32);
140 call_new(out1, in, 5, 32);
142 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
145 bench_new(out1, in, 3, 32);
148 static void test_hybrid_synthesis_deint(void)
150 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
151 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
152 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
154 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
157 randomize((INTFLOAT *)in, 91 * 32 * 2);
158 randomize((INTFLOAT *)out0, 2 * 38 * 64);
159 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161 /* len is hardcoded to 32 as that's the only value used in
162 libavcodec. asm functions are likely to be optimized
163 hardcoding this value in their loops and could fail with
165 i is hardcoded to the two values currently used by the
166 aac decoder because the arm neon implementation is
167 micro-optimized for them and will fail for almost every
169 call_ref(out0, in, 3, 32);
170 call_new(out1, in, 3, 32);
172 /* the function just moves data around, so memcmp is enough */
173 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
176 call_ref(out0, in, 5, 32);
177 call_new(out1, in, 5, 32);
179 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
182 bench_new(out1, in, 3, 32);
185 static void test_stereo_interpolate(PSDSPContext *psdsp)
188 LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
189 LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
190 LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
191 LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
192 LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
193 LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
194 LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
195 LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197 declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
198 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200 randomize((INTFLOAT *)l, BUF_SIZE * 2);
201 randomize((INTFLOAT *)r, BUF_SIZE * 2);
203 for (i = 0; i < 2; i++) {
204 if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
205 memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
206 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207 memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
208 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210 randomize((INTFLOAT *)h, 2 * 4);
211 randomize((INTFLOAT *)h_step, 2 * 4);
212 // Clear the least significant 14 bits of h_step, to avoid
213 // divergence when accumulating h_step BUF_SIZE times into
214 // a float variable which may or may not have extra intermediate
215 // precision. Therefore clear roughly log2(BUF_SIZE) less
216 // significant bits, to get the same result regardless of any
217 // extra precision in the accumulator.
218 clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220 call_ref(l0, r0, h, h_step, BUF_SIZE);
221 call_new(l1, r1, h, h_step, BUF_SIZE);
222 if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
223 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
226 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
227 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
228 bench_new(l1, r1, h, h_step, BUF_SIZE);
233 void checkasm_check_aacpsdsp(void)
237 ff_psdsp_init(&psdsp);
239 if (check_func(psdsp.add_squares, "ps_add_squares"))
241 report("add_squares");
243 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
244 test_mul_pair_single();
245 report("mul_pair_single");
247 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
248 test_hybrid_analysis();
249 report("hybrid_analysis");
251 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
252 test_hybrid_analysis_ileave();
253 report("hybrid_analysis_ileave");
255 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
256 test_hybrid_synthesis_deint();
257 report("hybrid_synthesis_deint");
259 test_stereo_interpolate(&psdsp);
260 report("stereo_interpolate");