/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
19 #include "libavcodec/aacpsdsp.h"
/* Element count of the test buffers.
 * NOTE(review): N and STRIDE are not defined in the visible part of this
 * listing; they must be defined before BUF_SIZE is expanded. */
#define BUF_SIZE (N * STRIDE)

/* Fill buf[0 .. len-1] with rnd() divided by UINT_MAX, stored as INTFLOAT.
 * NOTE(review): the index declaration, the store into buf and the closing
 * `while (0)` were missing from the listing and have been restored; confirm
 * against the upstream file. */
#define randomize(buf, len) do {                                \
    int i;                                                      \
    for (i = 0; i < (len); i++) {                               \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
        (buf)[i] = f;                                           \
    }                                                           \
} while (0)
37 static void test_add_squares(void)
39 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
40 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
41 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
43 declare_func(void, INTFLOAT *dst,
44 const INTFLOAT (*src)[2], int n);
46 randomize((INTFLOAT *)src, BUF_SIZE * 2);
47 randomize(dst0, BUF_SIZE);
48 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
49 call_ref(dst0, src, BUF_SIZE);
50 call_new(dst1, src, BUF_SIZE);
51 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
53 bench_new(dst1, src, BUF_SIZE);
56 static void test_mul_pair_single(void)
58 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
59 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
60 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
61 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
63 declare_func(void, INTFLOAT (*dst)[2],
64 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
66 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
67 randomize(src1, BUF_SIZE);
68 call_ref(dst0, src0, src1, BUF_SIZE);
69 call_new(dst1, src0, src1, BUF_SIZE);
70 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
72 bench_new(dst1, src0, src1, BUF_SIZE);
75 static void test_hybrid_analysis(void)
77 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
78 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
79 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
80 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
82 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
83 const INTFLOAT (*filter)[8][2],
84 ptrdiff_t stride, int n);
86 randomize((INTFLOAT *)in, 13 * 2);
87 randomize((INTFLOAT *)filter, N * 8 * 2);
89 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
90 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
92 call_ref(dst0, in, filter, STRIDE, N);
93 call_new(dst1, in, filter, STRIDE, N);
95 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
97 bench_new(dst1, in, filter, STRIDE, N);
100 static void test_hybrid_analysis_ileave(void)
102 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
103 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
104 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
106 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
109 randomize((INTFLOAT *)out0, 91 * 32 * 2);
110 randomize((INTFLOAT *)in, 2 * 38 * 64);
111 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
113 /* len is hardcoded to 32 as that's the only value used in
114 libavcodec. asm functions are likely to be optimized
115 hardcoding this value in their loops and could fail with
117 i is hardcoded to the two values currently used by the
118 aac decoder because the arm neon implementation is
119 micro-optimized for them and will fail for almost every
121 call_ref(out0, in, 3, 32);
122 call_new(out1, in, 3, 32);
124 /* the function just moves data around, so memcmp is enough */
125 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
128 call_ref(out0, in, 5, 32);
129 call_new(out1, in, 5, 32);
131 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
134 bench_new(out1, in, 3, 32);
137 static void test_hybrid_synthesis_deint(void)
139 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
140 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
141 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
143 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
146 randomize((INTFLOAT *)in, 91 * 32 * 2);
147 randomize((INTFLOAT *)out0, 2 * 38 * 64);
148 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
150 /* len is hardcoded to 32 as that's the only value used in
151 libavcodec. asm functions are likely to be optimized
152 hardcoding this value in their loops and could fail with
154 i is hardcoded to the two values currently used by the
155 aac decoder because the arm neon implementation is
156 micro-optimized for them and will fail for almost every
158 call_ref(out0, in, 3, 32);
159 call_new(out1, in, 3, 32);
161 /* the function just moves data around, so memcmp is enough */
162 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
165 call_ref(out0, in, 5, 32);
166 call_new(out1, in, 5, 32);
168 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
171 bench_new(out1, in, 3, 32);
174 static void test_stereo_interpolate(PSDSPContext *psdsp)
177 LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
178 LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
179 LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
180 LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
181 LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
182 LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
183 LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
184 LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
186 declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
187 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
189 randomize((INTFLOAT *)l, BUF_SIZE * 2);
190 randomize((INTFLOAT *)r, BUF_SIZE * 2);
192 for (i = 0; i < 2; i++) {
193 if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
194 memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
195 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
196 memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
197 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
199 randomize((INTFLOAT *)h, 2 * 4);
200 randomize((INTFLOAT *)h_step, 2 * 4);
202 call_ref(l0, r0, h, h_step, BUF_SIZE);
203 call_new(l1, r1, h, h_step, BUF_SIZE);
204 if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
205 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
208 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
209 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210 bench_new(l1, r1, h, h_step, BUF_SIZE);
215 void checkasm_check_aacpsdsp(void)
219 ff_psdsp_init(&psdsp);
221 if (check_func(psdsp.add_squares, "ps_add_squares"))
223 report("add_squares");
225 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
226 test_mul_pair_single();
227 report("mul_pair_single");
229 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
230 test_hybrid_analysis();
231 report("hybrid_analysis");
233 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
234 test_hybrid_analysis_ileave();
235 report("hybrid_analysis_ileave");
237 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
238 test_hybrid_synthesis_deint();
239 report("hybrid_synthesis_deint");
241 test_stereo_interpolate(&psdsp);
242 report("stereo_interpolate");