2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 #include "libavutil/mem_internal.h"
/* Element count used to size the scratch buffers of the tests below.
 * N and STRIDE are defined outside this excerpt. */
27 #define BUF_SIZE (N * STRIDE)
/* randomize(buf, len): iterates i over [0, len) and, for each index, derives
 * an INTFLOAT value f as rnd() converted to INTFLOAT and divided by UINT_MAX.
 * NOTE(review): the declaration of i, the statement that consumes f
 * (presumably a store into buf[i]) and the end of the do-block are not
 * visible in this excerpt -- confirm against the full file. */
29 #define randomize(buf, len) do { \
31 for (i = 0; i < len; i++) { \
32 const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX; \
/* Masks off the low `bits` bits of the 32-bit representation of each of the
 * `len` values in `buf`.
 *
 * Each element is loaded into a union av_intfloat32 through its .f member and
 * the integer view u.i is ANDed with (0xffffffff << bits), which zeroes the
 * `bits` least significant bits of the pattern.
 *
 * NOTE(review): the write-back of the masked value into buf[i] is not visible
 * in this excerpt -- confirm against the full file. `bits` is assumed to be
 * smaller than 32; the only visible caller passes 14. */
39 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
42 for (i = 0; i < len; i++) {
43 union av_intfloat32 u = { .f = buf[i] };
44 u.i &= (0xffffffff << bits);
/* Checks the function selected for "add_squares" by running it through
 * call_ref and call_new and comparing the two destination buffers.
 *
 * src holds BUF_SIZE pairs of random values. dst0 is filled with random
 * values and copied into dst1, so both calls start from the same destination
 * contents. The outputs must agree within EPS (absolute tolerance) over
 * BUF_SIZE elements; afterwards the function is handed to bench_new.
 *
 * NOTE(review): the statement executed when the comparison fails is not
 * visible in this excerpt (presumably a failure report). */
49 static void test_add_squares(void)
51 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
52 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
53 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
55 declare_func(void, INTFLOAT *dst,
56 const INTFLOAT (*src)[2], int n);
58 randomize((INTFLOAT *)src, BUF_SIZE * 2);
59 randomize(dst0, BUF_SIZE);
60 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61 call_ref(dst0, src, BUF_SIZE);
62 call_new(dst1, src, BUF_SIZE);
63 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
65 bench_new(dst1, src, BUF_SIZE);
/* Checks the function selected for "mul_pair_single" by running it through
 * call_ref and call_new and comparing the two destination buffers.
 *
 * src0 holds BUF_SIZE pairs and src1 holds BUF_SIZE single values, both
 * randomized. The destination buffers are not initialized before the calls
 * in the visible code. The outputs, viewed as flat arrays of BUF_SIZE * 2
 * floats, must agree within EPS; afterwards the function is handed to
 * bench_new.
 *
 * NOTE(review): the statement executed when the comparison fails is not
 * visible in this excerpt (presumably a failure report). */
68 static void test_mul_pair_single(void)
70 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
72 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
73 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
75 declare_func(void, INTFLOAT (*dst)[2],
76 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
78 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
79 randomize(src1, BUF_SIZE);
80 call_ref(dst0, src0, src1, BUF_SIZE);
81 call_new(dst1, src0, src1, BUF_SIZE);
82 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
84 bench_new(dst1, src0, src1, BUF_SIZE);
/* Checks the function selected for "hybrid_analysis" by running it through
 * call_ref and call_new and comparing the two output buffers.
 *
 * The input is 13 pairs and the filter bank is N entries of [8][2], both
 * randomized. dst0 is randomized and copied into dst1 so that both calls
 * start from identical output contents. The calls pass STRIDE as the stride
 * argument and N as the count. The outputs, viewed as flat arrays of
 * BUF_SIZE * 2 floats, must agree within EPS; afterwards the function is
 * handed to bench_new.
 *
 * NOTE(review): the statement executed when the comparison fails is not
 * visible in this excerpt (presumably a failure report). */
87 static void test_hybrid_analysis(void)
89 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
94 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95 const INTFLOAT (*filter)[8][2],
96 ptrdiff_t stride, int n);
98 randomize((INTFLOAT *)in, 13 * 2);
99 randomize((INTFLOAT *)filter, N * 8 * 2);
101 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
104 call_ref(dst0, in, filter, STRIDE, N);
105 call_new(dst1, in, filter, STRIDE, N);
107 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
109 bench_new(dst1, in, filter, STRIDE, N);
/* Checks the function selected for "hybrid_analysis_ileave" by running it
 * through call_ref and call_new and comparing the output buffers bytewise.
 *
 * The input is a [2][38][64] array and each output is a [91][32][2] array.
 * out0 is randomized and copied into out1 so that both calls start from
 * identical output contents. Two argument pairs are exercised, (3, 32) and
 * (5, 32); the second pair runs on the buffers already written by the first.
 * After each pair the complete output arrays are compared with memcmp.
 * Only the (3, 32) case is handed to bench_new.
 *
 * NOTE(review): the tail of the declare_func() parameter list, the end of
 * the explanatory comment inside the body and the statements executed on a
 * mismatch are not visible in this excerpt -- confirm against the full file. */
112 static void test_hybrid_analysis_ileave(void)
114 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
115 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
118 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
121 randomize((INTFLOAT *)out0, 91 * 32 * 2);
122 randomize((INTFLOAT *)in, 2 * 38 * 64);
123 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
125 /* len is hardcoded to 32 as that's the only value used in
126 libavcodec. asm functions are likely to be optimized
127 hardcoding this value in their loops and could fail with
129 i is hardcoded to the two values currently used by the
130 aac decoder because the arm neon implementation is
131 micro-optimized for them and will fail for almost every
133 call_ref(out0, in, 3, 32);
134 call_new(out1, in, 3, 32);
136 /* the function just moves data around, so memcmp is enough */
137 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
140 call_ref(out0, in, 5, 32);
141 call_new(out1, in, 5, 32);
143 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
146 bench_new(out1, in, 3, 32);
/* Checks the function selected for "hybrid_synthesis_deint" by running it
 * through call_ref and call_new and comparing the output buffers bytewise.
 *
 * The input is a [91][32][2] array and each output is a [2][38][64] array.
 * out0 is randomized and copied into out1 so that both calls start from
 * identical output contents. Two argument pairs are exercised, (3, 32) and
 * (5, 32); the second pair runs on the buffers already written by the first.
 * After each pair the complete output arrays are compared with memcmp.
 * Only the (3, 32) case is handed to bench_new.
 *
 * NOTE(review): the tail of the declare_func() parameter list, the end of
 * the explanatory comment inside the body and the statements executed on a
 * mismatch are not visible in this excerpt -- confirm against the full file. */
149 static void test_hybrid_synthesis_deint(void)
151 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
155 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
158 randomize((INTFLOAT *)in, 91 * 32 * 2);
159 randomize((INTFLOAT *)out0, 2 * 38 * 64);
160 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
162 /* len is hardcoded to 32 as that's the only value used in
163 libavcodec. asm functions are likely to be optimized
164 hardcoding this value in their loops and could fail with
166 i is hardcoded to the two values currently used by the
167 aac decoder because the arm neon implementation is
168 micro-optimized for them and will fail for almost every
170 call_ref(out0, in, 3, 32);
171 call_new(out1, in, 3, 32);
173 /* the function just moves data around, so memcmp is enough */
174 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
177 call_ref(out0, in, 5, 32);
178 call_new(out1, in, 5, 32);
180 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
183 bench_new(out1, in, 3, 32);
/* Checks both entries of psdsp->stereo_interpolate[] (index 0 is reported as
 * "ps_stereo_interpolate", index 1 as "ps_stereo_interpolate_ipdopd").
 *
 * l and r hold the random master input (BUF_SIZE pairs each) and are filled
 * once. For every variant that check_func() selects, the working copies
 * l0/r0 (passed to call_ref) and l1/r1 (passed to call_new) are reset from
 * the master input, fresh random h and h_step tables ([2][4] each) are
 * generated, and the results are compared as flat arrays of BUF_SIZE * 2
 * floats within EPS.
 *
 * NOTE(review): the declaration of i and the statement executed when the
 * comparison fails are not visible in this excerpt. */
186 static void test_stereo_interpolate(PSDSPContext *psdsp)
189 LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
190 LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
191 LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
192 LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
193 LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
194 LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
195 LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
196 LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
198 declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
199 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
201 randomize((INTFLOAT *)l, BUF_SIZE * 2);
202 randomize((INTFLOAT *)r, BUF_SIZE * 2);
204 for (i = 0; i < 2; i++) {
205 if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
/* Give both calls identical copies of the master input. */
206 memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
208 memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
211 randomize((INTFLOAT *)h, 2 * 4);
212 randomize((INTFLOAT *)h_step, 2 * 4);
213 // Clear the least significant 14 bits of h_step, to avoid
214 // divergence when accumulating h_step BUF_SIZE times into
215 // a float variable which may or may not have extra intermediate
216 // precision. Therefore clear roughly log2(BUF_SIZE) less
217 // significant bits, to get the same result regardless of any
218 // extra precision in the accumulator.
219 clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
221 call_ref(l0, r0, h, h_step, BUF_SIZE);
222 call_new(l1, r1, h, h_step, BUF_SIZE);
223 if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
224 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
/* l1/r1 were passed to call_new above; reset them from the master input
 * before handing them to bench_new. */
227 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
228 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
229 bench_new(l1, r1, h, h_step, BUF_SIZE);
/* Entry point of the AAC parametric-stereo DSP checks.
 *
 * Initializes a PS DSP context with ff_psdsp_init() and then, for each
 * function pointer in it, runs the matching test_*() helper when
 * check_func() selects that function, followed by a report() call carrying
 * the group name. test_stereo_interpolate() is called unconditionally
 * because it performs its own check_func() calls per variant.
 *
 * NOTE(review): the declaration of psdsp and the statement guarded by the
 * first check_func() (presumably the call to test_add_squares()) are not
 * visible in this excerpt -- confirm against the full file. */
234 void checkasm_check_aacpsdsp(void)
238 ff_psdsp_init(&psdsp);
240 if (check_func(psdsp.add_squares, "ps_add_squares"))
242 report("add_squares");
244 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
245 test_mul_pair_single();
246 report("mul_pair_single");
248 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
249 test_hybrid_analysis();
250 report("hybrid_analysis");
252 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
253 test_hybrid_analysis_ileave();
254 report("hybrid_analysis_ileave");
256 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
257 test_hybrid_synthesis_deint();
258 report("hybrid_synthesis_deint");
260 test_stereo_interpolate(&psdsp);
261 report("stereo_interpolate");