2 * Copyright (c) 2012 Mans Rullgard
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * ff_sbr_sum64x5_neon -- declared through the function macro with export=1.
 * The lines visible here read one q register (16 bytes) through each of
 * five pointers (r0, r1, r2, r3, lr) and write q0 back through r0.
 * Every access carries a :128 qualifier, i.e. the addresses are asserted
 * to be 16-byte aligned.
 * NOTE(review): pointer setup, the arithmetic that combines q0-q3/q8 and
 * the loop control are not among the lines shown; presumably the five
 * vectors are added together, as the name suggests -- confirm against the
 * complete source.
 */
23 function ff_sbr_sum64x5_neon, export=1
/* r0 is read without writeback so the store below can reuse the address */
31 vld1.32 {q0}, [r0,:128]
/* the remaining four pointers each advance by 16 bytes after their load */
32 vld1.32 {q1}, [r1,:128]!
34 vld1.32 {q2}, [r2,:128]!
36 vld1.32 {q3}, [r3,:128]!
38 vld1.32 {q8}, [lr,:128]!
/* write q0 back to the location the first operand came from, then step r0 */
40 vst1.32 {q0}, [r0,:128]!
/*
 * ff_sbr_sum_square_neon -- declared through the function macro with
 * export=1. Only one input load and the result move are visible here.
 * NOTE(review): the accumulation itself is not among the lines shown;
 * presumably a sum of squares, per the name -- confirm.
 */
46 function ff_sbr_sum_square_neon, export=1
/* fetch 16 bytes from the 16-byte-aligned input at r0 and advance r0 */
49 vld1.32 {q1}, [r0,:128]!
/*
 * Copy lane 0 of d0 into core register r0.
 * NOTE(review): NOVFP is a macro defined outside this view; presumably it
 * emits the instruction only when floats are not returned in VFP
 * registers -- confirm.
 */
55 NOVFP vmov.32 r0, d0[0]
/*
 * ff_sbr_neg_odd_64_neon -- declared through the function macro with
 * export=1. The visible lines stream data in through r0 and out through
 * r1, 32 bytes per instruction, using two register pairs (q0/q1 and q2/q3)
 * alternately.
 * vld2.32 de-interleaves: even-indexed words go to the first register of
 * the pair, odd-indexed words to the second (q1 / q3). vst2.32 interleaves
 * them again on the way out.
 * NOTE(review): the operation applied to the odd lanes, the setup of r1
 * and any repeat/loop construct are not among the lines shown; presumably
 * q1/q3 are sign-flipped, per the name -- confirm.
 */
59 function ff_sbr_neg_odd_64_neon, export=1
/* two loads are issued up front so that reads run ahead of the writes */
62 vld2.32 {q0,q1}, [r0,:128]!
64 vld2.32 {q2,q3}, [r0,:128]!
/* store one pair, then refill it while the other pair is written */
66 vst2.32 {q0,q1}, [r1,:128]!
68 vld2.32 {q0,q1}, [r0,:128]!
69 vst2.32 {q2,q3}, [r1,:128]!
71 vld2.32 {q2,q3}, [r0,:128]!
/* drain both pairs that are still held in registers */
74 vst2.32 {q0,q1}, [r1,:128]!
75 vst2.32 {q2,q3}, [r1,:128]!
/*
 * ff_sbr_qmf_pre_shuffle_neon -- declared through the function macro with
 * export=1. In the visible lines r0 and r1 are read pointers and r2 is the
 * write pointer. r0 and r2 advance by the transfer size; r1 advances by
 * the contents of r3 after each load.
 * NOTE(review): the value of r3, the initial values of r0-r2 and the
 * register shuffling between loads and stores are not among the lines
 * shown -- confirm against the complete source.
 */
79 function ff_sbr_qmf_pre_shuffle_neon, export=1
/* the first 8 bytes are copied from r0 to r2 through d0 */
82 vld1.32 {d0}, [r0,:64]!
83 vst1.32 {d0}, [r2,:64]!
/* r1 side: 16 bytes per load, then r1 += r3 */
87 vld1.32 {q0}, [r1,:128], r3
/* r0 side: an 8-byte load followed by 16-byte loads into d3:d4 and d5:d6,
   i.e. register pairs that straddle q-register boundaries */
88 vld1.32 {d2}, [r0,:64]!
90 vld1.32 {d3,d4}, [r0,:128]!
92 vld1.32 {q9}, [r1,:128], r3
94 vld1.32 {d5,d6}, [r0,:128]!
/* interleaved store of two q registers (32 bytes); only 8-byte alignment
   is asserted for the destination */
97 vst2.32 {q0,q1}, [r2,:64]!
102 vld1.32 {q0}, [r1,:128], r3
103 vst2.32 {q9,q10}, [r2,:64]!
106 vld1.32 {d3,d4}, [r0,:128]!
108 vld1.32 {q9}, [r1,:128], r3
110 vld1.32 {d5}, [r0,:64]!
113 vst2.32 {q0,q1}, [r2,:64]!
/* narrower stores: 16 bytes interleaved from d3/d5, then a single-lane
   store that writes lane 0 of d2 and lane 0 of d4 (8 bytes) */
116 vst2.32 {d3,d5}, [r2,:64]!
117 vst2.32 {d2[0],d4[0]}, [r2,:64]!
/*
 * ff_sbr_qmf_post_shuffle_neon -- declared through the function macro with
 * export=1. In the visible lines r1 and r2 are read pointers and r0 is the
 * write pointer. r1 and r0 advance by the transfer size; r2 advances by
 * the contents of r3 after each load.
 * Two register sets (q0/q1 and q2/q3) are used alternately: one set is
 * loaded while the other is stored.
 * NOTE(review): the value of r3, the pointer setup, the processing between
 * load and store and the loop control are not among the lines shown --
 * confirm against the complete source.
 */
121 function ff_sbr_qmf_post_shuffle_neon, export=1
126 vld1.32 {q0}, [r2,:128], r3
127 vld1.32 {q1}, [r1,:128]!
133 vld1.32 {q2}, [r2,:128], r3
134 vld1.32 {q3}, [r1,:128]!
/* the high halves (d1,d3) are stored before the low halves (d0,d2), so the
   two 8-byte halves of each q register are emitted in swapped order */
135 vst2.32 {d1,d3}, [r0,:128]!
136 vst2.32 {d0,d2}, [r0,:128]!
141 vld1.32 {q0}, [r2,:128], r3
142 vld1.32 {q1}, [r1,:128]!
/* same high-then-low store order for the second register set */
143 vst2.32 {d5,d7}, [r0,:128]!
144 vst2.32 {d4,d6}, [r0,:128]!
/*
 * ff_sbr_qmf_deint_neg_neon -- declared through the function macro with
 * export=1. The visible lines de-interleave 16 bytes read through r1 and
 * write the two halves to separate destinations (r2 and r0).
 * NOTE(review): pointer setup, the value of r3, whatever is applied to
 * d0/d1 between load and store, and the loop control are not among the
 * lines shown -- confirm against the complete source.
 */
150 function ff_sbr_qmf_deint_neg_neon, export=1
/* even-indexed words -> d0, odd-indexed words -> d1; then r1 += r3 */
157 vld2.32 {d0,d1}, [r1,:128], r3
/* no writeback here: r2 is not advanced by this store */
160 vst1.32 {d0}, [r2,:64]
/* r0 advances by 8 bytes after the store */
161 vst1.32 {d1}, [r0,:64]!
/*
 * ff_sbr_qmf_deint_bfly_neon -- declared through the function macro with
 * export=1. The visible lines read 16 bytes through each of r1 and r2 and
 * write 16 bytes through each of r3 and r0.
 * r1 and r0 advance by the transfer size; r2 and r3 advance by the
 * contents of lr.
 * NOTE(review): the value placed in lr, the pointer setup, the arithmetic
 * between the loads and the stores, and the loop control are not among
 * the lines shown -- confirm against the complete source.
 */
168 function ff_sbr_qmf_deint_bfly_neon, export=1
175 vld1.32 {q0}, [r1,:128]!
/* post-indexed by register: r2 += lr after the load */
176 vld1.32 {q1}, [r2,:128], lr
/* post-indexed by register: r3 += lr after the store */
183 vst1.32 {q1}, [r3,:128], lr
184 vst1.32 {q0}, [r0,:128]!
/*
 * ff_sbr_hf_g_filt_neon -- declared through the function macro with
 * export=1. In the visible lines r1 and r2 are read pointers and r0 is the
 * write pointer.
 * NOTE(review): r12 appears both as an index scaled by 8 (first add) and
 * as a post-index byte stride for r1; how it is loaded, and whether it is
 * reassigned in between, is not among the lines shown. The multiplies
 * producing q3 and the loop control are not shown either -- confirm
 * against the complete source.
 */
190 function ff_sbr_hf_g_filt_neon, export=1
/* r1 += r12 * 8 */
192 add r1, r1, r12, lsl #3
/* load two consecutive 32-bit words from r2: the first is replicated to
   both lanes of d2, the second to both lanes of d3; r2 advances 8 bytes */
195 vld2.32 {d2[],d3[]},[r2,:64]!
/* 8 bytes from r1, then r1 += r12 */
196 vld1.32 {d0}, [r1,:64], r12
198 vld1.32 {d1}, [r1,:64], r12
200 vld2.32 {d2[],d3[]},[r2,:64]!
201 vld1.32 {d0}, [r1,:64], r12
/* 16-byte store for which only 8-byte alignment is asserted */
202 vst1.32 {q3}, [r0,:64]!
/* 8-byte store of d0 */
208 vst1.32 {d0}, [r0,:64]!
/*
 * ff_sbr_hf_gen_neon -- declared through the function macro with export=1.
 * In the visible lines r1 is the read pointer and r0 the write pointer;
 * r2 and r3 first serve as pointers for two 8-byte loads and are then
 * overwritten with two words taken from the stack.
 * NOTE(review): NOVFP and VFP are macros defined outside this view;
 * presumably exactly one of the two prefixed lines is assembled, selected
 * by HAVE_VFP_ARGS -- confirm. Several instructions between the visible
 * lines (the setup of d16, d18 and d20, further multiply-accumulates, loop
 * control) are not shown.
 */
212 function ff_sbr_hf_gen_neon, export=1
/* NOVFP variant: load one 32-bit word from the top of the stack and
   replicate it to both lanes of d1 */
213 NOVFP vld1.32 {d1[]}, [sp,:32]
/* VFP variant: replicate lane 0 of d0 to both lanes of d1 */
214 VFP vdup.32 d1, d0[0]
216 vld1.32 {d3}, [r2,:64]
217 vld1.32 {d2}, [r3,:64]
/* load two words from the stack into r2 and r3; the offset expression is
   4 when HAVE_VFP_ARGS is 0 and 0 otherwise, assuming the assembler
   evaluates ! as logical not */
219 ldrd r2, r3, [sp, #4*!HAVE_VFP_ARGS]
/* both pointers are offset by r2 * 8 bytes */
223 add r0, r0, r2, lsl #3
224 add r1, r1, r2, lsl #3
227 vld1.32 {q1}, [r1,:128]!
229 vld1.32 {q3}, [r1,:128]!
/* q3 += q1 * d0[0] (multiply by scalar lane) */
234 vmla.f32 q3, q1, d0[0]
/* the halves of q3 (d6 = low, d7 = high) are accumulated separately */
236 vmla.f32 d7, d20, d18
237 vmla.f32 d6, d3, d0[1]
238 vmla.f32 d7, d16, d0[1]
/* store the accumulated 16 bytes and advance r0 */
242 vst1.32 {q3}, [r0,:128]!
/*
 * ff_sbr_autocorrelate_neon -- declared through the function macro with
 * export=1. In the visible lines r0 is the read pointer and r1 the write
 * pointer; several multiply-accumulate and reduction steps appear between
 * the loads and the final stores.
 * NOTE(review): most of the arithmetic, the loop control and any pointer
 * adjustments between the final stores are not among the lines shown --
 * confirm against the complete source.
 */
248 function ff_sbr_autocorrelate_neon, export=1
249 vld1.32 {q0}, [r0,:128]!
258 vld1.32 {q2}, [r0,:128]!
272 vld1.32 {q2}, [r0,:128]!
/* fold the two halves of q10 together: d20 = d20 + d21 */
278 vadd.f32 d20, d20, d21
/* d0 += d16 * d17 and d1 += d16 * d18 (d16 is the common factor) */
283 vmla.f32 d0, d16, d17
284 vmla.f32 d1, d16, d18
/* d20 += d22 * d22, i.e. accumulate the element-wise square of d22 */
293 vmla.f32 d20, d22, d22
/* pairwise add: d20 = { d20[0]+d20[1], d21[0]+d21[1] } */
296 vpadd.f32 d20, d20, d21
/* 16-byte store, r1 advances by 16 */
297 vst1.32 {q3}, [r1,:128]!
/*
 * The three stores below have no writeback.
 * NOTE(review): r1 is presumably advanced between them by instructions
 * that are not visible here; with an unchanged r1 they would overlap.
 */
298 vst1.32 {d20[1]}, [r1,:32]
300 vst1.32 {d0}, [r1,:64]
302 vst1.32 {d20[0]}, [r1,:32]
/*
 * ff_sbr_hf_apply_noise_0_neon -- declared through the function macro with
 * export=1. The visible lines show two load/store groups: one that moves
 * 16 bytes through r0 per step, and one that moves 8 bytes.
 * In both groups r0 is read without writeback and written back with
 * post-increment, so the data at r0 is updated in place.
 * NOTE(review): movrel and X() are macros defined outside this view;
 * presumably r4 receives the address of ff_sbr_noise_table -- confirm.
 * r4 and lr are written here; any save/restore of them, the arithmetic
 * between load and store, the update of r3 and the loop control are not
 * among the lines shown.
 */
306 function ff_sbr_hf_apply_noise_0_neon, export=1
311 movrel r4, X(ff_sbr_noise_table)
/* lr = r4 + r3 * 8: 8-byte entries indexed by r3 */
316 add lr, r4, r3, lsl #3
/* NOTE(review): vld2/vst2 are written with a single q register in the
   list; presumably the assembler treats {q0} as {d0,d1} -- confirm */
317 vld2.32 {q0}, [r0,:64]
318 vld2.32 {q3}, [lr,:64]
/* 8 bytes each from the r1 and r2 streams, both post-incremented */
319 vld1.32 {d2}, [r1,:64]!
320 vld1.32 {d18}, [r2,:64]!
331 vst2.32 {q0}, [r0,:64]!
/* second group: same table addressing, but 8 bytes per step */
335 add lr, r4, r3, lsl #3
336 vld1.32 {d0}, [r0,:64]
337 vld1.32 {d6}, [lr,:64]
/* one 32-bit word from each of r1 and r2, replicated to both lanes of d2
   and d3 respectively; each pointer advances by 4 bytes */
338 vld1.32 {d2[]}, [r1,:32]!
339 vld1.32 {d3[]}, [r2,:32]!
346 vst1.32 {d0}, [r0,:64]!
/*
 * ff_sbr_hf_apply_noise_1_neon -- declared through the function macro with
 * export=1. The visible lines show two load/store groups: one that moves
 * 16 bytes through r0 per step, and one that moves 8 bytes.
 * In both groups r0 is read without writeback and written back with
 * post-increment, so the data at r0 is updated in place.
 * NOTE(review): movrel and X() are macros defined outside this view;
 * presumably r4 receives the address of ff_sbr_noise_table -- confirm.
 * r4 and lr are written here; any save/restore of them, the arithmetic
 * between load and store, the update of r3 and the loop control are not
 * among the lines shown.
 */
351 function ff_sbr_hf_apply_noise_1_neon, export=1
359 movrel r4, X(ff_sbr_noise_table)
/* lr = r4 + r3 * 8: 8-byte entries indexed by r3 */
364 add lr, r4, r3, lsl #3
/* NOTE(review): vld2/vst2 are written with a single q register in the
   list; presumably the assembler treats {q0} as {d0,d1} -- confirm */
365 vld2.32 {q0}, [r0,:64]
366 vld2.32 {q3}, [lr,:64]
/* 8 bytes each from the r1 and r2 streams, both post-incremented */
367 vld1.32 {d2}, [r1,:64]!
368 vld1.32 {d18}, [r2,:64]!
379 vst2.32 {q0}, [r0,:64]!
/* second group: same table addressing, but 8 bytes per step */
383 add lr, r4, r3, lsl #3
384 vld1.32 {d0}, [r0,:64]
385 vld1.32 {d6}, [lr,:64]
/* one 32-bit word from each of r1 and r2, replicated to both lanes of d2
   and d18 respectively; each pointer advances by 4 bytes */
386 vld1.32 {d2[]}, [r1,:32]!
387 vld1.32 {d18[]}, [r2,:32]!
394 vst1.32 {d0}, [r0,:64]!
399 function ff_sbr_hf_apply_noise_2_neon, export=1
404 function ff_sbr_hf_apply_noise_3_neon, export=1