2 * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
23 #include "asm-offsets.h"
/* resample_one: macro that emits the exported function ff_resample_one_\fmt\()_neon. */
/* fmt is pasted into the function name; es (default 2) is the left shift that scales element indices when forming addresses. */
/* NOTE(review): only part of the macro body is visible in this view; the comments below describe the visible lines only. */
25 .macro resample_one fmt, es=2
26 function ff_resample_one_\fmt\()_neon, export=1
/* r1 += r2 << es */
28 add r1, r1, r2, lsl #\es
/* r0 is the base register for the PHASE_SHIFT / FILTER_LENGTH / FILTER_BANK field loads (offsets presumably come from asm-offsets.h -- confirm). */
30 ldr r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
/* NOTE(review): the #8 stack offset presumably accounts for registers saved earlier in the function, which is not visible here -- confirm. */
31 ldr ip, [sp, #8] /* index */
/* r5 = value of the FILTER_LENGTH field */
32 ldr r5, [r0, #FILTER_LENGTH]
33 and r2, ip, r2 /* (index & phase_mask) */
/* r4 = value of the PHASE_SHIFT field, then r4 = index >> r4 (logical shift right) */
34 ldr r4, [r0, #PHASE_SHIFT]
35 lsr r4, ip, r4 /* compute sample_index */
/* ip is reused: the index value is overwritten by the FILTER_BANK pointer */
38 ldr ip, [r0, #FILTER_BANK]
39 add r3, r3, r4, lsl #\es /* &src[sample_index] */
/* r0 is overwritten here: from this point on it is the filter pointer, no longer the base used for the field loads above */
42 add r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */
/* Numeric local labels for the tail cases; the instructions under each label are not visible in this view. */
66 4: /* remaining filter_length 1 to 7 */
73 2: /* remaining filter_length 1 to 3 */
80 1: /* remaining filter_length 1 */
/* 32-bit element load / multiply-accumulate / reduction fragments using r0 (filter) and r3 (src). */
/* NOTE(review): the enclosing macro or define headers are not visible in this view; presumably these are the float32 helpers used by resample_one -- confirm. */
/* Load one 32-bit element each into lane 0 of d0 and d4; both pointers are post-incremented. */
104 vld1.32 {d0[0]}, [r0]! /* load filter */
105 vld1.32 {d4[0]}, [r3]! /* load src */
/* Load one full d register (two 32-bit elements) each. */
108 vld1.32 {d0}, [r0]! /* load filter */
109 vld1.32 {d4}, [r3]! /* load src */
/* Load two d registers (four 32-bit elements) each. */
112 vld1.32 {d0,d1}, [r0]! /* load filter */
113 vld1.32 {d4,d5}, [r3]! /* load src */
/* d16 += d0 * d4[0]: both lanes of d0 are multiplied by the scalar in lane 0 of d4. */
116 vmla.f32 d16, d0, d4[0]
/* Horizontal reduction: pairwise-add d16 and d17, then pairwise-add the result with itself, leaving the sum of the four lanes in d16. */
133 vpadd.f32 d16, d16, d17
134 vpadd.f32 d16, d16, d16
/* 32-bit integer load / widening multiply-accumulate / narrowing fragments using r0 (filter) and r3 (src). */
/* NOTE(review): the enclosing macro or define headers are not visible in this view; presumably these are the s32 helpers used by resample_one -- confirm. */
/* Load one 32-bit element each into lane 0 of d0 and d4; both pointers are post-incremented. */
144 vld1.32 {d0[0]}, [r0]! /* load filter */
145 vld1.32 {d4[0]}, [r3]! /* load src */
/* Load one full d register (two 32-bit elements) each. */
148 vld1.32 {d0}, [r0]! /* load filter */
149 vld1.32 {d4}, [r3]! /* load src */
/* Load two d registers (four 32-bit elements) each. */
152 vld1.32 {d0,d1}, [r0]! /* load filter */
153 vld1.32 {d4,d5}, [r3]! /* load src */
/* Widening multiply-accumulate: q8 (two 64-bit lanes) += d0 (signed 32-bit lanes) * d4[0]. */
156 vmlal.s32 q8, d0, d4[0]
/* Add the two 64-bit halves of q8 into d16. */
175 vadd.s64 d16, d16, d17
/* Saturating, rounding shift right by 30 that narrows the 64-bit lanes of q8 to 32 bits in d16. */
176 vqrshrn.s64 d16, q8, #30
/* 16-bit integer load / widening multiply-accumulate / narrowing fragments using r0 (filter) and r3 (src). */
/* NOTE(review): the enclosing macro or define headers are not visible in this view; presumably these are the s16 helpers used by resample_one -- confirm. */
/* Load one 16-bit element each into lane 0 of d0 and d4; both pointers are post-incremented. */
186 vld1.16 {d0[0]}, [r0]! /* load filter */
187 vld1.16 {d4[0]}, [r3]! /* load src */
/* A single 32-bit lane load moves 32 bits, i.e. two adjacent 16-bit elements, in one access. */
191 vld1.32 {d0[0]}, [r0]! /* load filter */
193 vld1.32 {d4[0]}, [r3]! /* load src */
/* Load one full d register (four 16-bit elements) each. */
196 vld1.16 {d0}, [r0]! /* load filter */
197 vld1.16 {d4}, [r3]! /* load src */
/* Widening multiply-accumulate: q8 (four 32-bit lanes) += d0 (signed 16-bit lanes) * d4[0]. */
200 vmlal.s16 q8, d0, d4[0]
/* Horizontal reduction of the four 32-bit lanes of q8 into d16. */
215 vpadd.s32 d16, d16, d17
216 vpadd.s32 d16, d16, d16
/* Saturating, rounding shift right by 15 that narrows the 32-bit lanes of q8 to 16 bits in d16. */
217 vqrshrn.s32 d16, q8, #15
/* resample_linear: macro that emits the exported function ff_resample_linear_\fmt\()_neon. */
/* fmt is pasted into the function name; es (default 2) is the left shift that scales element indices when forming addresses. */
/* NOTE(review): only part of the macro body is visible in this view; the comments below describe the visible lines only. */
224 .macro resample_linear fmt, es=2
225 function ff_resample_linear_\fmt\()_neon, export=1
/* r1 += r2 << es */
227 add r1, r1, r2, lsl #\es
/* r0 is the base register for the PHASE_SHIFT / FILTER_LENGTH / FILTER_BANK / SRC_INCR field loads (offsets presumably come from asm-offsets.h -- confirm). */
229 ldr r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
/* NOTE(review): the #8 stack offset presumably accounts for registers saved earlier in the function, which is not visible here -- confirm. */
230 ldr ip, [sp, #8] /* index */
/* r5 = value of the FILTER_LENGTH field */
231 ldr r5, [r0, #FILTER_LENGTH]
232 and r2, ip, r2 /* (index & phase_mask) */
/* r4 = value of the PHASE_SHIFT field, then r4 = index >> r4 (logical shift right) */
233 ldr r4, [r0, #PHASE_SHIFT]
234 lsr r4, ip, r4 /* compute sample_index */
/* ip is reused: the index value is overwritten by the FILTER_BANK pointer */
237 ldr ip, [r0, #FILTER_BANK]
238 add r3, r3, r4, lsl #\es /* &src[sample_index] */
/* r4 is reused: sample_index has been consumed above, r4 now holds the SRC_INCR field */
241 ldr r4, [r0, #SRC_INCR]
/* r0 is overwritten here: from this point on it is the filter pointer, no longer the base used for the field loads above */
242 add r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */
/* r2 is overwritten with a second filter pointer, r5 (filter length) elements past r0 */
243 add r2, r0, r5, lsl #\es /* filter[... + c->filter_length] */
/* Numeric local labels for the tail cases; the instructions under each label are not visible in this view. */
267 4: /* remaining filter_length 1 to 7 */
274 2: /* remaining filter_length 1 to 3 */
281 1: /* remaining filter_length 1 */
/* 32-bit element fragments for the linear variant: two filter streams (r0 and r2) are applied to the same src stream (r3). */
/* NOTE(review): the enclosing macro or define headers are not visible in this view; presumably these are the float32 helpers used by resample_linear -- confirm. */
/* Load one 32-bit element from each of the three streams into lane 0; all pointers are post-incremented. */
306 vld1.32 {d0[0]}, [r0]! /* load filter */
307 vld1.32 {d2[0]}, [r2]! /* load filter */
308 vld1.32 {d4[0]}, [r3]! /* load src */
/* Load one full d register (two 32-bit elements) from each stream. */
311 vld1.32 {d0}, [r0]! /* load filter */
312 vld1.32 {d2}, [r2]! /* load filter */
313 vld1.32 {d4}, [r3]! /* load src */
/* Load two d registers (four 32-bit elements) from each stream. */
316 vld1.32 {d0,d1}, [r0]! /* load filter */
317 vld1.32 {d2,d3}, [r2]! /* load filter */
318 vld1.32 {d4,d5}, [r3]! /* load src */
/* d18 accumulates the r0 filter products, d16 accumulates the r2 filter products; both use the scalar src value in d4[0]. */
321 vmla.f32 d18, d0, d4[0]
322 vmla.f32 d16, d2, d4[0]
/* NOTE(review): the #12 stack offset presumably depends on registers saved earlier in the function, which is not visible here -- confirm. */
341 vldr s0, [sp, #12] /* frac */
/* q8 = q8 - q9, lane by lane, before the horizontal reductions below */
345 vsub.f32 q8, q8, q9 /* v2 - val */
/* Reduce q9 and q8 to pairs, then to single sums: d2 (s4/s5) gets the q9 total, d1 (s2/s3) gets the q8 total. */
346 vpadd.f32 d18, d18, d19
347 vpadd.f32 d16, d16, d17
348 vpadd.f32 d2, d18, d18
349 vpadd.f32 d1, d16, d16
351 vmul.f32 s2, s2, s0 /* (v2 - val) * frac */
/* NOTE(review): s1 is set by instructions not visible in this view; presumably src_incr (r4) converted to float -- confirm. */
352 vdiv.f32 s2, s2, s1 /* / c->src_incr */
/* Instantiate the resample_linear macro with fmt=flt and es=2, emitting ff_resample_linear_flt_neon. */
358 resample_linear flt, 2