2 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
@ Body of a macro that takes \dst and \rt arguments; the .macro header is
@ not part of this view.
@ NOTE(review): the embedded source line numbers skip (29->35, 40->48,
@ 49->52, 63->68, ...), so some instructions are missing from this view.
@
@ Pointer setup. r4 = revtab, r7 = tcos and r2 = in, per the notes below.
@ \rt = r4 + (r6 >> 1).
25 add \rt, r4, r6, lsr #1 @ revtab + n4
@ r9 = r3 + (r3 << 1) = 3 * r3. r3 is read here before it is overwritten
@ two instructions further down; it is set by a line not shown in this view.
26 add r9, r3, r3, lsl #1 @ n3
27 add r8, r7, r6 @ tcos + n4
28 add r3, r2, r6, lsr #1 @ in + n4
29 add r9, r2, r9, lsl #1 @ in + n3
@ De-interleaving 16-bit loads from 128-bit aligned addresses.
@ r9, r3 and r7 advance through writeback; r11, r10 and r8 are post-indexed
@ by r12. r10, r11 and r12 are initialised by lines not shown in this view.
35 vld2.16 {d0,d1}, [r9, :128]!
36 vld2.16 {d2,d3}, [r11,:128], r12
37 vld2.16 {d4,d5}, [r3, :128]!
38 vld2.16 {d6,d7}, [r10,:128], r12
39 vld2.16 {d16,d17},[r7, :128]! @ cos, sin
40 vld2.16 {d18,d19},[r8, :128], r12
@ Halving subtracts: d0 = (d0 - d3) >> 1 and d4 = (d7 - d4) >> 1.
48 vhsub.s16 d0, d0, d3 @ re
49 vhsub.s16 d4, d7, d4 @ im
@ Widening 16x16 -> 32 bit multiply-accumulate:
@   q10 = d0*d16 - d4*d17      q11 = d0*d17 + d4*d16
@   q12 = d6*d18 - d2*d19      q13 = d6*d19 + d2*d18
52 vmull.s16 q10, d0, d16
53 vmlsl.s16 q10, d4, d17
54 vmull.s16 q11, d0, d17
55 vmlal.s16 q11, d4, d16
56 vmull.s16 q12, d6, d18
57 vmlsl.s16 q12, d2, d19
58 vmull.s16 q13, d6, d19
59 vmlal.s16 q13, d2, d18
@ Shift each 32-bit result right by 15 and narrow it back to 16 bits.
60 vshrn.s32 d0, q10, #15
61 vshrn.s32 d1, q11, #15
62 vshrn.s32 d2, q12, #15
63 vshrn.s32 d3, q13, #15
@ Four scatter-store steps. Each step forms lr = \dst + 4*lr and
@ r2 = \dst + 4*r2, then stores one 32-bit lane to each address.
@ The index values held in lr and r2 before each pair of adds are loaded
@ by lines not shown in this view (presumably from revtab; TODO confirm).
68 add lr, \dst, lr, lsl #2
69 add r2, \dst, r2, lsl #2
70 vst1.32 {d0[0]}, [lr,:32]
71 vst1.32 {d2[0]}, [r2,:32]
74 add lr, \dst, lr, lsl #2
75 add r2, \dst, r2, lsl #2
76 vst1.32 {d0[1]}, [lr,:32]
77 vst1.32 {d2[1]}, [r2,:32]
80 add lr, \dst, lr, lsl #2
81 add r2, \dst, r2, lsl #2
82 vst1.32 {d1[0]}, [lr,:32]
83 vst1.32 {d3[0]}, [r2,:32]
86 add lr, \dst, lr, lsl #2
87 add r2, \dst, r2, lsl #2
88 vst1.32 {d1[1]}, [lr,:32]
89 vst1.32 {d3[1]}, [r2,:32]
@ ff_mdct_fixed_calc_neon: exported entry point.
@ In: r0 = pointer to a context structure; the fields read here are at
@     offsets #8 (revtab), #16 (mdct_size) and #24 (tcos).
@ Calls ff_fft_fixed_calc_neon, then runs a multiply/narrow/store stage.
@ NOTE(review): the embedded source line numbers skip, so register saves,
@ loop labels, some arithmetic and the function end are not in this view.
94 function ff_mdct_fixed_calc_neon, export=1
97 ldr r4, [r0, #8] @ revtab
98 ldr r6, [r0, #16] @ mdct_size; n
99 ldr r7, [r0, #24] @ tcos
104 bl X(ff_fft_fixed_calc_neon)
@ r6 and r7 are reloaded through r4 using the same #16 and #24 offsets that
@ were used with r0 above, so r4 is expected to hold the context pointer
@ here; the instruction that sets it is not shown in this view.
108 ldr r6, [r4, #16] @ mdct_size; n
109 ldr r7, [r4, #24] @ tcos
@ Advance both pointers by r6 >> 1 bytes. r5 is set by a line not shown.
110 add r5, r5, r6, lsr #1
111 add r7, r7, r6, lsr #1
@ De-interleaving 16-bit loads. r7 advances through writeback and r2 is
@ post-indexed by r12; r5 and r1 are not updated by these loads.
@ r1, r2 and r12 are initialised by lines not shown in this view.
115 vld2.16 {d4,d5}, [r7,:128]!
116 vld2.16 {d6,d7}, [r2,:128], r12
117 vld2.16 {d0,d1}, [r5,:128]
118 vld2.16 {d2,d3}, [r1,:128]
@ Widening multiply-accumulate: q11 = d2*d6 + d3*d7.
123 vmull.s16 q11, d2, d6
124 vmlal.s16 q11, d3, d7
@ Widening multiply-accumulate: q10 = d2*d7 - d3*d6.
129 vmull.s16 q10, d2, d7
130 vmlsl.s16 q10, d3, d6
@ Shift right by 15 and narrow to 16 bits. q8 and q9 are computed by
@ lines not shown in this view.
131 vshrn.s32 d0, q11, #15
132 vshrn.s32 d1, q8, #15
133 vshrn.s32 d2, q9, #15
134 vshrn.s32 d3, q10, #15
@ Interleaving stores back to the addresses loaded from above: r5 advances
@ through writeback and r1 is post-indexed by r12.
136 vst2.16 {d2,d3}, [r5,:128]!
137 vst2.16 {d0,d1}, [r1,:128], r12
@ ff_mdct_fixed_calcw_neon: exported entry point.
@ In: r0 = pointer to a context structure; the fields read here are at
@     offsets #8 (revtab), #12 (tmp_buf), #16 (mdct_size) and #24 (tcos).
@ Calls ff_fft_fixed_calc_neon, then runs a multiply/store stage. In the
@ visible lines the 32-bit products are stored without being narrowed.
@ NOTE(review): the embedded source line numbers skip, so register saves,
@ loop labels, some arithmetic and the function end are not in this view.
144 function ff_mdct_fixed_calcw_neon, export=1
@ ldrd loads two consecutive words: r4 from offset #8 and r5 from #12.
147 ldrd r4, r5, [r0, #8] @ revtab, tmp_buf
148 ldr r6, [r0, #16] @ mdct_size; n
149 ldr r7, [r0, #24] @ tcos
155 bl X(ff_fft_fixed_calc_neon)
@ r6 and r9 are loaded through r4 using the same #16 and #24 offsets that
@ were used with r0 above, so r4 is expected to hold the context pointer
@ here; the instruction that sets it is not shown in this view.
159 ldr r6, [r4, #16] @ mdct_size; n
160 ldr r9, [r4, #24] @ tcos
@ Advance both pointers by r6 >> 1 bytes.
161 add r5, r5, r6, lsr #1
163 add r9, r9, r6, lsr #1
@ De-interleaving 16-bit loads. r9 and r5 advance through writeback; r2 and
@ r3 are post-indexed by r12. r2, r3 and r12 are initialised by lines not
@ shown in this view.
168 vld2.16 {d4,d5}, [r9,:128]!
169 vld2.16 {d6,d7}, [r2,:128], r12
170 vld2.16 {d0,d1}, [r5,:128]!
171 vld2.16 {d2,d3}, [r3,:128], r12
@ Widening multiply-accumulate: q10 = d0*d4 + d1*d5.
180 vmull.s16 q10, d0, d4
181 vmlal.s16 q10, d1, d5
@ Widening multiply-accumulate: q11 = d2*d7 - d3*d6.
182 vmull.s16 q11, d2, d7
183 vmlsl.s16 q11, d3, d6
@ Interleaving 32-bit stores. q10/q11 go to [r7] with writeback; d16/d18
@ and then d17/d19 go to [r1], each post-indexed by r12. q8 and q9
@ (d16-d19), r1 and the r7 destination are set by lines not shown here.
186 vst2.32 {q10,q11},[r7,:128]!
187 vst2.32 {d16,d18},[r1,:128], r12
188 vst2.32 {d17,d19},[r1,:128], r12