2 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
@ Fragment of a macro body: the \dst and \rt macro arguments are used below,
@ but the macro header and a number of instructions are not visible in this
@ excerpt (NOTE(review): confirm every remark here against the complete file).
@ Register roles, as given by the inline comments and by the loads in the
@ functions that follow: r2 = in, r4 = revtab, r6 = n (mdct_size), r7 = tcos.
@
@ --- Pointer setup ---
27 add \rt, r4, r6, lsr #1 @ revtab + n4
@ r9 = r3 + (r3 << 1) = 3 * r3; r3 is presumably n4, set in a line not shown.
28 add r9, r3, r3, lsl #1 @ n3
29 add r8, r7, r6 @ tcos + n4
@ r3 is reused here as a pointer into the input.
30 add r3, r2, r6, lsr #1 @ in + n4
31 add r9, r2, r9, lsl #1 @ in + n3
@ --- Loads ---
@ Each vld2.16 de-interleaves 16-bit pairs into two D registers using a
@ 128-bit aligned address. r9, r3 and r7 are written back past the data just
@ read ("!"); r11, r10 and r8 are post-indexed by r12 instead. r10, r11 and
@ r12 are initialised by instructions not visible in this excerpt.
37 vld2.16 {d0,d1}, [r9, :128]!
38 vld2.16 {d2,d3}, [r11,:128], r12
39 vld2.16 {d4,d5}, [r3, :128]!
40 vld2.16 {d6,d7}, [r10,:128], r12
41 vld2.16 {d16,d17},[r7, :128]! @ cos, sin
42 vld2.16 {d18,d19},[r8, :128], r12
@ --- Input combination ---
@ Halving subtracts per signed 16-bit lane: d0 = (d0 - d3) >> 1 and
@ d4 = (d7 - d4) >> 1.
50 vhsub.s16 d0, d0, d3 @ re
51 vhsub.s16 d4, d7, d4 @ im
@ --- Multiply by the twiddle factors (widening to 32-bit lanes) ---
@ First stream, factors d16/d17:
@   q10 = d0*d16 - d4*d17
@   q11 = d0*d17 + d4*d16
54 vmull.s16 q10, d0, d16
55 vmlsl.s16 q10, d4, d17
56 vmull.s16 q11, d0, d17
57 vmlal.s16 q11, d4, d16
@ Second stream, factors d18/d19:
@   q12 = d6*d18 - d2*d19
@   q13 = d6*d19 + d2*d18
@ (d6 and d2 are presumably prepared by instructions not shown.)
58 vmull.s16 q12, d6, d18
59 vmlsl.s16 q12, d2, d19
60 vmull.s16 q13, d6, d19
61 vmlal.s16 q13, d2, d18
@ Shift each 32-bit product right by 15 and narrow it back to 16 bits.
62 vshrn.s32 d0, q10, #15
63 vshrn.s32 d1, q11, #15
64 vshrn.s32 d2, q12, #15
65 vshrn.s32 d3, q13, #15
@ --- Scatter stores ---
@ Each pair below turns the values in lr and r2 into addresses
@ \dst + (value << 2) and stores one 32-bit lane to each address.
@ lr and r2 are presumably indices loaded by lines not visible here
@ (likely via \rt / revtab -- TODO confirm); r2 no longer holds "in".
70 add lr, \dst, lr, lsl #2
71 add r2, \dst, r2, lsl #2
72 vst1.32 {d0[0]}, [lr,:32]
73 vst1.32 {d2[0]}, [r2,:32]
@ Lane 1 of d0 and d2.
76 add lr, \dst, lr, lsl #2
77 add r2, \dst, r2, lsl #2
78 vst1.32 {d0[1]}, [lr,:32]
79 vst1.32 {d2[1]}, [r2,:32]
@ Lane 0 of d1 and d3.
82 add lr, \dst, lr, lsl #2
83 add r2, \dst, r2, lsl #2
84 vst1.32 {d1[0]}, [lr,:32]
85 vst1.32 {d3[0]}, [r2,:32]
@ Lane 1 of d1 and d3.
88 add lr, \dst, lr, lsl #2
89 add r2, \dst, r2, lsl #2
90 vst1.32 {d1[1]}, [lr,:32]
91 vst1.32 {d3[1]}, [r2,:32]
@ ff_mdct_fixed_calc_neon (exported)
@ In:  r0 = pointer to a context structure; the fields read here are
@           revtab at offset 8, mdct_size (n) at offset 16 and tcos at
@           offset 24.
@ The visible code loads those fields, calls ff_fft_fixed_calc_neon, then
@ multiplies the data at r5/r1 by the tcos values and stores 16-bit results
@ back in place. Register save/restore, the loop label and branch, and the
@ setup of r1, r2, r5 and r12 are not visible in this excerpt
@ (NOTE(review): confirm against the complete file).
96 function ff_mdct_fixed_calc_neon, export=1
99 ldr r4, [r0, #8] @ revtab
100 ldr r6, [r0, #16] @ mdct_size; n
101 ldr r7, [r0, #24] @ tcos
106 bl X(ff_fft_fixed_calc_neon)
@ Reload n and tcos after the call, this time through r4; r4 presumably
@ holds a copy of the context pointer made in a line not shown.
110 ldr r6, [r4, #16] @ mdct_size; n
111 ldr r7, [r4, #24] @ tcos
@ Advance the r5 and r7 pointers by n/2 bytes.
112 add r5, r5, r6, lsr #1
113 add r7, r7, r6, lsr #1
@ De-interleaving 16-bit loads: factors from r7 (post-incremented) and from
@ r2 (post-indexed by r12); data from r5 and r1 with no writeback, because
@ the same addresses are stored to below.
117 vld2.16 {d4,d5}, [r7,:128]!
118 vld2.16 {d6,d7}, [r2,:128], r12
119 vld2.16 {d0,d1}, [r5,:128]
120 vld2.16 {d2,d3}, [r1,:128]
@ q11 = d2*d6 + d3*d7 (widening to 32-bit lanes)
125 vmull.s16 q11, d2, d6
126 vmlal.s16 q11, d3, d7
@ q10 = d2*d7 - d3*d6
131 vmull.s16 q10, d2, d7
132 vmlsl.s16 q10, d3, d6
@ Shift right by 15 and narrow to 16 bits. q8 and q9 are computed by
@ instructions not visible in this excerpt.
133 vshrn.s32 d0, q11, #15
134 vshrn.s32 d1, q8, #15
135 vshrn.s32 d2, q9, #15
136 vshrn.s32 d3, q10, #15
@ Interleaving stores back to the source addresses: r5 is post-incremented,
@ r1 is post-indexed by r12.
138 vst2.16 {d2,d3}, [r5,:128]!
139 vst2.16 {d0,d1}, [r1,:128], r12
@ ff_mdct_fixed_calcw_neon (exported)
@ In:  r0 = pointer to a context structure; the fields read here are
@           revtab at offset 8, tmp_buf at offset 12 (second word of the
@           ldrd), mdct_size (n) at offset 16 and tcos at offset 24.
@ Follows the same sequence as ff_mdct_fixed_calc_neon, but the products are
@ stored as 32-bit values with no narrowing shift (presumably the "w" stands
@ for wide output -- TODO confirm). Register save/restore, the loop control,
@ the function end and the setup of r1, r2, r3, r7 and r12 are not visible
@ in this excerpt.
146 function ff_mdct_fixed_calcw_neon, export=1
149 ldrd r4, r5, [r0, #8] @ revtab, tmp_buf
150 ldr r6, [r0, #16] @ mdct_size; n
151 ldr r7, [r0, #24] @ tcos
157 bl X(ff_fft_fixed_calc_neon)
@ Reload n and tcos through r4 (presumably a copy of the context pointer
@ made in a line not shown). tcos goes to r9 here; r7 is used as a store
@ pointer further down.
161 ldr r6, [r4, #16] @ mdct_size; n
162 ldr r9, [r4, #24] @ tcos
@ Advance the r5 and r9 pointers by n/2 bytes.
163 add r5, r5, r6, lsr #1
165 add r9, r9, r6, lsr #1
@ De-interleaving 16-bit loads: factors from r9 (post-incremented) and r2
@ (post-indexed by r12); data from r5 (post-incremented) and r3
@ (post-indexed by r12).
170 vld2.16 {d4,d5}, [r9,:128]!
171 vld2.16 {d6,d7}, [r2,:128], r12
172 vld2.16 {d0,d1}, [r5,:128]!
173 vld2.16 {d2,d3}, [r3,:128], r12
@ q10 = d0*d4 + d1*d5 (widening to 32-bit lanes)
182 vmull.s16 q10, d0, d4
183 vmlal.s16 q10, d1, d5
@ q11 = d2*d7 - d3*d6
184 vmull.s16 q11, d2, d7
185 vmlsl.s16 q11, d3, d6
@ Interleaving 32-bit stores: q10/q11 go to r7 (post-incremented), while
@ d16..d19 (q8/q9, computed by instructions not visible here) go to r1 in
@ two halves, each post-indexed by r12.
188 vst2.32 {q10,q11},[r7,:128]!
189 vst2.32 {d16,d18},[r1,:128], r12
190 vst2.32 {d17,d19},[r1,:128], r12