2 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/aarch64/asm.S"
22 #include "asm-offsets.h"
// resample_one: template macro that emits the exported routine
// ff_resample_one_<fmt>_neon. The body invokes LOAD8, M_MLA and STORE_ONE,
// which are not defined inside this macro, so a matching helper set must
// already be defined when the template is expanded.
//   fmt - suffix pasted into the exported function name
//   es  - left-shift that converts an element count into a byte offset
//         (default 2, i.e. 4-byte elements)
// Registers, as used by the instructions visible here:
//   x0     - base address from which FILTER_BANK, FILTER_LENGTH and
//            PHASE_SHIFT are loaded
//            (NOTE(review): presumably the resampler context -- confirm)
//   x1, x2 - destination base and element index handed to STORE_ONE
//   x3     - source pointer, advanced to src[sample_index]
//   x4     - index value: x4 >> phase_shift gives sample_index, and x4 also
//            selects the filter inside the filter bank
//   v0, v1 - accumulators passed to M_MLA and reduced by STORE_ONE
// NOTE(review): the numeric prefixes on these lines are not contiguous, so
// instructions such as branches, length-counter updates and the closing
// .endm/endfunc are not visible in this listing -- verify against the full file.
24 .macro resample_one fmt, es=2
// Local definitions of M_MUL2/M_MLA2 taking arbitrary arguments; their bodies
// and terminators are not visible in this listing.
26 .macro M_MUL2 x:vararg
28 .macro M_MLA2 x:vararg
31 function ff_resample_one_\fmt\()_neon, export=1
// x9 = filter bank base, w6 = filter length, w7 = phase shift, w8 = phase mask
33 ldr x9, [x0, #FILTER_BANK]
34 ldr w6, [x0, #FILTER_LENGTH]
35 ldp w7, w8, [x0, #PHASE_SHIFT] // and phase_mask
36 lsr x10, x4, x7 // sample_index
// x11 = size in bytes of one filter (filter_length << es)
38 lsl x11, x6, #\es // filter_length * elem_size
39 add x3, x3, x10, lsl #\es // src[sample_index]
// x9 = filter_bank + x4 * x11, i.e. the start of the selected filter
40 madd x9, x11, x4, x9 // filter
43 8: // remaining filter_length at least 16
// Source data (from x3) lands in v4-v7, filter taps (from x9) in v16-v19;
// both pointers are post-incremented by the LOAD8 macro.
45 LOAD8 v4, v5, v6, v7, x3
46 LOAD8 v16, v17, v18, v19, x9
// A second register set (v20-v27) is loaded so two batches are in flight.
50 LOAD8 v20, v21, v22, v23, x3
53 LOAD8 v24, v25, v26, v27, x9
// Accumulate source * filter products into v0 (v1 is the optional second
// accumulator for helper sets that use it).
54 M_MLA v0, v20, v24, v1
58 M_MLA v0, v21, v25, v1
// Reload the first register set for the next batch.
62 LOAD8 v4, v5, v6, v7, x3
63 LOAD8 v16, v17, v18, v19, x9
// Final accumulate, then reduce the accumulators and store one output
// element at x1[x2].
68 M_MLA v0, v21, v25, v1
70 STORE_ONE 0, x1, x2, v1
// Tail handling for filter lengths that are not a multiple of 16; the
// instructions under these labels are not visible in this listing.
75 4: // remaining filter_length 1-15
85 2: // remaining filter_length 1-3
93 1: // remaining filter_length 1
// Reduce the accumulators and store one output element at x1[x2].
98 STORE_ONE 0, x1, x2, v1
// Helper macro set for 64-bit floating-point elements (.2d lanes, fmul/fmla).
// NOTE(review): presumably the set used for the double-precision
// instantiation of resample_one; the invoking line and the .endm terminators
// are not visible in this listing -- confirm.

// LOAD1: load one 64-bit element into d<d1>; addr is post-incremented by 8.
114 .macro LOAD1 d1, addr
115 ldr d\d1, [\addr], #8
// LOAD2: load two 64-bit elements into v<d1>; addr is post-incremented by 16.
// Like LOAD1, d1 is a bare register number here, not a register name.
117 .macro LOAD2 d1, addr
118 ld1 {v\d1\().2d}, [\addr], #16
// LOAD4: load four 64-bit elements into two vector registers (32 bytes).
120 .macro LOAD4 d1, d2, addr
121 ld1 {\d1\().2d,\d2\().2d}, [\addr], #32
// LOAD8: load eight 64-bit elements into four vector registers (64 bytes).
123 .macro LOAD8 d1, d2, d3, d4, addr
124 ld1 {\d1\().2d,\d2\().2d,\d3\().2d,\d4\().2d}, [\addr], #64
// M_MLA: d += r0 * r1 on two 64-bit float lanes. The trailing d2 argument is
// accepted but not referenced by the visible body.
126 .macro M_MLA d, r0, r1, d2:vararg
127 fmla \d\().2d, \r0\().2d, \r1\().2d
// M_MLA2: body not visible in this listing.
// NOTE(review): presumably forwards its arguments to M_MLA -- confirm.
129 .macro M_MLA2 second:vararg
// M_MUL: d = r0 * r1 on two 64-bit float lanes; d2 is not referenced by the
// visible body.
132 .macro M_MUL d, r0, r1, d2:vararg
133 fmul \d\().2d, \r0\().2d, \r1\().2d
// M_MUL2: body not visible in this listing.
// NOTE(review): presumably forwards its arguments to M_MUL -- confirm.
135 .macro M_MUL2 second:vararg
// STORE_ONE: fold the second accumulator d2 into v<rn>, add the two lanes
// together into scalar d<rn>, then store it at addr + idx * 8.
138 .macro STORE_ONE rn, addr, idx, d2
139 fadd v\rn\().2d, v\rn\().2d, \d2\().2d
140 faddp d\rn\(), v\rn\().2d
141 str d\rn\(), [\addr, \idx, lsl #3]
// Helper macro set for 32-bit floating-point elements (.4s lanes, fmul/fmla).
// NOTE(review): presumably the set used for the single-precision
// instantiation of resample_one; the invoking line and the .endm terminators
// are not visible in this listing -- confirm.

// LOAD1: load one 32-bit element into s<d1>; addr is post-incremented by 4.
147 .macro LOAD1 d1, addr
148 ldr s\d1, [\addr], #4
// LOAD2: load two 32-bit elements into the low half of v<d1> (8 bytes).
150 .macro LOAD2 d1, addr
151 ld1 {v\d1\().2s}, [\addr], #8
// LOAD4: four 32-bit elements fit in one register, so only d1 is written;
// d2 is accepted but unused. addr is post-incremented by 16.
153 .macro LOAD4 d1, d2, addr
154 ld1 {\d1\().4s}, [\addr], #16
// LOAD8: eight 32-bit elements fill d1 and d2; d3 and d4 are accepted but
// unused. addr is post-incremented by 32.
156 .macro LOAD8 d1, d2, d3, d4, addr
157 ld1 {\d1\().4s,\d2\().4s}, [\addr], #32
// M_MLA: d += r0 * r1 on four 32-bit float lanes; d2 is not referenced.
159 .macro M_MLA d, r0, r1, d2:vararg
160 fmla \d\().4s, \r0\().4s, \r1\().4s
// M_MUL: d = r0 * r1 on four 32-bit float lanes; d2 is not referenced.
162 .macro M_MUL d, r0, r1, d2:vararg
163 fmul \d\().4s, \r0\().4s, \r1\().4s
// STORE_ONE: horizontal sum of the four lanes of v<rn> (pairwise add to two
// partial sums, then to one scalar), stored at addr + idx * 4. d2 is not
// referenced by this variant.
165 .macro STORE_ONE rn, addr, idx, d2
166 faddp v\rn\().4s, v\rn\().4s, v\rn\().4s
167 faddp s\rn\(), v\rn\().2s
168 str s\rn\(), [\addr, \idx, lsl #2]
// Helper macro set for signed 16-bit elements: products are widened to
// 32-bit lanes (smull/smlal .4h -> .4s).
// NOTE(review): presumably the set used for the 16-bit integer instantiation
// of resample_one; the invoking line and the .endm terminators are not
// visible in this listing -- confirm.

// LOAD1: load one 16-bit element into h<d1>; addr is post-incremented by 2.
174 .macro LOAD1 d1, addr
175 ldr h\d1, [\addr], #2
// LOAD2: load two 16-bit elements (4 bytes) into s<d1>.
177 .macro LOAD2 d1, addr
178 ldr s\d1, [\addr], #4
// LOAD4: load four 16-bit elements into d1 (8 bytes); d2 is accepted but
// unused.
180 .macro LOAD4 d1, d2, addr
181 ld1 {\d1\().4h}, [\addr], #8
// LOAD8: load eight 16-bit elements into d1 and d2 (16 bytes); d3 and d4 are
// accepted but unused.
183 .macro LOAD8 d1, d2, d3, d4, addr
184 ld1 {\d1\().4h,\d2\().4h}, [\addr], #16
// M_MLA: widening signed multiply-accumulate, four 16-bit lanes into four
// 32-bit accumulator lanes; d2 is not referenced.
186 .macro M_MLA d, r0, r1, d2:vararg
187 smlal \d\().4s, \r0\().4h, \r1\().4h
// M_MUL: widening signed multiply, four 16-bit lanes into four 32-bit lanes;
// d2 is not referenced.
189 .macro M_MUL d, r0, r1, d2:vararg
190 smull \d\().4s, \r0\().4h, \r1\().4h
// STORE_ONE: two pairwise adds leave the sum of all four 32-bit lanes in
// every lane of v<rn>; the sum is then shifted right by 15 with rounding and
// signed saturation while narrowing to 16 bits, and lane 0 is stored at
// addr + idx * 2. d2 is not referenced by this variant.
192 .macro STORE_ONE rn, addr, idx, d2
193 addp v\rn\().4s, v\rn\().4s, v\rn\().4s
194 addp v\rn\().4s, v\rn\().4s, v\rn\().4s
195 sqrshrn v\rn\().4h, v\rn\().4s, #15
196 str h\rn\(), [\addr, \idx, lsl #1]
// Helper macro set for signed 32-bit elements: products are widened to
// 64-bit lanes (smull/smlal .2s -> .2d, plus the *2 forms for the upper half).
// NOTE(review): presumably the set used for the 32-bit integer instantiation
// of resample_one; the invoking line and the .endm terminators are not
// visible in this listing -- confirm.

// LOAD1: load one 32-bit element into s<d1>; addr is post-incremented by 4.
202 .macro LOAD1 d1, addr
203 ldr s\d1, [\addr], #4
// LOAD2: load two 32-bit elements into the low half of v<d1> (8 bytes).
205 .macro LOAD2 d1, addr
206 ld1 {v\d1\().2s}, [\addr], #8
// LOAD4: load four 32-bit elements into d1 (16 bytes); d2 is accepted but
// unused.
208 .macro LOAD4 d1, d2, addr
209 ld1 {\d1\().4s}, [\addr], #16
// LOAD8: load eight 32-bit elements into d1 and d2 (32 bytes); d3 and d4 are
// accepted but unused.
211 .macro LOAD8 d1, d2, d3, d4, addr
212 ld1 {\d1\().4s,\d2\().4s}, [\addr], #32
// M_MLA: widening signed multiply-accumulate. The low two 32-bit lanes of
// r0/r1 accumulate into d1; the high two lanes accumulate into d2.
// NOTE(review): the line prefixes skip a number before the smlal2 line, so a
// guard for an omitted d2 argument may exist but is not visible -- confirm.
214 .macro M_MLA d1, r0, r1, d2:vararg
215 smlal \d1\().2d, \r0\().2s, \r1\().2s
217 smlal2 \d2\().2d, \r0\().4s, \r1\().4s
// M_MUL: widening signed multiply. The low two lanes go to d1 and the high
// two lanes to d2.
// NOTE(review): as with M_MLA, any guard around the smull2 line is not
// visible in this listing -- confirm.
220 .macro M_MUL d1, r0, r1, d2:vararg
221 smull \d1\().2d, \r0\().2s, \r1\().2s
223 smull2 \d2\().2d, \r0\().4s, \r1\().4s
// STORE_ONE: fold the second accumulator d2 into v<rn>, add its two 64-bit
// lanes into d<rn>, shift right by 30 with rounding and signed saturation
// while narrowing to 32 bits, then store lane 0 at addr + idx * 4.
226 .macro STORE_ONE rn, addr, idx, d2
227 add v\rn\().2d, v\rn\().2d, \d2\().2d
228 addp d\rn\(), v\rn\().2d
229 sqrshrn v\rn\().2s, v\rn\().2d, #30
230 str s\rn\(), [\addr, \idx, lsl #2]