2 * ARM NEON optimised Format Conversion Utils
3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * ff_float_to_int16_neon
 *
 * Reads 32-bit floats through r1 and writes narrowed results through r0.
 * Every load and store below moves 16 bytes, post-increments its pointer and
 * carries a :128 qualifier, so both pointers must be 16-byte aligned.
 *
 * NOTE(review): the line numbers embedded at the start of each line skip
 * values, so this listing appears to be missing instructions (no loop
 * counter, no branches, and nothing visible writes d4-d7 ahead of the first
 * vst1).  Confirm against the complete file before changing any code.
 */
28 function ff_float_to_int16_neon, export=1
/*
 * Convert the first eight floats into q8/q9.  The #16 operand requests
 * signed fixed point with 16 fraction bits, which leaves the integer part
 * of each sample in the upper half of its 32-bit lane.
 */
30 vld1.64 {d0-d1}, [r1,:128]!
31 vcvt.s32.f32 q8, q0, #16
32 vld1.64 {d2-d3}, [r1,:128]!
33 vcvt.s32.f32 q9, q1, #16
/* Next eight floats, converted in place in q0/q1. */
39 vld1.64 {d0-d1}, [r1,:128]!
40 vcvt.s32.f32 q0, q0, #16
42 vld1.64 {d2-d3}, [r1,:128]!
43 vcvt.s32.f32 q1, q1, #16
/* Write out 16 bytes held in d4-d5 (the producer is not visible here). */
45 vst1.64 {d4-d5}, [r0,:128]!
/* Refill q8/q9 with the following eight floats. */
47 vld1.64 {d16-d17},[r1,:128]!
48 vcvt.s32.f32 q8, q8, #16
49 vld1.64 {d18-d19},[r1,:128]!
50 vcvt.s32.f32 q9, q9, #16
51 vst1.64 {d6-d7}, [r0,:128]!
/*
 * Label 2: one more load/convert of eight floats into q0/q1 followed by two
 * 16-byte stores.  The branch that targets this label is not visible.
 */
55 2: vld1.64 {d0-d1}, [r1,:128]!
57 vcvt.s32.f32 q0, q0, #16
58 vld1.64 {d2-d3}, [r1,:128]!
60 vcvt.s32.f32 q1, q1, #16
62 vst1.64 {d4-d5}, [r0,:128]!
64 vst1.64 {d6-d7}, [r0,:128]!
/*
 * Label 3: vshrn shifts every 32-bit lane of q8 right by 16 and narrows it
 * to 16 bits, so d4 receives four 16-bit samples taken from the upper halves
 * of the fixed-point values.
 */
66 3: vshrn.s32 d4, q8, #16
68 vst1.64 {d4-d5}, [r0,:128]!
/*
 * ff_float_to_int16_interleave_neon
 *
 * Converts several float streams to 16-bit samples and writes them through a
 * single destination pointer.  Four code paths are visible:
 *   - two streams read through r3 and r1, written contiguously through r0;
 *   - four streams read through r4-r7 (pointers fetched with ldmia from r1),
 *     written through r8 with a post-increment of ip;
 *   - two streams read through r4 and r5, written through r8 one 32-bit
 *     lane at a time with a post-increment of ip;
 *   - one stream read through r4, written through r5 one 16-bit lane at a
 *     time with a post-increment of ip.
 * ip therefore acts as the byte distance between successive output groups
 * (presumably the size of one interleaved frame - TODO confirm).
 *
 * All float conversions use vcvt.s32.f32 with #16, i.e. signed fixed point
 * with 16 fraction bits, so the wanted 16-bit sample sits in the upper half
 * of each 32-bit lane.
 *
 * NOTE(review): the line numbers embedded at the start of each line skip
 * values, so loop counters, branches and several data-shuffling
 * instructions are not visible in this listing.  Confirm against the
 * complete file before changing any code.
 */
72 function ff_float_to_int16_interleave_neon, export=1
/*
 * Hand over to the plain converter when the preceding comparison (not
 * visible) yields "less than" - presumably fewer than two channels.
 */
76 blt ff_float_to_int16_neon
/*
 * Two-stream path.  q8/q9 take eight samples from the r3 stream and q10/q11
 * eight samples from the r1 stream.
 */
83 vld1.64 {d0-d1}, [r3,:128]!
84 vcvt.s32.f32 q8, q0, #16
85 vld1.64 {d2-d3}, [r3,:128]!
86 vcvt.s32.f32 q9, q1, #16
87 vld1.64 {d20-d21},[r1,:128]!
88 vcvt.s32.f32 q10, q10, #16
89 vld1.64 {d22-d23},[r1,:128]!
90 vcvt.s32.f32 q11, q11, #16
/* Second batch: r3 stream into q0/q1, r1 stream into q12/q13. */
95 vld1.64 {d0-d1}, [r3,:128]!
96 vcvt.s32.f32 q0, q0, #16
98 vld1.64 {d2-d3}, [r3,:128]!
99 vcvt.s32.f32 q1, q1, #16
100 vld1.64 {d24-d25},[r1,:128]!
101 vcvt.s32.f32 q12, q12, #16
102 vld1.64 {d26-d27},[r1,:128]!
/* Stores of the first batch are interleaved with the remaining conversions. */
104 vst1.64 {d20-d21},[r0,:128]!
105 vcvt.s32.f32 q13, q13, #16
106 vst1.64 {d22-d23},[r0,:128]!
/* Refill q8/q9 (r3 stream) and q10/q11 (r1 stream) for the next round. */
108 vld1.64 {d16-d17},[r3,:128]!
110 vst1.64 {d24-d25},[r0,:128]!
111 vcvt.s32.f32 q8, q8, #16
112 vld1.64 {d18-d19},[r3,:128]!
113 vcvt.s32.f32 q9, q9, #16
114 vld1.64 {d20-d21},[r1,:128]!
115 vcvt.s32.f32 q10, q10, #16
116 vld1.64 {d22-d23},[r1,:128]!
117 vcvt.s32.f32 q11, q11, #16
118 vst1.64 {d26-d27},[r0,:128]!
/*
 * Label 2: vsri shifts each q8 lane right by 16 and inserts it into q10
 * while keeping q10's upper 16 bits, so every 32-bit lane ends up with an
 * r3-stream sample in its low half and an r1-stream sample in its high half.
 */
122 2: vsri.32 q10, q8, #16
123 vld1.64 {d0-d1}, [r3,:128]!
124 vcvt.s32.f32 q0, q0, #16
125 vld1.64 {d2-d3}, [r3,:128]!
126 vcvt.s32.f32 q1, q1, #16
127 vld1.64 {d24-d25},[r1,:128]!
128 vcvt.s32.f32 q12, q12, #16
130 vld1.64 {d26-d27},[r1,:128]!
131 vcvt.s32.f32 q13, q13, #16
132 vst1.64 {d20-d21},[r0,:128]!
134 vst1.64 {d22-d23},[r0,:128]!
/* 32-byte store of q12/q13 in one instruction. */
136 vst1.64 {d24-d27},[r0,:128]!
/* Label 3: same low/high merge as above, then a 32-byte store of q10/q11. */
138 3: vsri.32 q10, q8, #16
140 vst1.64 {d20-d23},[r0,:128]!
/*
 * Label 5: four-stream path.  ldmia fetches four source pointers from the
 * table at r1 into r4-r7 and advances r1 past them.
 */
149 5: ldmia r1!, {r4-r7}
/* Four samples from each of the four streams into q8-q11. */
152 vld1.64 {d16-d17},[r4,:128]!
153 vcvt.s32.f32 q8, q8, #16
154 vld1.64 {d18-d19},[r5,:128]!
155 vcvt.s32.f32 q9, q9, #16
156 vld1.64 {d20-d21},[r6,:128]!
157 vcvt.s32.f32 q10, q10, #16
158 vld1.64 {d22-d23},[r7,:128]!
159 vcvt.s32.f32 q11, q11, #16
/* Next four samples per stream into q0-q3. */
161 vld1.64 {d0-d1}, [r4,:128]!
162 vcvt.s32.f32 q0, q0, #16
164 vld1.64 {d2-d3}, [r5,:128]!
165 vcvt.s32.f32 q1, q1, #16
/* Merge the r6 stream (q10) into the low halves of the r7 stream (q11). */
166 vsri.32 q11, q10, #16
167 vld1.64 {d4-d5}, [r6,:128]!
168 vcvt.s32.f32 q2, q2, #16
170 vld1.64 {d6-d7}, [r7,:128]!
171 vcvt.s32.f32 q3, q3, #16
/*
 * Each store writes 8 bytes at r8 and then adds ip to r8.
 * NOTE(review): the instructions that prepare d18/d19 for these stores are
 * not visible in this listing.
 */
173 vst1.64 {d18}, [r8], ip
175 vst1.64 {d22}, [r8], ip
177 vst1.64 {d19}, [r8], ip
179 vst1.64 {d23}, [r8], ip
/* Reload q8-q11 while the results held in d2/d3/d6/d7 are written out. */
182 vld1.64 {d16-d17},[r4,:128]!
183 vcvt.s32.f32 q8, q8, #16
184 vst1.64 {d2}, [r8], ip
185 vld1.64 {d18-d19},[r5,:128]!
186 vcvt.s32.f32 q9, q9, #16
187 vst1.64 {d6}, [r8], ip
188 vld1.64 {d20-d21},[r6,:128]!
189 vcvt.s32.f32 q10, q10, #16
190 vst1.64 {d3}, [r8], ip
191 vld1.64 {d22-d23},[r7,:128]!
192 vcvt.s32.f32 q11, q11, #16
193 vst1.64 {d7}, [r8], ip
/* Label 7: the same four 8-byte stores without any further loads. */
195 7: vst1.64 {d2}, [r8], ip
196 vst1.64 {d6}, [r8], ip
197 vst1.64 {d3}, [r8], ip
198 vst1.64 {d7}, [r8], ip
/*
 * Two-stream strided path.  r4 and r5 are read alternately: q8 and q10 hold
 * eight consecutive r4 samples, q9 and q11 eight consecutive r5 samples.
 */
213 vld1.64 {d16-d17},[r4,:128]!
214 vcvt.s32.f32 q8, q8, #16
215 vld1.64 {d18-d19},[r5,:128]!
216 vcvt.s32.f32 q9, q9, #16
217 vld1.64 {d20-d21},[r4,:128]!
218 vcvt.s32.f32 q10, q10, #16
219 vld1.64 {d22-d23},[r5,:128]!
220 vcvt.s32.f32 q11, q11, #16
/*
 * Pair the streams: after vsri each 32-bit lane of q9 has the r4 sample in
 * its low half and the r5 sample in its high half.  Every vst1.32 below
 * writes one such 32-bit pair and then adds ip to r8.
 */
224 vsri.32 d18, d16, #16
225 vsri.32 d19, d17, #16
226 vld1.64 {d16-d17},[r4,:128]!
227 vcvt.s32.f32 q8, q8, #16
228 vst1.32 {d18[0]}, [r8], ip
229 vsri.32 d22, d20, #16
230 vst1.32 {d18[1]}, [r8], ip
231 vsri.32 d23, d21, #16
232 vst1.32 {d19[0]}, [r8], ip
233 vst1.32 {d19[1]}, [r8], ip
/* q9 is reloaded only after its last lane has been stored. */
234 vld1.64 {d18-d19},[r5,:128]!
235 vcvt.s32.f32 q9, q9, #16
236 vst1.32 {d22[0]}, [r8], ip
237 vst1.32 {d22[1]}, [r8], ip
238 vld1.64 {d20-d21},[r4,:128]!
239 vcvt.s32.f32 q10, q10, #16
240 vst1.32 {d23[0]}, [r8], ip
241 vst1.32 {d23[1]}, [r8], ip
242 vld1.64 {d22-d23},[r5,:128]!
243 vcvt.s32.f32 q11, q11, #16
/*
 * Second register set: q0/q2 take the next r4 samples and q1/q3 the next r5
 * samples while the q8-q11 set is paired and written out.
 */
245 vld1.64 {d0-d1}, [r4,:128]!
246 vcvt.s32.f32 q0, q0, #16
247 vsri.32 d18, d16, #16
248 vld1.64 {d2-d3}, [r5,:128]!
249 vcvt.s32.f32 q1, q1, #16
250 vsri.32 d19, d17, #16
251 vld1.64 {d4-d5}, [r4,:128]!
252 vcvt.s32.f32 q2, q2, #16
253 vld1.64 {d6-d7}, [r5,:128]!
254 vcvt.s32.f32 q3, q3, #16
255 vst1.32 {d18[0]}, [r8], ip
256 vsri.32 d22, d20, #16
257 vst1.32 {d18[1]}, [r8], ip
258 vsri.32 d23, d21, #16
259 vst1.32 {d19[0]}, [r8], ip
261 vst1.32 {d19[1]}, [r8], ip
263 vst1.32 {d22[0]}, [r8], ip
265 vst1.32 {d22[1]}, [r8], ip
267 vst1.32 {d23[0]}, [r8], ip
268 vst1.32 {d23[1]}, [r8], ip
/*
 * Reload q8-q11 while the second set is stored from d2/d3/d6/d7.
 * NOTE(review): the instructions that pair q0 with q1 and q2 with q3 are not
 * visible in this listing.
 */
270 vld1.64 {d16-d17},[r4,:128]!
271 vcvt.s32.f32 q8, q8, #16
272 vst1.32 {d2[0]}, [r8], ip
273 vst1.32 {d2[1]}, [r8], ip
274 vld1.64 {d18-d19},[r5,:128]!
275 vcvt.s32.f32 q9, q9, #16
276 vst1.32 {d3[0]}, [r8], ip
277 vst1.32 {d3[1]}, [r8], ip
278 vld1.64 {d20-d21},[r4,:128]!
279 vcvt.s32.f32 q10, q10, #16
280 vst1.32 {d6[0]}, [r8], ip
281 vst1.32 {d6[1]}, [r8], ip
282 vld1.64 {d22-d23},[r5,:128]!
283 vcvt.s32.f32 q11, q11, #16
284 vst1.32 {d7[0]}, [r8], ip
285 vst1.32 {d7[1]}, [r8], ip
/* Label 6: write out the second set (d2/d3/d6/d7) without further loads. */
287 6: vst1.32 {d2[0]}, [r8], ip
288 vst1.32 {d2[1]}, [r8], ip
289 vst1.32 {d3[0]}, [r8], ip
290 vst1.32 {d3[1]}, [r8], ip
291 vst1.32 {d6[0]}, [r8], ip
292 vst1.32 {d6[1]}, [r8], ip
293 vst1.32 {d7[0]}, [r8], ip
294 vst1.32 {d7[1]}, [r8], ip
/* Label 7: pair and write out whatever is currently held in q8-q11. */
296 7: vsri.32 d18, d16, #16
297 vsri.32 d19, d17, #16
298 vst1.32 {d18[0]}, [r8], ip
299 vsri.32 d22, d20, #16
300 vst1.32 {d18[1]}, [r8], ip
301 vsri.32 d23, d21, #16
302 vst1.32 {d19[0]}, [r8], ip
303 vst1.32 {d19[1]}, [r8], ip
304 vst1.32 {d22[0]}, [r8], ip
305 vst1.32 {d22[1]}, [r8], ip
306 vst1.32 {d23[0]}, [r8], ip
307 vst1.32 {d23[1]}, [r8], ip
/*
 * Single-stream strided path.  r4 is the only source pointer and r5 the
 * destination (2-byte aligned, per the :16 qualifier).
 */
318 vld1.64 {d0-d1}, [r4,:128]!
319 vcvt.s32.f32 q0, q0, #16
320 vld1.64 {d2-d3}, [r4,:128]!
321 vcvt.s32.f32 q1, q1, #16
324 vld1.64 {d4-d5}, [r4,:128]!
325 vcvt.s32.f32 q2, q2, #16
326 vld1.64 {d6-d7}, [r4,:128]!
327 vcvt.s32.f32 q3, q3, #16
/*
 * Only the odd 16-bit lanes are stored: lanes 1 and 3 of a d register are
 * the upper halves of its two 32-bit fixed-point values.  Each store writes
 * two bytes and then adds ip to r5.
 */
328 vst1.16 {d0[1]}, [r5,:16], ip
329 vst1.16 {d0[3]}, [r5,:16], ip
330 vst1.16 {d1[1]}, [r5,:16], ip
331 vst1.16 {d1[3]}, [r5,:16], ip
332 vst1.16 {d2[1]}, [r5,:16], ip
333 vst1.16 {d2[3]}, [r5,:16], ip
334 vst1.16 {d3[1]}, [r5,:16], ip
335 vst1.16 {d3[3]}, [r5,:16], ip
/* Refill q0/q1 before the q2/q3 results are written out. */
337 vld1.64 {d0-d1}, [r4,:128]!
338 vcvt.s32.f32 q0, q0, #16
339 vld1.64 {d2-d3}, [r4,:128]!
340 vcvt.s32.f32 q1, q1, #16
/* Label 7: store the eight samples held in q2/q3. */
341 7: vst1.16 {d4[1]}, [r5,:16], ip
342 vst1.16 {d4[3]}, [r5,:16], ip
343 vst1.16 {d5[1]}, [r5,:16], ip
344 vst1.16 {d5[3]}, [r5,:16], ip
345 vst1.16 {d6[1]}, [r5,:16], ip
346 vst1.16 {d6[3]}, [r5,:16], ip
347 vst1.16 {d7[1]}, [r5,:16], ip
348 vst1.16 {d7[3]}, [r5,:16], ip
/* Store the eight samples held in q0/q1. */
352 vst1.16 {d0[1]}, [r5,:16], ip
353 vst1.16 {d0[3]}, [r5,:16], ip
354 vst1.16 {d1[1]}, [r5,:16], ip
355 vst1.16 {d1[3]}, [r5,:16], ip
356 vst1.16 {d2[1]}, [r5,:16], ip
357 vst1.16 {d2[3]}, [r5,:16], ip
358 vst1.16 {d3[1]}, [r5,:16], ip
359 vst1.16 {d3[3]}, [r5,:16], ip
/* Load and convert eight more samples from r4 into q0/q1. */
362 vld1.64 {d0-d1}, [r4,:128]!
363 vcvt.s32.f32 q0, q0, #16
364 vld1.64 {d2-d3}, [r4,:128]!
365 vcvt.s32.f32 q1, q1, #16
/*
 * ff_int32_to_float_fmul_scalar_neon
 *
 * Reads 32-bit words through r1 (16 bytes per load, 16-byte aligned,
 * post-incremented) and writes q9/q10 through r0 (same size, alignment and
 * post-increment).
 *
 * NOTE(review): the line numbers embedded at the start of each line skip
 * values; the instructions that turn the loaded q1/q2 into the stored
 * q9/q10, the loop control and the end of the function are not visible in
 * this listing.  Going by the function name they presumably convert to
 * float and multiply by the scalar held in q0 - TODO confirm.
 */
369 function ff_int32_to_float_fmul_scalar_neon, export=1
/*
 * Broadcast 32-bit lane 0 of d0 into all four lanes of q0.  "VFP" is a
 * prefix defined outside this listing (presumably it selects the variant
 * built when float arguments arrive in VFP registers - TODO confirm).
 */
370 VFP vdup.32 q0, d0[0]
/* First eight input words into q1/q2. */
375 vld1.32 {q1},[r1,:128]!
377 vld1.32 {q2},[r1,:128]!
/* Next eight input words are fetched before the previous results are stored. */
384 vld1.32 {q1},[r1,:128]!
386 vld1.32 {q2},[r1,:128]!
388 vst1.32 {q9}, [r0,:128]!
389 vst1.32 {q10},[r0,:128]!
/* Label 2: store the last pair of result vectors without loading more input. */
391 2: vst1.32 {q9}, [r0,:128]!
392 vst1.32 {q10},[r0,:128]!