2 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
3 * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "asm-offsets.h"
24 #include "libavutil/aarch64/asm.S"
// One multiply-accumulate step over eight 4 x 32-bit-float vectors.
// Reads through pointers x4-x11; each pointer is post-incremented by the
// byte stride held in x15.  Accumulates into v4-v7.
// NOTE(review): the enclosing macro/label is not visible in this view, and
// the embedded original line numbers skip 31 and 34, so instructions may be
// missing here - confirm against the full file before relying on this.
// First operand set: v28-v31 from [x8]-[x11].
27 ld1 {v29.4s}, [x9], x15
28 ld1 {v28.4s}, [x8], x15
29 ld1 {v30.4s}, [x10], x15
30 ld1 {v31.4s}, [x11], x15
// Second operand set: v24-v27 from [x4]-[x7]; the loads are interleaved with
// the arithmetic below.
32 ld1 {v24.4s}, [x4], x15
33 ld1 {v25.4s}, [x5], x15
35 ld1 {v26.4s}, [x6], x15
// v5 += v25 * v29 (per lane)
36 fmla v5.4s, v25.4s, v29.4s
37 ld1 {v27.4s}, [x7], x15
// ext by 8 bytes with both sources equal swaps the two 64-bit halves of the
// register (applied to v28 and v31 only).
38 ext v28.16b, v28.16b, v28.16b, #8
39 ext v31.16b, v31.16b, v31.16b, #8
// v6 += v26 * v30
40 fmla v6.4s, v26.4s, v30.4s
// v4 -= v24 * v28  (note: subtract, unlike the other three accumulators)
41 fmls v4.4s, v24.4s, v28.4s
// v7 += v27 * v31
42 fmla v7.4s, v27.4s, v31.4s
// ff_synth_filter_float_neon: exported NEON routine.
// Visible register use on entry: x0 = context holding a function pointer at
// offset IMDCT_HALF, x1 = synth_buf base, x2 = pointer to the 32-bit
// synth_buf_offset, x3/x4 = synth_buf_2 / window (per the reload comments
// below).
// NOTE(review): many lines of this function (including the indirect call,
// the loop structure, the stack release and the return) are not visible in
// this view; the comments below describe only the instructions shown.
45 function ff_synth_filter_float_neon, export=1
46 ldr w7, [x2] // *synth_buf_offset
47 ldr x9, [x0, #IMDCT_HALF] // imdct_half function pointer
// Open a 64-byte stack frame (keeps sp 16-byte aligned) and spill x3/x4 at
// its base so they can be reloaded after the call.
49 stp x3, x4, [sp, #-64]!
// Advance x1 by x7 four-byte elements.
50 add x1, x1, x7, lsl #2 // synth_buf
// Spill x7 and the link register (x30) at sp+32.
// NOTE(review): x7 may have been modified by lines not shown between the
// load above and this store - confirm.
55 stp x7, x30, [sp, #32]
// Reload the spilled values into the registers used from here on:
// [sp] (stored from x3/x4) -> x2/x4.
63 ldp x2, x4, [sp] // synth_buf_2, window
// NOTE(review): the store that fills [sp, #16] is not visible here.
64 ldp x13, x9, [sp, #16] // out, synth_buf
// The slot written from x7 comes back in x0; x30 is restored.
65 ldp x0, x30, [sp, #32] // *synth_buf_offset
// Second-half pointers, each 16 four-byte elements (64 bytes) past its base.
68 add x3, x2, #16*4 // synth_buf_2 + 16
69 add x14, x13, #16*4 // out + 16
74 add x10, x9, #16*4 // synth_buf
76 add x5, x4, #16*4 // window
// Load four floats from each half of synth_buf_2 into v4 and v5.
80 ld1 {v4.4s}, [x2] // a
81 ld1 {v5.4s}, [x3] // b
// Scale v4 and v5 by lane 0 of v0.
// NOTE(review): v0.s[0] is presumably the float scale argument; the line
// that sets it is not visible - confirm.
103 fmul v4.4s, v4.4s, v0.s[0]
104 fmul v5.4s, v5.4s, v0.s[0]
// Write v6/v7 back to synth_buf_2 (both halves) and the scaled v4/v5 to out
// (both halves); every pointer advances by 16 bytes.
105 st1 {v6.4s}, [x2], #16
106 st1 {v7.4s}, [x3], #16
107 st1 {v4.4s}, [x13], #16
108 st1 {v5.4s}, [x14], #16
// Pointer adjustments: x4 moves back by 508 four-byte elements, x9 forward
// by 4 elements, x8 back by 4 elements.
// NOTE(review): presumably prepares the next iteration; the loop branch is
// not visible - confirm.
111 sub x4, x4, #508*4 // window
112 add x9, x9, #4*4 // synth_buf
113 sub x8, x8, #4*4 // synth_buf