2 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
@ ff_ac3_max_msb_abs_int16_neon
@
@ Only the two input loads of this routine are visible in this excerpt.
@ Each visible pass reads sixteen 16-bit elements (2 x 16 bytes) through r0.
@ NOTE(review): r0 is presumably the source pointer passed as the first
@ argument; the accumulation, loop control and return are not shown here.
23 function ff_ac3_max_msb_abs_int16_neon, export=1
@ Load eight 16-bit lanes into q1; ":128" requires r0 to be 16-byte aligned,
@ and "!" advances r0 by the 16 bytes just read.
26 1: vld1.16 {q1}, [r0,:128]!
@ Load the next eight 16-bit lanes into q3, again advancing r0 by 16 bytes.
28 vld1.16 {q3}, [r0,:128]!
@ ff_ac3_exponent_min_neon
@
@ Visible data flow: 16 bytes at [r0] are loaded into q0, further 16-byte
@ vectors are read through r3 into q1, and q0 is then written back to the
@ same location at [r0] (an in-place update of the buffer at r0).
@ NOTE(review): the instruction that combines q0 with q1 is not visible in
@ this excerpt (the name suggests a per-byte minimum -- confirm), nor is the
@ setup of r3 / r12 or the loop control.
42 function ff_ac3_exponent_min_neon, export=1
@ Load 16 bytes from [r0] (16-byte aligned); no writeback, so r0 still
@ points at this vector when it is stored back below.
49 vld1.8 {q0}, [r0,:128]
@ Load 16 bytes from [r3] (16-byte aligned), then advance r3 by the byte
@ stride held in r12.
52 2: vld1.8 {q1}, [r3,:128], r12
@ Store q0 back to [r0] and advance r0 by 16 bytes.
57 vst1.8 {q0}, [r0,:128]!
@ ff_ac3_lshift_int16_neon
@
@ Visible data flow: eight 16-bit elements are loaded from [r0] into q1 and
@ q1 is stored back to the same address, i.e. the buffer at r0 is modified
@ in place, 16 bytes per pass.
@ NOTE(review): the shift applied to q1 between the load and the store, the
@ shift-amount setup and the loop control are not visible in this excerpt.
62 function ff_ac3_lshift_int16_neon, export=1
@ Load eight 16-bit lanes from [r0] (16-byte aligned); no writeback.
64 1: vld1.16 {q1}, [r0,:128]
@ Store q1 back to [r0], then advance r0 by 16 bytes.
66 vst1.16 {q1}, [r0,:128]!
@ ff_ac3_rshift_int32_neon
@
@ Visible data flow: four 32-bit elements are loaded from [r0] into q1 and
@ q1 is stored back to the same address, i.e. the buffer at r0 is modified
@ in place, 16 bytes per pass.
@ NOTE(review): the shift applied to q1 between the load and the store, the
@ shift-amount setup and the loop control are not visible in this excerpt.
72 function ff_ac3_rshift_int32_neon, export=1
@ Load four 32-bit lanes from [r0] (16-byte aligned); no writeback.
75 1: vld1.32 {q1}, [r0,:128]
@ Store q1 back to [r0], then advance r0 by 16 bytes.
77 vst1.32 {q1}, [r0,:128]!
@ ff_float_to_fixed24_neon
@
@ Converts single-precision floats read through r1 into signed 32-bit
@ fixed-point values with 24 fractional bits, written through r0.
@ Each pass of the visible body handles 16 elements (64 bytes in, 64 out).
@ Both pointers carry a ":128" hint, so they must be 16-byte aligned.
@ NOTE(review): the element counter, loop branch and return are not visible
@ in this excerpt; the count presumably must be a multiple of 16 -- confirm
@ against the caller.
83 function ff_float_to_fixed24_neon, export=1
@ Load the first eight floats (q0, q1) and advance r1 by 32 bytes.
84 1: vld1.32 {q0-q1}, [r1,:128]!
@ float -> s32 fixed-point, 24 fraction bits (VCVT rounds toward zero and
@ saturates on overflow).
85 vcvt.s32.f32 q0, q0, #24
@ Load the next eight floats (q2, q3); loads, conversions and stores are
@ interleaved rather than grouped.
86 vld1.32 {q2-q3}, [r1,:128]!
87 vcvt.s32.f32 q1, q1, #24
88 vcvt.s32.f32 q2, q2, #24
@ Store the first eight converted values and advance r0 by 32 bytes.
89 vst1.32 {q0-q1}, [r0,:128]!
90 vcvt.s32.f32 q3, q3, #24
@ Store the remaining eight converted values and advance r0 by 32 bytes.
91 vst1.32 {q2-q3}, [r0,:128]!
@ ff_ac3_extract_exponents_neon
@
@ Visible data flow: four 32-bit elements (16 bytes) are read through r1 per
@ pass and one 32-bit lane (4 bytes) is written through r0 per pass.
@ NOTE(review): the 16-bytes-in / 4-bytes-out ratio is consistent with one
@ output byte per input word, but the instructions that compute d6 from q0
@ are not visible in this excerpt -- confirm against the full source.
97 function ff_ac3_extract_exponents_neon, export=1
@ Load four 32-bit lanes from [r1] (16-byte aligned), advance r1 by 16.
100 vld1.32 {q0}, [r1,:128]!
@ Store only lane 0 of d6 (4 bytes) to [r0] (4-byte aligned), advance r0 by 4.
106 vst1.32 {d6[0]}, [r0,:32]!
@ ff_apply_window_int16_neon
@
@ Multiplies 16-bit samples by 16-bit coefficients using VQRDMULH
@ (saturating, rounding, doubling multiply returning the high half).
@ Two streams are processed per pass:
@   - q0: input read upward through r1, multiplied by q2, written upward
@     through r0;
@   - q1: input read through r4 and written through lr, both stepped by the
@     byte stride in r12, multiplied by the swapped halves of q3.
@ r4 and lr start at r1 + 2*r3 and r0 + 2*r3 respectively.
@ NOTE(review): r3 is presumably the length in 16-bit elements, r12 a
@ negative stride and q3 a reordered copy of the coefficients in q2; their
@ setup is not visible in this excerpt. r4 and lr are overwritten here, so
@ they must be saved and restored by code that is not shown -- confirm.
112 function ff_apply_window_int16_neon, export=1
@ r4 = r1 + r3 * 2 (second read pointer, offset by r3 halfwords).
114 add r4, r1, r3, lsl #1
@ lr = r0 + r3 * 2 (second write pointer, offset by r3 halfwords).
115 add lr, r0, r3, lsl #1
@ Load eight input samples through r1, advancing r1 by 16 bytes.
120 vld1.16 {q0}, [r1,:128]!
@ Load eight coefficients through r2, advancing r2 by 16 bytes.
121 vld1.16 {q2}, [r2,:128]!
@ Load eight input samples through r4, then step r4 by r12 bytes.
122 vld1.16 {q1}, [r4,:128], r12
@ q0 = q0 * q2, lane by lane.
124 vqrdmulh.s16 q0, q0, q2
@ Low half of q1 (d2) uses the high half of q3 (d7), and the high half of
@ q1 (d3) uses the low half of q3 (d6).
125 vqrdmulh.s16 d2, d2, d7
126 vqrdmulh.s16 d3, d3, d6
@ Store the first result through r0, advancing r0 by 16 bytes.
127 vst1.16 {q0}, [r0,:128]!
@ Store the second result through lr, then step lr by r12 bytes.
128 vst1.16 {q1}, [lr,:128], r12
@ ff_ac3_sum_square_butterfly_int32_neon
@
@ Visible computation, on two 32-bit lanes at a time (a = d16, b = d17):
@   q0 += a * a
@   q1 += b * b
@   q2 += (a + b) * (a + b)
@   q3 += (a - b) * (a - b)
@ VMLAL widens each product to 64 bits, so q0-q3 each hold two 64-bit
@ running sums. Finally q0-q1 (32 bytes) are stored at [r0].
@ NOTE(review): the loads of d16/d17, the zeroing of the accumulators, the
@ loop control and any reduction of q0-q3 before the final store are not
@ visible in this excerpt, so the exact layout of the four stored values
@ cannot be confirmed from here.
135 function ff_ac3_sum_square_butterfly_int32_neon, export=1
@ Butterfly: d18 = a + b, d19 = a - b (32-bit lanes, wrapping arithmetic).
143 vadd.s32 d18, d16, d17
144 vsub.s32 d19, d16, d17
@ Accumulate the widened (64-bit) squares of a, b, a+b and a-b.
145 vmlal.s32 q0, d16, d16
146 vmlal.s32 q1, d17, d17
147 vmlal.s32 q2, d18, d18
148 vmlal.s32 q3, d19, d19
@ Store four 64-bit values from q0-q1 at [r0]; no alignment hint and no
@ writeback.
155 vst1.64 {q0-q1}, [r0]
159 function ff_ac3_sum_square_butterfly_float_neon, export=1
165 vadd.f32 d18, d16, d17
166 vsub.f32 d19, d16, d17
167 vmla.f32 d0, d16, d16
168 vmla.f32 d1, d17, d17
169 vmla.f32 d2, d18, d18
170 vmla.f32 d3, d19, d19