* ARM NEON optimised MDCT
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "asm.S"
- .fpu neon
+ preserve8
+
.text
#define ff_fft_calc_neon X(ff_fft_calc_neon)
push {r4-r8,lr}
mov r12, #1
- ldr lr, [r0, #28] @ mdct_bits
- ldr r4, [r0, #32] @ tcos
+ ldr lr, [r0, #20] @ mdct_bits
+ ldr r4, [r0, #24] @ tcos
ldr r3, [r0, #8] @ revtab
lsl r12, r12, lr @ n = 1 << nbits
lsr lr, r12, #2 @ n4 = n >> 2
bl ff_fft_calc_neon
mov r12, #1
- ldr lr, [r4, #28] @ mdct_bits
- ldr r4, [r4, #32] @ tcos
+ ldr lr, [r4, #20] @ mdct_bits
+ ldr r4, [r4, #24] @ tcos
lsl r12, r12, lr @ n = 1 << nbits
lsr lr, r12, #3 @ n8 = n >> 3
vst2.32 {d5,d7}, [r8,:128]
pop {r4-r8,pc}
-.endfunc
+endfunc
@ ff_imdct_calc_neon: exported routine (export=1).
@ NOTE(review): this listing is in patch form and only part of the body is
@ visible. A leading "-" marks the removed side of the patch and a leading
@ "+" the added side; the two sides differ only in the load offset and in
@ how the function is closed.
@ Presumably r0 is a pointer to the transform context and r1 an output
@ pointer -- TODO confirm against the C prototype.
function ff_imdct_calc_neon, export=1
@ Save the registers used below together with the return address.
push {r4-r6,lr}
@ r3 = word at context offset 20 (offset 28 on the removed side); the other
@ routines in this file annotate that same field as mdct_bits.
- ldr r3, [r0, #28]
+ ldr r3, [r0, #20]
mov r4, #1
@ Keep a copy of r1 in r5.
mov r5, r1
@ r4 = 1 << r3
lsl r4, r4, r3
@ Loop back-edge: local label 1 and the instruction that sets the flags
@ tested here are not part of this excerpt.
bgt 1b
@ Restore r4-r6 and return by popping the saved lr straight into pc.
pop {r4-r6,pc}
@ The added side closes the function with endfunc (presumably a macro from
@ asm.S -- confirm) in place of the raw .endfunc directive.
-.endfunc
+endfunc
@ ff_mdct_calc_neon: exported routine (export=1) that calls
@ ff_fft_calc_neon part-way through.
@ NOTE(review): this listing is in patch form and large parts of the body
@ are not visible. A leading "-" marks the removed side of the patch and a
@ leading "+" the added side. The added side reads mdct_bits / tcos at
@ context offsets 20 / 24 where the removed side used 28 / 32.
@ Presumably r0 is the context pointer and r2 the input pointer -- TODO
@ confirm against the C prototype.
function ff_mdct_calc_neon, export=1
@ Save r4-r10 and the return address.
push {r4-r10,lr}
mov r12, #1
- ldr lr, [r0, #28] @ mdct_bits
- ldr r4, [r0, #32] @ tcos
+ ldr lr, [r0, #20] @ mdct_bits
+ ldr r4, [r0, #24] @ tcos
ldr r3, [r0, #8] @ revtab
@ Unlike the later recomputation, n is written to lr here, not to r12.
lsl lr, r12, lr @ n = 1 << nbits
add r7, r2, lr @ in4u
vadd.f32 d17, d17, d3 @ in2u+in1d -I
@ Loop head; the matching backward branch is not part of this excerpt.
@ NEON multiplies and integer loads are interleaved below.
1:
vmul.f32 d7, d0, d21 @ I*s
@ r10 = word at r3 + (lr >> 1). The "A" line does this in one load with a
@ shifted register offset; the two "T" lines compute the offset into r10
@ first and then load. Presumably A/T are asm.S macros selecting the ARM
@ and Thumb variants -- confirm.
- ldr r10, [r3, lr, lsr #1]
+A ldr r10, [r3, lr, lsr #1]
+T lsr r10, lr, #1
+T ldr r10, [r3, r10]
vmul.f32 d6, d1, d20 @ -R*c
@ Pre-indexed load with writeback: r3 += 4, then r6 = word at r3.
ldr r6, [r3, #4]!
vmul.f32 d4, d1, d21 @ -R*s
bl ff_fft_calc_neon
@ After the call, n is rebuilt from the context. r4 is used as the context
@ pointer here, so it must have been reloaded in code not shown -- TODO
@ confirm.
mov r12, #1
- ldr lr, [r4, #28] @ mdct_bits
- ldr r4, [r4, #32] @ tcos
+ ldr lr, [r4, #20] @ mdct_bits
+ ldr r4, [r4, #24] @ tcos
lsl r12, r12, lr @ n = 1 << nbits
lsr lr, r12, #3 @ n8 = n >> 3
@ Interleaving store of d5 and d7 to the address in r8; the :128 qualifier
@ declares that address to be 128-bit aligned.
vst2.32 {d5,d7}, [r8,:128]
@ Restore r4-r10 and return by popping the saved lr straight into pc.
pop {r4-r10,pc}
@ The added side closes the function with endfunc (presumably a macro from
@ asm.S -- confirm) in place of the raw .endfunc directive.
-.endfunc
+endfunc