* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2009 Naotoshi Nojiri
*
- * This file is part of FFmpeg.
+ * This algorithm (though not any of the implementation details) is
+ * based on libdjbfft by D. J. Bernstein.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "asm.S"
+#include "libavutil/arm/asm.S"
/* 1/sqrt(2); used to build the entries of the mppm constant table. */
#define M_SQRT1_2 0.70710678118654752440
- .text
@ fft4_neon: in-place kernel on the 32 bytes addressed by r0.
@   In:  r0 = buffer pointer; the :128 qualifier asserts 16-byte alignment.
@   Out: the same 32 bytes, rewritten from d0-d3.
@ NOTE(review): presumably the 4-point complex FFT (name-based guess). The
@ arithmetic between the load and the store is not part of this excerpt.
function fft4_neon
vld1.32 {d0-d3}, [r0,:128] @ d0-d3 <- 32 bytes at r0
vst1.32 {d0-d3}, [r0,:128] @ write d0-d3 back to the same address
bx lr @ return to caller
@ (diff) the raw .endfunc directive is replaced by endfunc.
-.endfunc
+endfunc
@ fft8_neon
@   In:  r0 = buffer pointer; the :128 qualifier asserts 16-byte alignment.
@ NOTE(review): presumably the 8-point transform (name-based guess). Only the
@ pointer copy, the final store and the return are visible in this excerpt;
@ whatever fills d0-d3 and whatever uses r1 is not shown.
function fft8_neon
mov r1, r0 @ r1 = copy of the buffer pointer
vst1.32 {d0-d3}, [r0,:128] @ store d0-d3 (32 bytes) at r0
bx lr @ return to caller
@ (diff) the raw .endfunc directive is replaced by endfunc.
-.endfunc
+endfunc
@ fft16_neon
@   In:  r0 = buffer pointer (used as the store address at the end).
@ NOTE(review): presumably the 16-point transform (name-based guess). This
@ excerpt shows only part of the body: the loads that fill q12-q15 and the
@ instructions that read the mppm and ff_cos_16 tables are not visible.
function fft16_neon
movrel r1, mppm @ r1 = address of the mppm constant table
vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6}
vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a}
@ (diff) the ff_cos_16 address load switches from movrel to movrelx with
@ the X() symbol wrapper. NOTE(review): presumably because ff_cos_16 is
@ defined outside this file -- confirm against the macro definitions.
- movrel r2, ff_cos_16
+ movrelx r2, X(ff_cos_16)
vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8}
vrev64.32 d1, d1 @ swap the two 32-bit lanes of d1
vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a}
@ Interleaving stores of two register pairs. The first post-increments r0
@ by r1. NOTE(review): r1 held the mppm address above, so it must be
@ reloaded with a byte stride in code not shown here -- TODO confirm.
vst2.32 {d26-d27},[r0,:128], r1
vst2.32 {d30-d31},[r0,:128]
bx lr @ return to caller
-.endfunc
+endfunc
@ fft_pass_neon
@ The def_fft macro branches here with r0 = the pointer it kept in r4,
@ r1 = address of the ff_cos_<n> table and r2 = n4/2.
@ NOTE(review): presumably the pass that combines the sub-transforms using
@ the table in r1. The loop body and its local label 1 are not part of this
@ excerpt; only the prologue, the loop-back branch and the epilogue are.
function fft_pass_neon
push {r4-r6,lr} @ save r4-r6 and the return address
bne 1b @ repeat while Z is clear (label 1 not shown)
pop {r4-r6,pc} @ restore r4-r6 and return via the saved lr
-.endfunc
+endfunc
@ def_fft n, n2, n4: emits one transform routine that calls smaller
@ routines and then finishes in fft_pass_neon.
@ The instantiation visible below passes n = 32, n2 = 16, n4 = 8, i.e.
@ n2 = n/2 and n4 = n/4 for that instance.
@ NOTE(review): the function line, the push that pairs with the pop below,
@ and any calls that use n2 are not visible in this excerpt.
.macro def_fft n, n2, n4
.align 6 @ 2^6 = 64-byte alignment (ARM gas: power-of-two operand)
bl fft\n4\()_neon @ call the size-n4 routine
mov r0, r4 @ r0 = value kept in r4 across the calls
pop {r4, lr} @ restore r4 and lr ahead of the tail call below
@ r1 = address of the ff_cos_<n> table; the diff switches this load from
@ movrel to movrelx with the X() symbol wrapper.
- movrel r1, ff_cos_\n
+ movrelx r1, X(ff_cos_\n)
mov r2, #\n4/2 @ r2 = n4/2
b fft_pass_neon @ tail call: fft_pass_neon returns to our caller
-.endfunc
+endfunc
.endm
@ Instantiate with n = 32: per the macro body this calls fft8_neon, loads
@ ff_cos_32 and passes r2 = 4 to fft_pass_neon.
def_fft 32, 16, 8
ldr r3, [r3, r2, lsl #2]
mov r0, r1
bx r3
-.endfunc
+endfunc
@ ff_fft_permute_neon (exported): scatter source elements into tmp_buf at
@ the positions given by revtab.
@   In:  r0 = context pointer; fields read: [r0] = nbits, [r0, #8] = revtab,
@             [r0, #12] = tmp_buf (the diff moves this from offset #20)
@        r1 = source data; the :128 qualifier asserts 16-byte alignment
@ Each iteration consumes two 8-byte elements from r1 and one 32-bit revtab
@ word holding two 16-bit indices, and runs (1 << nbits) / 2 times.
@ NOTE(review): presumably the FFT bit-reversal permutation. Only the
@ scatter loop is visible; r2 (a copy of the element count) is not read
@ again in this excerpt, so a later phase may be elided -- TODO confirm.
@ NOTE(review): the removed uxtah form added the raw 16-bit value to r3,
@ while the new form scales it by 8, so revtab presumably changed from byte
@ offsets to element indices -- confirm against the C side.
function ff_fft_permute_neon, export=1
push {r4,lr} @ r4 and lr are used as scratch below
mov r12, #1
ldr r2, [r0] @ nbits
- ldr r3, [r0, #20] @ tmp_buf
+ ldr r3, [r0, #12] @ tmp_buf
ldr r0, [r0, #8] @ revtab
lsl r12, r12, r2 @ r12 = 1 << nbits = elements left to process
mov r2, r12 @ r2 = copy of the element count
1:
vld1.32 {d0-d1}, [r1,:128]! @ load two 8-byte elements, r1 += 16
ldr r4, [r0], #4 @ fetch two packed 16-bit indices, r0 += 4
- uxtah lr, r3, r4
- uxtah r4, r3, r4, ror #16
+ uxth lr, r4 @ lr = low halfword index
+ uxth r4, r4, ror #16 @ r4 = high halfword index
+ add lr, r3, lr, lsl #3 @ lr = tmp_buf + 8 * low index
+ add r4, r3, r4, lsl #3 @ r4 = tmp_buf + 8 * high index
vst1.32 {d0}, [lr,:64] @ first element to its permuted slot
vst1.32 {d1}, [r4,:64] @ second element to its permuted slot
subs r12, r12, #2 @ two elements done
bgt 1b @ loop while elements remain
pop {r4,pc} @ restore r4 and return
-.endfunc
+endfunc
@ fft_tab_neon: table of 32-bit words holding the addresses of the
@ fftN_neon routines, listed by increasing N.
@ NOTE(review): the listing jumps from fft16_neon to fft16384_neon; the
@ entries in between are presumably elided from this excerpt.
@ (diff) the hand-written .rodata section / .align / label / .size
@ bookkeeping is replaced by the const and endconst macros.
- .section .rodata
- .align 4
-fft_tab_neon:
+const fft_tab_neon
.word fft4_neon
.word fft8_neon
.word fft16_neon
.word fft16384_neon
.word fft32768_neon
.word fft65536_neon
- .size fft_tab_neon, . - fft_tab_neon
+endconst
+
@ Constant vectors of four floats each. The names spell the sign pattern
@ (p = plus, m = minus):
@   pmmp = { +1, -1, -1, +1 }
@   mppm = { -s, +s, +s, -s } with s = M_SQRT1_2
@ fft16_neon takes the address of mppm; no use of pmmp is visible in this
@ excerpt.
@ NOTE(review): align=4 presumably maps to .align 4 (2^4 = 16 bytes on ARM
@ gas), matching the removed directive -- confirm against the const macro.
+const pmmp, align=4
+ .float +1.0, -1.0, -1.0, +1.0
+endconst
- .align 4
-pmmp: .float +1.0, -1.0, -1.0, +1.0
-mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+const mppm, align=4
+ .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+endconst