X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Farm%2Ffft_neon.S;h=c4d89189ea7906545ea83bd9377b6603a4feee39;hb=78670fbf07bd03479073d4218440de4914304680;hp=6ed5789fb77ba118f5d719dc1fea674f268bf3eb;hpb=648d792042cb6d58d032f3ae2518169d91a87274;p=ffmpeg diff --git a/libavcodec/arm/fft_neon.S b/libavcodec/arm/fft_neon.S index 6ed5789fb77..c4d89189ea7 100644 --- a/libavcodec/arm/fft_neon.S +++ b/libavcodec/arm/fft_neon.S @@ -4,28 +4,30 @@ * Copyright (c) 2009 Mans Rullgard * Copyright (c) 2009 Naotoshi Nojiri * - * This file is part of FFmpeg. + * This algorithm (though not any of the implementation details) is + * based on libdjbfft by D. J. Bernstein. * - * FFmpeg is free software; you can redistribute it and/or + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * FFmpeg is distributed in the hope that it will be useful, + * Libav is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software + * License along with Libav; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "asm.S" +#include "libavutil/arm/asm.S" #define M_SQRT1_2 0.70710678118654752440 - .text function fft4_neon vld1.32 {d0-d3}, [r0,:128] @@ -43,7 +45,7 @@ function fft4_neon vst1.32 {d0-d3}, [r0,:128] bx lr -.endfunc +endfunc function fft8_neon mov r1, r0 @@ -96,7 +98,7 @@ function fft8_neon vst1.32 {d0-d3}, [r0,:128] bx lr -.endfunc +endfunc function fft16_neon movrel r1, mppm @@ -141,7 +143,7 @@ function fft16_neon vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14} vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6} vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a} - movrel r2, ff_cos_16 + movrelx r2, X(ff_cos_16) vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8} vrev64.32 d1, d1 vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a} @@ -198,7 +200,7 @@ function fft16_neon vst2.32 {d26-d27},[r0,:128], r1 vst2.32 {d30-d31},[r0,:128] bx lr -.endfunc +endfunc function fft_pass_neon push {r4-r6,lr} @@ -274,7 +276,7 @@ function fft_pass_neon bne 1b pop {r4-r6,pc} -.endfunc +endfunc .macro def_fft n, n2, n4 .align 6 @@ -288,10 +290,10 @@ function fft\n\()_neon bl fft\n4\()_neon mov r0, r4 pop {r4, lr} - movrel r1, ff_cos_\n + movrelx r1, X(ff_cos_\n) mov r2, #\n4/2 b fft_pass_neon -.endfunc +endfunc .endm def_fft 32, 16, 8 @@ -314,21 +316,23 @@ function ff_fft_calc_neon, export=1 ldr r3, [r3, r2, lsl #2] mov r0, r1 bx r3 -.endfunc +endfunc function ff_fft_permute_neon, export=1 push {r4,lr} mov r12, #1 ldr r2, [r0] @ nbits - ldr r3, [r0, #20] @ tmp_buf + ldr r3, [r0, #12] @ tmp_buf ldr r0, [r0, #8] @ revtab lsl r12, r12, r2 mov r2, r12 1: vld1.32 {d0-d1}, [r1,:128]! ldr r4, [r0], #4 - uxtah lr, r3, r4 - uxtah r4, r3, r4, ror #16 + uxth lr, r4 + uxth r4, r4, ror #16 + add lr, r3, lr, lsl #3 + add r4, r3, r4, lsl #3 vst1.32 {d0}, [lr,:64] vst1.32 {d1}, [r4,:64] subs r12, r12, #2 @@ -342,11 +346,9 @@ function ff_fft_permute_neon, export=1 bgt 1b pop {r4,pc} -.endfunc +endfunc - .section .rodata - .align 4 -fft_tab_neon: +const fft_tab_neon .word fft4_neon .word fft8_neon .word fft16_neon @@ -362,8 +364,12 @@ fft_tab_neon: .word fft16384_neon .word fft32768_neon .word fft65536_neon - .size fft_tab_neon, . - fft_tab_neon +endconst + +const pmmp, align=4 + .float +1.0, -1.0, -1.0, +1.0 +endconst - .align 4 -pmmp: .float +1.0, -1.0, -1.0, +1.0 -mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 +const mppm, align=4 + .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 +endconst