2 * Copyright (c) 2013 RISC OS Open Ltd
3 * Author: Ben Avison <bavison@riscosopen.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/arm/asm.S"
/*
 * prerotation_innerloop
 *
 * One unrolled, straight-line step of what the name suggests is the
 * pre-rotation pass. The macro takes no arguments; it is parameterised by
 * assembler symbols instead:
 *   k, n4    - read here, assigned outside the lines visible in this macro
 *   trig_lo  - read here; NOTE(review): no assignment is visible in this
 *              excerpt, presumably it is set from k just before trig_hi -
 *              confirm
 * Register aliases used (IN, OUT, TCOS, TSIN, REVTAB, J0-J3) are bound
 * elsewhere in the file.
 *
 * Each step works on a low/high pair of positions: trig_lo and
 * trig_hi = n4 - k - 2. The input offsets are twice the trig offsets.
 */
37 .macro prerotation_innerloop
39 .set trig_hi, n4 - k - 2
40 .set in_lo, trig_lo * 2
41 .set in_hi, trig_hi * 2
/* Load two consecutive TCOS entries at each of the low and high positions
 * (4 bytes per entry). */
42 vldr d8, [TCOS, #trig_lo*4] @ s16,s17
43 vldr d9, [TCOS, #trig_hi*4] @ s18,s19
/* s0-s3 <- the odd-numbered words of the two 4-word input groups:
 * in_hi+3, in_hi+1, in_lo+3, in_lo+1 (word offsets). */
44 vldr s0, [IN, #in_hi*4 + 12]
45 vldr s1, [IN, #in_hi*4 + 4]
46 vldr s2, [IN, #in_lo*4 + 12]
47 vldr s3, [IN, #in_lo*4 + 4]
/* Marked as a vector operation: with the length-4 short-vector mode that
 * ff_imdct_half_vfp programs into FPSCR, this presumably computes
 * s8-s11 = s0-s3 * s16-s19 - confirm against the FPSCR setup. */
48 vmul.f s8, s0, s16 @ vector operation
/* Matching TSIN entries for the same low and high positions. */
49 vldr d10, [TSIN, #trig_lo*4] @ s20,s21
50 vldr d11, [TSIN, #trig_hi*4] @ s22,s23
/* s4-s7 <- the even-numbered words of the same input groups:
 * in_lo+0, in_lo+2, in_hi+0, in_hi+2 (word offsets). */
51 vldr s4, [IN, #in_lo*4]
52 vldr s5, [IN, #in_lo*4 + 8]
53 vldr s6, [IN, #in_hi*4]
54 vldr s7, [IN, #in_hi*4 + 8]
/* Word load from REVTAB at a 2-bytes-per-entry offset; presumably this
 * fetches two adjacent halfword entries at once, of which the low one is
 * isolated by the AND below. */
55 ldr J0, [REVTAB, #trig_lo*2]
56 vmul.f s12, s0, s20 @ vector operation
57 ldr J2, [REVTAB, #trig_hi*2]
/* NOTE(review): J1 and J3 are consumed further down but nothing in the
 * visible lines writes them; presumably they take the upper halfwords of
 * J0 and J2 - confirm against the full file. */
59 and J0, J0, #255 @ halfword value will be < n4
60 vmls.f s8, s4, s20 @ vector operation
62 and J2, J2, #255 @ halfword value will be < n4
/* Turn each index into an address: OUT + index * 8. The integer address
 * arithmetic is interleaved with the VFP multiply-accumulates. */
63 add J0, OUT, J0, lsl #3
64 vmla.f s12, s4, s16 @ vector operation
65 add J1, OUT, J1, lsl #3
66 add J2, OUT, J2, lsl #3
67 add J3, OUT, J3, lsl #3
/*
 * postrotation_innerloop tail, head
 *
 * One unrolled step of what the name suggests is the post-rotation pass,
 * reading from and writing back to OUT.
 *
 * Arguments:
 *   tail, head - optional flags. The function body invokes this macro as
 *                ", head", then "tail, head", then "tail". NOTE(review):
 *                the directives that test these arguments are not visible
 *                in this excerpt; presumably they enable the "tail" and
 *                "head" instruction groups respectively - confirm.
 *
 * Assembler symbols k and n8 are read here and assigned elsewhere. The
 * *_head symbols are derived from k, the *_tail symbols from k - 2, so the
 * tail part addresses the positions the head part used one step of 2
 * earlier.
 */
79 .macro postrotation_innerloop tail, head
/* Head positions: a low/high pair mirrored around n8. */
80 .set trig_lo_head, n8 - k - 2
81 .set trig_hi_head, n8 + k
82 .set out_lo_head, trig_lo_head * 2
83 .set out_hi_head, trig_hi_head * 2
/* Tail positions: the same formulas evaluated at k - 2. */
84 .set trig_lo_tail, n8 - (k - 2) - 2
85 .set trig_hi_tail, n8 + (k - 2)
86 .set out_lo_tail, trig_lo_tail * 2
87 .set out_hi_tail, trig_hi_tail * 2
/* Two alternative register assignments for the head TCOS values.
 * NOTE(review): both appear back to back here; presumably a conditional
 * that is not visible in this excerpt selects one of them per step. */
89 TCOS_D0_HEAD .req d10 @ s20,s21
90 TCOS_D1_HEAD .req d11 @ s22,s23
93 TCOS_D0_HEAD .req d12 @ s24,s25
94 TCOS_D1_HEAD .req d13 @ s26,s27
/* TCOS_S0_TAIL is not bound in the visible lines; presumably it names the
 * single-precision register holding the TCOS values loaded by the previous
 * step's head part - confirm. */
98 vmls.f s8, s0, TCOS_S0_TAIL @ vector operation
/* Head: fetch TSIN and the first TCOS pair for this step's positions. */
101 vldr d8, [TSIN, #trig_lo_head*4] @ s16,s17
102 vldr d9, [TSIN, #trig_hi_head*4] @ s18,s19
103 vldr TCOS_D0_HEAD, [TCOS, #trig_lo_head*4]
106 vmla.f s12, s4, TCOS_S0_TAIL @ vector operation
/* Head: s0-s3 <- even-numbered words (+0, +8 bytes) of the low and high
 * 4-word groups in OUT; s4-s7 <- the odd-numbered words (+4, +12). */
109 vldr s0, [OUT, #out_lo_head*4]
110 vldr s1, [OUT, #out_lo_head*4 + 8]
111 vldr s2, [OUT, #out_hi_head*4]
112 vldr s3, [OUT, #out_hi_head*4 + 8]
113 vldr s4, [OUT, #out_lo_head*4 + 4]
114 vldr s5, [OUT, #out_lo_head*4 + 12]
115 vldr s6, [OUT, #out_hi_head*4 + 4]
116 vldr s7, [OUT, #out_hi_head*4 + 12]
/* Tail: write s8-s11 to the even-numbered words of the tail groups. */
119 vstr s8, [OUT, #out_lo_tail*4]
120 vstr s9, [OUT, #out_lo_tail*4 + 8]
121 vstr s10, [OUT, #out_hi_tail*4]
122 vstr s11, [OUT, #out_hi_tail*4 + 8]
/* s8 is free again after the stores above, so the head part starts its
 * next product in it. */
125 vmul.f s8, s4, s16 @ vector operation
/* Tail: write s12-s15 to the odd-numbered words. Note the crossed order:
 * s12, s13 go to the high group (+12, +4) and s14, s15 to the low group
 * (+12, +4). */
128 vstr s12, [OUT, #out_hi_tail*4 + 12]
129 vstr s13, [OUT, #out_hi_tail*4 + 4]
130 vstr s14, [OUT, #out_lo_tail*4 + 12]
131 vstr s15, [OUT, #out_lo_tail*4 + 4]
134 vmul.f s12, s0, s16 @ vector operation
/* Head: second TCOS pair, for the high position. */
135 vldr TCOS_D1_HEAD, [TCOS, #trig_hi_head*4]
146 /* void ff_imdct_half_vfp(FFTContext *s,
148 * const FFTSample *input)
150 function ff_imdct_half_vfp, export=1
151 ldr ip, [CONTEXT, #5*4] @ mdct_bits
154 bne X(ff_imdct_half_c) @ only case currently accelerated is the one used by DCA
164 ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
167 ldr REVTAB, [CONTEXT, #2*4]
168 ldr TCOS, [CONTEXT, #6*4]
169 ldr TSIN, [CONTEXT, #7*4]
173 prerotation_innerloop
179 ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
183 postrotation_innerloop , head
185 postrotation_innerloop tail, head
187 postrotation_innerloop tail