2 * Bluetooth low-complexity, subband codec (SBC)
4 * Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
5 * Copyright (C) 2008-2010 Nokia Corporation
6 * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
7 * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
8 * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 * SBC ARMv6 optimizations. The instructions are scheduled for the ARM11 pipeline.
32 #include "libavutil/arm/asm.S"
@ ff_sbc_analyze_4_armv6
@
@ Multiply-accumulates packed signed 16-bit pairs read from the input (r0)
@ against pairs read from the constant table (r2) using SMLAD, packs the
@ high halves of the resulting partial sums t1[0..3] into two registers,
@ accumulates those packed values against the cos table (consts + 80 up to
@ consts + 111) and stores four 32-bit words through r1.
@
@ SMLAD Rd, Rn, Rm, Ra computes
@     Rd = Ra + Rn[15:0] * Rm[15:0] + Rn[31:16] * Rm[31:16]
@ PKHTB Rd, Rn, Rm, asr #16 computes
@     Rd = (Rn[31:16] << 16) | Rm[31:16]
@
@ NOTE(review): the embedded listing numbers in this excerpt are not
@ contiguous (e.g. 35 -> 40, 56 -> 58, 84 -> 91, 94 -> 98), so some
@ instructions of this routine are not visible here: the initial value of
@ r14, the first loads into r4-r7, most of the r3 accumulation chain and
@ any register save/restore or return sequence. The comments below only
@ describe the instructions that are shown -- confirm against the full file.
34 function ff_sbc_analyze_4_armv6, export=1
35 @ r0 = in, r1 = out, r2 = consts
@ Loads of the next in/consts pair are interleaved with the
@ multiply-accumulates that consume the previous pair.
40 ldrd r8, r9, [r0, #16]
41 ldrd r10, r11, [r2, #16]
@ r12 starts a new sum seeded from r14 (r14 is set outside the visible lines).
44 smlad r12, r5, r7, r14
45 ldrd r4, r5, [r0, #32]
46 ldrd r6, r7, [r2, #32]
48 smlad r12, r9, r11, r12
49 ldrd r8, r9, [r0, #48]
50 ldrd r10, r11, [r2, #48]
52 smlad r12, r5, r7, r12
53 ldrd r4, r5, [r0, #64]
54 ldrd r6, r7, [r2, #64]
56 smlad r12, r9, r11, r12
58 ldrd r10, r11, [r2, #8]
59 smlad r3, r4, r6, r3 @ t1[0] is done
60 smlad r12, r5, r7, r12 @ t1[1] is done
61 ldrd r4, r5, [r0, #24]
62 ldrd r6, r7, [r2, #24]
@ r3 = high half of t1[1] in bits 31:16, high half of t1[0] in bits 15:0.
63 pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
@ Start the next two sums, both seeded from r14: r12 becomes t1[2] and r14
@ itself becomes the accumulator for t1[3] (the seed value is consumed here).
64 smlad r12, r8, r10, r14
65 smlad r14, r9, r11, r14
66 ldrd r8, r9, [r0, #40]
67 ldrd r10, r11, [r2, #40]
68 smlad r12, r4, r6, r12
69 smlad r14, r5, r7, r14
70 ldrd r4, r5, [r0, #56]
71 ldrd r6, r7, [r2, #56]
72 smlad r12, r8, r10, r12
73 smlad r14, r9, r11, r14
74 ldrd r8, r9, [r0, #72]
75 ldrd r10, r11, [r2, #72]
76 smlad r12, r4, r6, r12
77 smlad r14, r5, r7, r14
@ From here r4-r11 are reloaded from consts + 80 .. consts + 111 while the
@ last two multiply-accumulates of t1[2] / t1[3] complete.
78 ldrd r4, r5, [r2, #80] @ start loading cos table
79 smlad r12, r8, r10, r12 @ t1[2] is done
80 smlad r14, r9, r11, r14 @ t1[3] is done
81 ldrd r6, r7, [r2, #88]
82 ldrd r8, r9, [r2, #96]
83 ldrd r10, r11, [r2, #104] @ cos table fully loaded
@ r12 = high half of t1[3] in bits 31:16, high half of t1[2] in bits 15:0.
84 pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
@ Accumulate the packed t1[2]:t1[3] pair times the cos-table words in
@ r10 / r11 into r6 / r7.
91 smlad r6, r12, r10, r6
92 smlad r7, r12, r11, r7
@ Store r4-r7 as four consecutive words at the address in r1 (no write-back).
94 stmia r1, {r4, r5, r6, r7}
98 function ff_sbc_analyze_8_armv6, export=1
99 @ r0 = in, r1 = out, r2 = consts
102 ldrd r4, r5, [r0, #24]
103 ldrd r6, r7, [r2, #24]
104 ldrd r8, r9, [r0, #56]
105 ldrd r10, r11, [r2, #56]
107 smlad r3, r4, r6, r14
108 smlad r12, r5, r7, r14
109 ldrd r4, r5, [r0, #88]
110 ldrd r6, r7, [r2, #88]
111 smlad r3, r8, r10, r3
112 smlad r12, r9, r11, r12
113 ldrd r8, r9, [r0, #120]
114 ldrd r10, r11, [r2, #120]
116 smlad r12, r5, r7, r12
117 ldrd r4, r5, [r0, #152]
118 ldrd r6, r7, [r2, #152]
119 smlad r3, r8, r10, r3
120 smlad r12, r9, r11, r12
121 ldrd r8, r9, [r0, #16]
122 ldrd r10, r11, [r2, #16]
123 smlad r3, r4, r6, r3 @ t1[6] is done
124 smlad r12, r5, r7, r12 @ t1[7] is done
125 ldrd r4, r5, [r0, #48]
126 ldrd r6, r7, [r2, #48]
127 pkhtb r3, r12, r3, asr #16 @ combine t1[6] and t1[7]
128 str r3, [sp, #-4]! @ save to stack
129 smlad r3, r8, r10, r14
130 smlad r12, r9, r11, r14
131 ldrd r8, r9, [r0, #80]
132 ldrd r10, r11, [r2, #80]
134 smlad r12, r5, r7, r12
135 ldrd r4, r5, [r0, #112]
136 ldrd r6, r7, [r2, #112]
137 smlad r3, r8, r10, r3
138 smlad r12, r9, r11, r12
139 ldrd r8, r9, [r0, #144]
140 ldrd r10, r11, [r2, #144]
142 smlad r12, r5, r7, r12
143 ldrd r4, r5, [r0, #0]
144 ldrd r6, r7, [r2, #0]
145 smlad r3, r8, r10, r3 @ t1[4] is done
146 smlad r12, r9, r11, r12 @ t1[5] is done
147 ldrd r8, r9, [r0, #32]
148 ldrd r10, r11, [r2, #32]
149 pkhtb r3, r12, r3, asr #16 @ combine t1[4] and t1[5]
150 str r3, [sp, #-4]! @ save to stack
151 smlad r3, r4, r6, r14
152 smlad r12, r5, r7, r14
153 ldrd r4, r5, [r0, #64]
154 ldrd r6, r7, [r2, #64]
155 smlad r3, r8, r10, r3
156 smlad r12, r9, r11, r12
157 ldrd r8, r9, [r0, #96]
158 ldrd r10, r11, [r2, #96]
160 smlad r12, r5, r7, r12
161 ldrd r4, r5, [r0, #128]
162 ldrd r6, r7, [r2, #128]
163 smlad r3, r8, r10, r3
164 smlad r12, r9, r11, r12
165 ldrd r8, r9, [r0, #8]
166 ldrd r10, r11, [r2, #8]
167 smlad r3, r4, r6, r3 @ t1[0] is done
168 smlad r12, r5, r7, r12 @ t1[1] is done
169 ldrd r4, r5, [r0, #40]
170 ldrd r6, r7, [r2, #40]
171 pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
172 smlad r12, r8, r10, r14
173 smlad r14, r9, r11, r14
174 ldrd r8, r9, [r0, #72]
175 ldrd r10, r11, [r2, #72]
176 smlad r12, r4, r6, r12
177 smlad r14, r5, r7, r14
178 ldrd r4, r5, [r0, #104]
179 ldrd r6, r7, [r2, #104]
180 smlad r12, r8, r10, r12
181 smlad r14, r9, r11, r14
182 ldrd r8, r9, [r0, #136]
183 ldrd r10, r11, [r2, #136]!
184 smlad r12, r4, r6, r12
185 smlad r14, r5, r7, r14
186 ldrd r4, r5, [r2, #(160 - 136 + 0)]
187 smlad r12, r8, r10, r12 @ t1[2] is done
188 smlad r14, r9, r11, r14 @ t1[3] is done
189 ldrd r6, r7, [r2, #(160 - 136 + 8)]
192 pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
195 pop {r0, r14} @ t2[4:5], t2[6:7]
196 ldrd r8, r9, [r2, #(160 - 136 + 32)]
199 ldrd r10, r11, [r2, #(160 - 136 + 40)]
200 smlad r4, r12, r8, r4
201 smlad r5, r12, r9, r5
202 ldrd r8, r9, [r2, #(160 - 136 + 64)]
203 smlad r6, r12, r10, r6
204 smlad r7, r12, r11, r7
205 ldrd r10, r11, [r2, #(160 - 136 + 72)]
208 ldrd r8, r9, [r2, #(160 - 136 + 96)]
209 smlad r6, r0, r10, r6
210 smlad r7, r0, r11, r7
211 ldrd r10, r11, [r2, #(160 - 136 + 104)]
212 smlad r4, r14, r8, r4
213 smlad r5, r14, r9, r5
214 ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]
215 smlad r6, r14, r10, r6
216 smlad r7, r14, r11, r7
217 ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]
221 ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]
225 ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]
226 smlad r4, r12, r8, r4
227 smlad r5, r12, r9, r5
228 ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]
229 smlad r6, r12, r10, r6
230 smlad r7, r12, r11, r7
231 ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]
234 ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]
235 smlad r6, r0, r10, r6
236 smlad r7, r0, r11, r7
237 ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]
238 smlad r4, r14, r8, r4
239 smlad r5, r14, r9, r5
240 smlad r6, r14, r10, r6
241 smlad r7, r14, r11, r7
243 stmia r1!, {r4, r5, r6, r7}