git.sesse.net Git - ffmpeg/blob - libavcodec/arm/sbcdsp_armv6.S
avcodec: Remove redundant freeing of extradata of encoders
[ffmpeg] / libavcodec / arm / sbcdsp_armv6.S
1 /*
2  * Bluetooth low-complexity, subband codec (SBC)
3  *
4  * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
5  * Copyright (C) 2008-2010  Nokia Corporation
6  * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
7  * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
8  * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26
27 /**
28  * @file
29  * SBC ARMv6 optimizations. The instructions are scheduled for the ARM11 pipeline.
30  */
31
32 #include "libavutil/arm/asm.S"
33
34 function ff_sbc_analyze_4_armv6, export=1
35         @ r0 = in, r1 = out, r2 = consts
           @
           @ Reads 80 bytes from in (byte offsets 0..79) and 112 bytes from
           @ consts (byte offsets 0..111), both treated as packed pairs of
           @ signed 16-bit values, and stores four 32-bit words to out.
           @
           @ Stage 1: four 32-bit accumulators t1[0..3] are each seeded with
           @          0x8000 and each collect five smlad results (two 16x16
           @          products per smlad) of in[] against consts[0..79].
           @          Only the upper halfword of each accumulator is kept;
           @          the 0x8000 seed (half of 1 << 16) makes that step round
           @          to nearest instead of truncating.
           @ Stage 2: the packed halfword pairs t1[0:1] and t1[2:3] are
           @          multiplied against the table at consts + 80 to form the
           @          four outputs.
           @
           @ Loads for the next group are issued between the multiplies of
           @ the current group, so statement order is deliberate.
           @ r1, r3-r12 and r14 are saved on entry and restored on exit.
36         push            {r1, r3-r7, lr}
37         push            {r8-r12, r14}            @ r14 is reused as scratch below
           @ ---- t1[0] (r3) and t1[1] (r12): byte offsets 0, 16, 32, 48, 64
38         ldrd            r4,  r5,  [r0, #0]
39         ldrd            r6,  r7,  [r2, #0]
40         ldrd            r8,  r9,  [r0, #16]
41         ldrd            r10, r11, [r2, #16]
42         mov             r14, #0x8000             @ seed value for every accumulator
43         smlad           r3,  r4,  r6,  r14
44         smlad           r12, r5,  r7,  r14
45         ldrd            r4,  r5,  [r0, #32]
46         ldrd            r6,  r7,  [r2, #32]
47         smlad           r3,  r8,  r10, r3
48         smlad           r12, r9,  r11, r12
49         ldrd            r8,  r9,  [r0, #48]
50         ldrd            r10, r11, [r2, #48]
51         smlad           r3,  r4,  r6,  r3
52         smlad           r12, r5,  r7,  r12
53         ldrd            r4,  r5,  [r0, #64]
54         ldrd            r6,  r7,  [r2, #64]
55         smlad           r3,  r8,  r10, r3
56         smlad           r12, r9,  r11, r12
           @ the next two loads already belong to the t1[2]/t1[3] group
57         ldrd            r8,  r9,  [r0, #8]
58         ldrd            r10, r11, [r2, #8]
59         smlad           r3,  r4,  r6,  r3        @ t1[0] is done
60         smlad           r12, r5,  r7,  r12       @ t1[1] is done
61         ldrd            r4,  r5,  [r0, #24]
62         ldrd            r6,  r7,  [r2, #24]
           @ r3 = (upper half of t1[1]) : (upper half of t1[0])
63         pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
           @ ---- t1[2] (r12) and t1[3] (r14): byte offsets 8, 24, 40, 56, 72
           @ r12 is free again after the pack; the second smlad consumes the
           @ 0x8000 seed held in r14 and turns r14 into the t1[3] accumulator.
64         smlad           r12, r8,  r10, r14
65         smlad           r14, r9,  r11, r14
66         ldrd            r8,  r9,  [r0, #40]
67         ldrd            r10, r11, [r2, #40]
68         smlad           r12, r4,  r6,  r12
69         smlad           r14, r5,  r7,  r14
70         ldrd            r4,  r5,  [r0, #56]
71         ldrd            r6,  r7,  [r2, #56]
72         smlad           r12, r8,  r10, r12
73         smlad           r14, r9,  r11, r14
74         ldrd            r8,  r9,  [r0, #72]
75         ldrd            r10, r11, [r2, #72]
76         smlad           r12, r4,  r6,  r12
77         smlad           r14, r5,  r7,  r14
78         ldrd            r4,  r5,  [r2, #80]      @ start loading cos table
79         smlad           r12, r8,  r10, r12       @ t1[2] is done
80         smlad           r14, r9,  r11, r14       @ t1[3] is done
81         ldrd            r6,  r7,  [r2, #88]
82         ldrd            r8,  r9,  [r2, #96]
83         ldrd            r10, r11, [r2, #104]     @ cos table fully loaded
84         pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
           @ ---- outputs: r4-r7 (table words at consts + 80..95) pair with
           @ t1[0:1] in r3, r8-r11 (consts + 96..111) pair with t1[2:3] in r12
85         smuad           r4,  r3,  r4
86         smuad           r5,  r3,  r5
87         smlad           r4,  r12, r8,  r4
88         smlad           r5,  r12, r9,  r5
89         smuad           r6,  r3,  r6
90         smuad           r7,  r3,  r7
91         smlad           r6,  r12, r10, r6
92         smlad           r7,  r12, r11, r7
93         pop             {r8-r12, r14}
94         stmia           r1, {r4, r5, r6, r7}     @ store the four 32-bit results to out
95         pop             {r1, r3-r7, pc}          @ restore registers and return
96 endfunc
97
98 function ff_sbc_analyze_8_armv6, export=1
99         @ r0 = in, r1 = out, r2 = consts
           @
           @ Reads 160 bytes from in (byte offsets 0..159) and 288 bytes from
           @ consts (byte offsets 0..287), both treated as packed pairs of
           @ signed 16-bit values, and stores eight 32-bit words to out.
           @
           @ Stage 1: eight 32-bit accumulators t1[0..7] are each seeded with
           @          0x8000 and each collect five smlad results (two 16x16
           @          products per smlad) of in[] against consts[0..159].
           @          Only the upper halfword of each accumulator is kept;
           @          the 0x8000 seed (half of 1 << 16) makes that step round
           @          to nearest instead of truncating. The pairs are
           @          computed in the order [6:7], [4:5], [0:1], [2:3]; the
           @          first two packed pairs are parked on the stack.
           @ Stage 2: the four packed pairs are multiplied against the table
           @          at consts + 160 (32 bytes per pair, one 32-bit table
           @          word per output) to form the eight outputs.
           @
           @ Loads for the next group are issued between the multiplies of
           @ the current group, so statement order is deliberate.
           @ r1, r3-r12 and r14 are saved on entry and restored on exit;
           @ r0 and r2 are overwritten and not restored.
100         push            {r1, r3-r7, lr}
101         push            {r8-r12, r14}            @ r14 is reused as scratch below
            @ ---- t1[6] (r3) and t1[7] (r12): byte offsets 24, 56, 88, 120, 152
102         ldrd            r4,  r5,  [r0, #24]
103         ldrd            r6,  r7,  [r2, #24]
104         ldrd            r8,  r9,  [r0, #56]
105         ldrd            r10, r11, [r2, #56]
106         mov             r14, #0x8000             @ seed value for every accumulator
107         smlad           r3,  r4,  r6,  r14
108         smlad           r12, r5,  r7,  r14
109         ldrd            r4,  r5,  [r0, #88]
110         ldrd            r6,  r7,  [r2, #88]
111         smlad           r3,  r8,  r10, r3
112         smlad           r12, r9,  r11, r12
113         ldrd            r8,  r9,  [r0, #120]
114         ldrd            r10, r11, [r2, #120]
115         smlad           r3,  r4,  r6,  r3
116         smlad           r12, r5,  r7,  r12
117         ldrd            r4,  r5,  [r0, #152]
118         ldrd            r6,  r7,  [r2, #152]
119         smlad           r3,  r8,  r10, r3
120         smlad           r12, r9,  r11, r12
            @ the next two loads already belong to the t1[4]/t1[5] group
121         ldrd            r8,  r9,  [r0, #16]
122         ldrd            r10, r11, [r2, #16]
123         smlad           r3,  r4,  r6,  r3        @ t1[6] is done
124         smlad           r12, r5,  r7,  r12       @ t1[7] is done
125         ldrd            r4,  r5,  [r0, #48]
126         ldrd            r6,  r7,  [r2, #48]
127         pkhtb           r3,  r12, r3, asr #16    @ combine t1[6] and t1[7]
128         str             r3,  [sp, #-4]!          @ save to stack
            @ ---- t1[4] (r3) and t1[5] (r12): byte offsets 16, 48, 80, 112, 144
129         smlad           r3,  r8,  r10, r14
130         smlad           r12, r9,  r11, r14
131         ldrd            r8,  r9,  [r0, #80]
132         ldrd            r10, r11, [r2, #80]
133         smlad           r3,  r4,  r6,  r3
134         smlad           r12, r5,  r7,  r12
135         ldrd            r4,  r5,  [r0, #112]
136         ldrd            r6,  r7,  [r2, #112]
137         smlad           r3,  r8,  r10, r3
138         smlad           r12, r9,  r11, r12
139         ldrd            r8,  r9,  [r0, #144]
140         ldrd            r10, r11, [r2, #144]
141         smlad           r3,  r4,  r6,  r3
142         smlad           r12, r5,  r7,  r12
            @ the next two loads already belong to the t1[0]/t1[1] group
143         ldrd            r4,  r5,  [r0, #0]
144         ldrd            r6,  r7,  [r2, #0]
145         smlad           r3,  r8,  r10, r3        @ t1[4] is done
146         smlad           r12, r9,  r11, r12       @ t1[5] is done
147         ldrd            r8,  r9,  [r0, #32]
148         ldrd            r10, r11, [r2, #32]
149         pkhtb           r3,  r12, r3, asr #16    @ combine t1[4] and t1[5]
150         str             r3,  [sp, #-4]!          @ save to stack
            @ ---- t1[0] (r3) and t1[1] (r12): byte offsets 0, 32, 64, 96, 128
151         smlad           r3,  r4,  r6,  r14
152         smlad           r12, r5,  r7,  r14
153         ldrd            r4,  r5,  [r0, #64]
154         ldrd            r6,  r7,  [r2, #64]
155         smlad           r3,  r8,  r10, r3
156         smlad           r12, r9,  r11, r12
157         ldrd            r8,  r9,  [r0, #96]
158         ldrd            r10, r11, [r2, #96]
159         smlad           r3,  r4,  r6,  r3
160         smlad           r12, r5,  r7,  r12
161         ldrd            r4,  r5,  [r0, #128]
162         ldrd            r6,  r7,  [r2, #128]
163         smlad           r3,  r8,  r10, r3
164         smlad           r12, r9,  r11, r12
            @ the next two loads already belong to the t1[2]/t1[3] group
165         ldrd            r8,  r9,  [r0, #8]
166         ldrd            r10, r11, [r2, #8]
167         smlad           r3,  r4,  r6,  r3        @ t1[0] is done
168         smlad           r12, r5,  r7,  r12       @ t1[1] is done
169         ldrd            r4,  r5,  [r0, #40]
170         ldrd            r6,  r7,  [r2, #40]
171         pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
            @ ---- t1[2] (r12) and t1[3] (r14): byte offsets 8, 40, 72, 104, 136
            @ r12 is free again after the pack; the second smlad consumes the
            @ 0x8000 seed held in r14 and turns r14 into the t1[3] accumulator.
172         smlad           r12, r8,  r10, r14
173         smlad           r14, r9,  r11, r14
174         ldrd            r8,  r9,  [r0, #72]
175         ldrd            r10, r11, [r2, #72]
176         smlad           r12, r4,  r6,  r12
177         smlad           r14, r5,  r7,  r14
178         ldrd            r4,  r5,  [r0, #104]
179         ldrd            r6,  r7,  [r2, #104]
180         smlad           r12, r8,  r10, r12
181         smlad           r14, r9,  r11, r14
182         ldrd            r8,  r9,  [r0, #136]
            @ The "!" writes the address back: from here on r2 = consts + 136,
            @ so "(160 - 136 + x)" below addresses byte x of the table that
            @ starts at consts + 160.
            @ NOTE(review): presumably done because the largest table offset
            @ (160 + 120) would not fit ldrd's immediate offset field when
            @ measured from the original base - confirm against the ARM ARM.
183         ldrd            r10, r11, [r2, #136]!
184         smlad           r12, r4,  r6,  r12
185         smlad           r14, r5,  r7,  r14
186         ldrd            r4,  r5,  [r2, #(160 - 136 + 0)]
187         smlad           r12, r8,  r10, r12       @ t1[2] is done
188         smlad           r14, r9,  r11, r14       @ t1[3] is done
189         ldrd            r6,  r7,  [r2, #(160 - 136 + 8)]
            @ ---- outputs 0..3: table bytes 0..15 of each 32-byte group
190         smuad           r4,  r3,  r4
191         smuad           r5,  r3,  r5
192         pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
193                                                  @ r3  = t2[0:1]
194                                                  @ r12 = t2[2:3]
            @ Reload the two packed pairs parked on the stack earlier. r0 (the
            @ in pointer) is overwritten here; no input loads follow.
195         pop             {r0, r14}                @ t2[4:5], t2[6:7]
196         ldrd            r8,  r9,  [r2, #(160 - 136 + 32)]
197         smuad           r6,  r3,  r6
198         smuad           r7,  r3,  r7
199         ldrd            r10, r11, [r2, #(160 - 136 + 40)]
200         smlad           r4,  r12, r8,  r4
201         smlad           r5,  r12, r9,  r5
202         ldrd            r8,  r9,  [r2, #(160 - 136 + 64)]
203         smlad           r6,  r12, r10, r6
204         smlad           r7,  r12, r11, r7
205         ldrd            r10, r11, [r2, #(160 - 136 + 72)]
206         smlad           r4,  r0,  r8,  r4
207         smlad           r5,  r0,  r9,  r5
208         ldrd            r8,  r9,  [r2, #(160 - 136 + 96)]
209         smlad           r6,  r0,  r10, r6
210         smlad           r7,  r0,  r11, r7
211         ldrd            r10, r11, [r2, #(160 - 136 + 104)]
212         smlad           r4,  r14, r8,  r4
213         smlad           r5,  r14, r9,  r5
            @ ---- outputs 4..7: table bytes 16..31 of each 32-byte group; the
            @ loads start while outputs 0..3 are still being finished/stored
214         ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 0)]
215         smlad           r6,  r14, r10, r6
216         smlad           r7,  r14, r11, r7
217         ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
218         stmia           r1!, {r4, r5}            @ store outputs 0 and 1, advance r1
219         smuad           r4,  r3,  r8
220         smuad           r5,  r3,  r9
221         ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 32)]
222         stmia           r1!, {r6, r7}            @ store outputs 2 and 3, advance r1
223         smuad           r6,  r3,  r10
224         smuad           r7,  r3,  r11
225         ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
226         smlad           r4,  r12, r8,  r4
227         smlad           r5,  r12, r9,  r5
228         ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 64)]
229         smlad           r6,  r12, r10, r6
230         smlad           r7,  r12, r11, r7
231         ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
232         smlad           r4,  r0,  r8,  r4
233         smlad           r5,  r0,  r9,  r5
234         ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 96)]
235         smlad           r6,  r0,  r10, r6
236         smlad           r7,  r0,  r11, r7
237         ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
238         smlad           r4,  r14, r8,  r4
239         smlad           r5,  r14, r9,  r5
240         smlad           r6,  r14, r10, r6
241         smlad           r7,  r14, r11, r7
242         pop             {r8-r12, r14}
243         stmia           r1!, {r4, r5, r6, r7}    @ store outputs 4..7
244         pop             {r1, r3-r7, pc}          @ restore registers (incl. original r1) and return
245 endfunc