]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/dsputil_armv6.S
aac_latm: reconfigure decoder on audio specific config changes
[ffmpeg] / libavcodec / arm / dsputil_armv6.S
1 /*
2  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "asm.S"
22
/* preserve8 is not defined in this file; presumably it is a macro from asm.S
 * that marks the object as preserving 8-byte stack alignment -- TODO confirm. */
23         preserve8
24
/*
 * call_2x_pixels type, subp
 *
 * Defines the exported function ff_<type>_pixels16<subp>_armv6 on top of the
 * matching 8-byte-wide routine ff_<type>_pixels8<subp>_armv6:
 *   1. save r0-r3 and lr, call the 8-wide routine with the incoming arguments;
 *   2. restore r0-r3 and lr, advance r0 and r1 by 8 bytes;
 *   3. tail-branch to the same 8-wide routine, which returns to our caller.
 *
 * r0-r3 are forwarded untouched, so their meaning is whatever the 8-wide
 * routine expects (in this file: r0/r1 are the two pixel pointers).
 *
 * r4 is pushed and popped purely as padding: six registers keep sp a multiple
 * of 8 across the bl, as required at a call to an externally visible function
 * and as this file declares with preserve8. Pushing only r0-r3 and lr would
 * leave sp misaligned by 4 at the call.
 */
25 .macro  call_2x_pixels  type, subp
26 function ff_\type\()_pixels16\subp\()_armv6, export=1
27         push            {r0-r4, lr}
28         bl              ff_\type\()_pixels8\subp\()_armv6
29         pop             {r0-r4, lr}
30         add             r0,  r0,  #8
31         add             r1,  r1,  #8
32         b               ff_\type\()_pixels8\subp\()_armv6
33 endfunc
34 .endm
35
/*
 * Instantiate the 16-wide wrappers. These expand to:
 *   ff_avg_pixels16_armv6
 *   ff_put_pixels16_x2_armv6,        ff_put_pixels16_y2_armv6
 *   ff_put_pixels16_x2_no_rnd_armv6, ff_put_pixels16_y2_no_rnd_armv6
 * each built from the ff_..._pixels8... routine of the same name defined
 * further down in this file.
 */
36 call_2x_pixels          avg
37 call_2x_pixels          put, _x2
38 call_2x_pixels          put, _y2
39 call_2x_pixels          put, _x2_no_rnd
40 call_2x_pixels          put, _y2_no_rnd
41
/*
 * ff_put_pixels16_armv6: copy a block that is 16 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset added to both pointers after every row (stride)
 *      r3 = number of rows
 *
 * Two rows are copied per loop iteration. The loop subtracts 2 from r3 and
 * only terminates when the result is exactly zero, so r3 must be a positive
 * even number.
 *
 * Each source row is read with four word loads; each destination row is
 * written with two doubleword stores (strd / strd_post).
 *
 * ldr_post and strd_post are macros from asm.S; presumably they access [rn]
 * and then add the last register operand to rn -- TODO confirm.
 *
 * r4-r11 are saved on entry and restored before returning.
 */
42 function ff_put_pixels16_armv6, export=1
43         push            {r4-r11}
44 1:
          /* first row: words 1-3 into r5-r7, then word 0 into r4 and advance r1 */
45         ldr             r5,  [r1, #4]
46         ldr             r6,  [r1, #8]
47         ldr             r7,  [r1, #12]
48         ldr_post        r4,  r1,  r2
49         strd            r6,  r7,  [r0, #8]
          /* second-row loads are interleaved with the first-row stores */
50         ldr             r9,  [r1, #4]
51         strd_post       r4,  r5,  r0,  r2
52         ldr             r10, [r1, #8]
53         ldr             r11, [r1, #12]
54         ldr_post        r8,  r1,  r2
55         strd            r10, r11, [r0, #8]
          /* the flags set here are consumed by the bne below */
56         subs            r3,  r3,  #2
57         strd_post       r8,  r9,  r0,  r2
58         bne             1b
59
60         pop             {r4-r11}
61         bx              lr
62 endfunc
63
/*
 * ff_put_pixels8_armv6: copy a block that is 8 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset added to both pointers after every row (stride)
 *      r3 = number of rows
 *
 * Two rows are copied per loop iteration; the loop ends only when r3 reaches
 * exactly zero after subtracting 2, so r3 must be a positive even number.
 *
 * Source rows are read with two word loads, destination rows are written with
 * one doubleword store. ldr_post and strd_post are macros from asm.S;
 * presumably they access [rn] and then add the last register operand to rn
 * -- TODO confirm.
 *
 * r4-r7 are saved on entry and restored before returning.
 */
64 function ff_put_pixels8_armv6, export=1
65         push            {r4-r7}
66 1:
67         ldr             r5,  [r1, #4]
68         ldr_post        r4,  r1,  r2
69         ldr             r7,  [r1, #4]
70         strd_post       r4,  r5,  r0,  r2
71         ldr_post        r6,  r1,  r2
          /* the flags set here are consumed by the bne below */
72         subs            r3,  r3,  #2
73         strd_post       r6,  r7,  r0,  r2
74         bne             1b
75
76         pop             {r4-r7}
77         bx              lr
78 endfunc
79
/*
 * ff_put_pixels8_x2_armv6: horizontal half-sample interpolation with rounding,
 * 8 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset between rows (stride), applied to both pointers
 *      r3 = number of rows (two per iteration; must reach exactly zero in
 *           steps of 2 for the loop to end)
 *
 * Every output byte is (src[x] + src[x + 1] + 1) >> 1. It is computed per
 * byte lane as uhadd8(a, b) + ((a ^ b) & 1): uhadd8 yields (a + b) >> 1 and
 * the masked eor restores the bit that the halving dropped.
 *
 * Nine source bytes are read per row: words at offsets 0 and 4 plus a word at
 * offset 5. The "bytes 1..4" operand is assembled from the first two words
 * with lsr #8 / lsl #24, which corresponds to little-endian byte order.
 *
 * ldr_pre and strd_post are macros from asm.S; presumably ldr_pre adds the
 * register operand to rn before loading and strd_post adds it after storing
 * -- TODO confirm.
 *
 * r4-r11 and lr are saved; r14 (lr) and r12 are used as scratch.
 */
80 function ff_put_pixels8_x2_armv6, export=1
81         push            {r4-r11, lr}
          /* r12 = 0x01010101: mask for the low bit of every byte lane */
82         mov             r12, #1
83         orr             r12, r12, r12, lsl #8
84         orr             r12, r12, r12, lsl #16
85 1:
          /* row A: r4 = bytes 0..3, r5 = bytes 4..7, r6 = bytes 1..4, r7 = bytes 5..8 */
86         ldr             r4,  [r1]
          /* the flags set here are consumed by the bne at the end of the loop */
87         subs            r3,  r3,  #2
88         ldr             r5,  [r1, #4]
89         ldr             r7,  [r1, #5]
90         lsr             r6,  r4,  #8
          /* row B: r8 = bytes 0..3, r9 = bytes 4..7, r10 = bytes 1..4, r11 = bytes 5..8 */
91         ldr_pre         r8,  r1,  r2
92         orr             r6,  r6,  r5,  lsl #24
93         ldr             r9,  [r1, #4]
94         ldr             r11, [r1, #5]
95         lsr             r10, r8,  #8
96         add             r1,  r1,  r2
97         orr             r10, r10, r9,  lsl #24
          /* rounded averages for row A (r4, r5) and row B (r8, r9), interleaved */
98         eor             r14, r4,  r6
99         uhadd8          r4,  r4,  r6
100         eor             r6,  r5,  r7
101         uhadd8          r5,  r5,  r7
102         and             r14, r14, r12
103         and             r6,  r6,  r12
104         uadd8           r4,  r4,  r14
105         eor             r14, r8,  r10
106         uadd8           r5,  r5,  r6
107         eor             r6,  r9,  r11
108         uhadd8          r8,  r8,  r10
109         and             r14, r14, r12
110         uhadd8          r9,  r9,  r11
111         and             r6,  r6,  r12
112         uadd8           r8,  r8,  r14
113         strd_post       r4,  r5,  r0,  r2
114         uadd8           r9,  r9,  r6
115         strd_post       r8,  r9,  r0,  r2
116         bne             1b
117
118         pop             {r4-r11, pc}
119 endfunc
120
/*
 * ff_put_pixels8_y2_armv6: vertical half-sample interpolation with rounding,
 * 8 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset between rows (stride), applied to both pointers
 *      r3 = number of rows (two per iteration; must reach exactly zero in
 *           steps of 2 for the loop to end)
 *
 * Every output byte is (a + b + 1) >> 1 where a and b are the bytes at the
 * same column in two vertically adjacent source rows. Per byte lane this is
 * uhadd8(a, b) + ((a ^ b) & 1).
 *
 * Register roles: r4/r5 and r6/r7 alternately hold the older and the newer
 * source row; r8/r9 and r10/r11 hold the two output rows of an iteration.
 *
 * The loads at the bottom of the loop fetch the row needed by the next
 * iteration, so the final iteration reads one source row whose contents are
 * never used.
 *
 * ldr_pre and strd_post are macros from asm.S; presumably ldr_pre adds the
 * register operand to rn before loading and strd_post adds it after storing
 * -- TODO confirm.
 */
121 function ff_put_pixels8_y2_armv6, export=1
122         push            {r4-r11}
          /* r12 = 0x01010101: mask for the low bit of every byte lane */
123         mov             r12, #1
124         orr             r12, r12, r12, lsl #8
125         orr             r12, r12, r12, lsl #16
          /* prime the pipeline with the first two source rows */
126         ldr             r4,  [r1]
127         ldr             r5,  [r1, #4]
128         ldr_pre         r6,  r1,  r2
129         ldr             r7,  [r1, #4]
130 1:
          /* the flags set here are consumed by the bne at the end of the loop */
131         subs            r3,  r3,  #2
          /* first output row: average of r4/r5 and r6/r7 */
132         uhadd8          r8,  r4,  r6
133         eor             r10, r4,  r6
134         uhadd8          r9,  r5,  r7
135         eor             r11, r5,  r7
136         and             r10, r10, r12
137         ldr_pre         r4,  r1,  r2
138         uadd8           r8,  r8,  r10
139         and             r11, r11, r12
140         uadd8           r9,  r9,  r11
141         ldr             r5,  [r1, #4]
          /* second output row: average of the freshly loaded r4/r5 and r6/r7 */
142         uhadd8          r10, r4,  r6
143         eor             r6,  r4,  r6
144         uhadd8          r11, r5,  r7
145         and             r6,  r6,  r12
146         eor             r7,  r5,  r7
147         uadd8           r10, r10, r6
148         and             r7,  r7,  r12
          /* load the row paired with r4/r5 in the next iteration */
149         ldr_pre         r6,  r1,  r2
150         uadd8           r11, r11, r7
151         strd_post       r8,  r9,  r0,  r2
152         ldr             r7,  [r1, #4]
153         strd_post       r10, r11, r0,  r2
154         bne             1b
155
156         pop             {r4-r11}
157         bx              lr
158 endfunc
159
/*
 * ff_put_pixels8_x2_no_rnd_armv6: horizontal half-sample interpolation
 * without rounding, 8 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset between rows (stride), applied to both pointers
 *      r3 = number of rows (two per iteration; must reach exactly zero in
 *           steps of 2 for the loop to end)
 *
 * Every output byte is (src[x] + src[x + 1]) >> 1, i.e. a plain uhadd8 with
 * no rounding correction.
 *
 * Nine source bytes are read per row (words at offsets 0, 4 and 5). The
 * "bytes 1..4" operand is built with lsr #8 / lsl #24, which corresponds to
 * little-endian byte order.
 *
 * ldr_pre is a macro from asm.S; presumably it adds the register operand to
 * rn before loading -- TODO confirm.
 *
 * r12 and r14 (lr) are used as scratch; r4-r9 and lr are saved and restored.
 */
160 function ff_put_pixels8_x2_no_rnd_armv6, export=1
161         push            {r4-r9, lr}
162 1:
          /* the flags set here are consumed by the bne at the end of the loop */
163         subs            r3,  r3,  #2
          /* row A into r4, r5, r7; row B into r8, r9, r14 */
164         ldr             r4,  [r1]
165         ldr             r5,  [r1, #4]
166         ldr             r7,  [r1, #5]
167         ldr_pre         r8,  r1,  r2
168         ldr             r9,  [r1, #4]
169         ldr             r14, [r1, #5]
170         add             r1,  r1,  r2
          /* r6 and r12 = bytes 1..4 of row A and row B */
171         lsr             r6,  r4,  #8
172         orr             r6,  r6,  r5,  lsl #24
173         lsr             r12, r8,  #8
174         orr             r12, r12, r9,  lsl #24
175         uhadd8          r4,  r4,  r6
176         uhadd8          r5,  r5,  r7
177         uhadd8          r8,  r8,  r12
178         uhadd8          r9,  r9,  r14
179         stm             r0,  {r4,r5}
180         add             r0,  r0,  r2
181         stm             r0,  {r8,r9}
182         add             r0,  r0,  r2
183         bne             1b
184
185         pop             {r4-r9, pc}
186 endfunc
187
/*
 * ff_put_pixels8_y2_no_rnd_armv6: vertical half-sample interpolation without
 * rounding, 8 bytes wide.
 *
 * In:  r0 = destination pointer
 *      r1 = source pointer
 *      r2 = byte offset between rows (stride), applied to both pointers
 *      r3 = number of rows (two per iteration; must reach exactly zero in
 *           steps of 2 for the loop to end)
 *
 * Every output byte is (a + b) >> 1 (uhadd8) of the bytes at the same column
 * in two vertically adjacent source rows.
 *
 * r4/r5 and r6/r7 alternately hold the older and newer source row. The last
 * loads in the loop body fetch the row for the next iteration, so the final
 * iteration reads one source row whose contents are never used.
 *
 * ldr_pre is a macro from asm.S; presumably it adds the register operand to
 * rn before loading -- TODO confirm.
 */
188 function ff_put_pixels8_y2_no_rnd_armv6, export=1
189         push            {r4-r9, lr}
          /* prime the pipeline with the first two source rows */
190         ldr             r4,  [r1]
191         ldr             r5,  [r1, #4]
192         ldr_pre         r6,  r1,  r2
193         ldr             r7,  [r1, #4]
194 1:
          /* the flags set here are consumed by the bne at the end of the loop */
195         subs            r3,  r3,  #2
196         uhadd8          r8,  r4,  r6
197         ldr_pre         r4,  r1,  r2
198         uhadd8          r9,  r5,  r7
199         ldr             r5,  [r1, #4]
200         uhadd8          r12, r4,  r6
201         ldr_pre         r6,  r1,  r2
202         uhadd8          r14, r5,  r7
203         ldr             r7,  [r1, #4]
204         stm             r0,  {r8,r9}
205         add             r0,  r0,  r2
206         stm             r0,  {r12,r14}
207         add             r0,  r0,  r2
208         bne             1b
209
210         pop             {r4-r9, pc}
211 endfunc
212
/*
 * ff_avg_pixels8_armv6: replace an 8-byte-wide destination block with the
 * rounded average of itself and a source block.
 *
 * In:  r0 = destination pointer (read with ldrd, written with strd)
 *      r1 = source pointer
 *      r2 = byte offset between rows (stride), applied to both pointers
 *      r3 = number of rows (two per iteration; must reach exactly zero in
 *           steps of 2 for the function to return)
 *
 * Every output byte is (dst + src + 1) >> 1, computed per byte lane as
 * uhadd8(dst, src) + ((dst ^ src) & 1).
 *
 * The loop is software-pipelined: the first destination and source rows are
 * loaded before the loop, and each half of the loop body loads the operands
 * for the following row while finishing the current one. The row counter is
 * decremented once before the loop and again in the second half of the body;
 * the beq in the middle uses whichever of those subs executed last. When it
 * is taken, label 2 finishes and stores the final row.
 *
 * ldr_post, ldrd_reg and strd_post are macros from asm.S; presumably
 * ldr_post/strd_post access [rn] and then add the register operand to rn,
 * and ldrd_reg loads from [rn + rm] without changing rn -- TODO confirm.
 *
 * r4-r10 and lr are saved; lr holds the 0x01010101 mask, r8 and r12 are
 * scratch.
 */
213 function ff_avg_pixels8_armv6, export=1
214         pld             [r1, r2]
215         push            {r4-r10, lr}
          /* lr = 0x01010101: mask for the low bit of every byte lane */
216         mov             lr,  #1
217         orr             lr,  lr,  lr,  lsl #8
218         orr             lr,  lr,  lr,  lsl #16
          /* first row: destination into r4/r5, source into r9/r10 */
219         ldrd            r4,  r5,  [r0]
220         ldr             r10, [r1, #4]
221         ldr_post        r9,  r1,  r2
222         subs            r3,  r3,  #2
223 1:
224         pld             [r1, r2]
          /* finish the row held in r4/r5 while loading the next row into r6/r7 and r9/r10 */
225         eor             r8,  r4,  r9
226         uhadd8          r4,  r4,  r9
227         eor             r12, r5,  r10
228         ldrd_reg        r6,  r7,  r0,  r2
229         uhadd8          r5,  r5,  r10
230         and             r8,  r8,  lr
231         ldr             r10, [r1, #4]
232         and             r12, r12, lr
233         uadd8           r4,  r4,  r8
234         ldr_post        r9,  r1,  r2
235         eor             r8,  r6,  r9
236         uadd8           r5,  r5,  r12
237         pld             [r1, r2,  lsl #1]
238         eor             r12, r7,  r10
239         uhadd8          r6,  r6,  r9
240         strd_post       r4,  r5,  r0,  r2
241         uhadd8          r7,  r7,  r10
          /* counter reached zero: the row in r6/r7 is the last one */
242         beq             2f
          /* finish the row held in r6/r7 while loading the next row into r4/r5 and r9/r10 */
243         and             r8,  r8,  lr
244         ldrd_reg        r4,  r5,  r0,  r2
245         uadd8           r6,  r6,  r8
246         ldr             r10, [r1, #4]
247         and             r12, r12, lr
248         subs            r3,  r3,  #2
249         uadd8           r7,  r7,  r12
250         ldr_post        r9,  r1,  r2
251         strd_post       r6,  r7,  r0,  r2
252         b               1b
253 2:
          /* apply the rounding correction to the last row and store it */
254         and             r8,  r8,  lr
255         and             r12, r12, lr
256         uadd8           r6,  r6,  r8
257         uadd8           r7,  r7,  r12
258         strd_post       r6,  r7,  r0,  r2
259
260         pop             {r4-r10, pc}
261 endfunc
262
/*
 * ff_add_pixels_clamped_armv6: add eight rows of eight signed 16-bit values
 * to an 8x8 block of bytes, saturating each result to 0..255, and write the
 * bytes back in place.
 *
 * In:  r0 = pointer to the 16-bit values; 16 bytes are consumed per row and
 *           r0 is post-incremented by the ldm
 *      r1 = pointer to the byte block (read with ldrd, written with strd_post)
 *      r2 = byte offset between rows of the byte block (stride)
 * r3 is overwritten with the fixed row count of 8; its incoming value is
 * never read.
 *
 * Per row the eight halfwords are regrouped with pkhbt/pkhtb into
 * even-indexed and odd-indexed pairs so that uxtab16 can add bytes 0/2
 * (no rotation) and bytes 1/3 (ror #8) of each pixel word. usat16 #8 clamps
 * every halfword lane to 0..255 and orr ... lsl #8 re-interleaves the lanes
 * into bytes. The "even/odd index" reading assumes little-endian memory
 * order -- TODO confirm.
 *
 * strd_post is a macro from asm.S; presumably it stores to [rn] and then adds
 * the register operand to rn -- TODO confirm.
 */
263 function ff_add_pixels_clamped_armv6, export=1
264         push            {r4-r8,lr}
265         mov             r3,  #8
266 1:
267         ldm             r0!, {r4,r5,r12,lr}
268         ldrd            r6,  r7,  [r1]
          /* r8 = low halves of r4,r5; r5 = high halves; likewise r4 / lr for r12,lr */
269         pkhbt           r8,  r4,  r5,  lsl #16
270         pkhtb           r5,  r5,  r4,  asr #16
271         pkhbt           r4,  r12, lr,  lsl #16
272         pkhtb           lr,  lr,  r12, asr #16
273         pld             [r1, r2]
          /* add the zero-extended pixel bytes to the matching halfword lanes */
274         uxtab16         r8,  r8,  r6
275         uxtab16         r5,  r5,  r6,  ror #8
276         uxtab16         r4,  r4,  r7
277         uxtab16         lr,  lr,  r7,  ror #8
          /* clamp every lane to 0..255 */
278         usat16          r8,  #8,  r8
279         usat16          r5,  #8,  r5
280         usat16          r4,  #8,  r4
281         usat16          lr,  #8,  lr
          /* re-interleave the clamped lanes into two words of bytes */
282         orr             r6,  r8,  r5,  lsl #8
283         orr             r7,  r4,  lr,  lsl #8
284         subs            r3,  r3,  #1
285         strd_post       r6,  r7,  r1,  r2
286         bgt             1b
287         pop             {r4-r8,pc}
288 endfunc
289
/*
 * ff_get_pixels_armv6: widen an 8x8 block of bytes to 16-bit values.
 *
 * In:  r0 = output pointer; 16 bytes are written per row and r0 is
 *           post-incremented by the stm
 *      r1 = pointer to the byte block
 *      r2 = byte offset between rows of the byte block (stride)
 * The row count is fixed at 8 (held in lr).
 *
 * Each row is loaded as two words. uxtb16 zero-extends bytes 0/2 and, with
 * ror #8, bytes 1/3 of each word into halfword lanes; pkhbt/pkhtb then put
 * the lanes back into consecutive order before the store.
 *
 * ldrd_post is a macro from asm.S; presumably it loads from [rn] and then
 * adds the register operand to rn -- TODO confirm.
 */
290 function ff_get_pixels_armv6, export=1
291         pld             [r1, r2]
292         push            {r4-r8, lr}
293         mov             lr,  #8
294 1:
295         ldrd_post       r4,  r5,  r1,  r2
          /* the flags set here are consumed by the bgt at the end of the loop */
296         subs            lr,  lr,  #1
297         uxtb16          r6,  r4
298         uxtb16          r4,  r4,  ror #8
299         uxtb16          r12, r5
300         uxtb16          r8,  r5,  ror #8
301         pld             [r1, r2]
          /* r5, r6 = the four values from the first word; r7, r12 = from the second */
302         pkhbt           r5,  r6,  r4,  lsl #16
303         pkhtb           r6,  r4,  r6,  asr #16
304         pkhbt           r7,  r12, r8,  lsl #16
305         pkhtb           r12, r8,  r12, asr #16
306         stm             r0!, {r5,r6,r7,r12}
307         bgt             1b
308
309         pop             {r4-r8, pc}
310 endfunc
311
/*
 * ff_diff_pixels_armv6: per-pixel difference of two 8x8 byte blocks, written
 * as 16-bit values.
 *
 * In:  r0 = output pointer; 16 bytes are written per row and r0 is
 *           post-incremented by the stm
 *      r1 = pointer to the first byte block (minuend)
 *      r2 = pointer to the second byte block (subtrahend)
 *      r3 = byte offset between rows, applied to both r1 and r2 (stride)
 * The row count is fixed at 8 (held in lr).
 *
 * Bytes are zero-extended into halfword lanes with uxtb16 (bytes 0/2, and
 * bytes 1/3 via ror #8), subtracted lane-wise with ssub16 (first block minus
 * second block), and put back into consecutive order with pkhbt/pkhtb.
 *
 * ldrd_post is a macro from asm.S; presumably it loads from [rn] and then
 * adds the register operand to rn -- TODO confirm.
 */
312 function ff_diff_pixels_armv6, export=1
313         pld             [r1, r3]
314         pld             [r2, r3]
315         push            {r4-r9, lr}
316         mov             lr,  #8
317 1:
318         ldrd_post       r4,  r5,  r1,  r3
319         ldrd_post       r6,  r7,  r2,  r3
          /* first word of each row: differences end up in r4 and r6 */
320         uxtb16          r8,  r4
321         uxtb16          r4,  r4,  ror #8
322         uxtb16          r9,  r6
323         uxtb16          r6,  r6,  ror #8
324         pld             [r1, r3]
325         ssub16          r9,  r8,  r9
326         ssub16          r6,  r4,  r6
327         uxtb16          r8,  r5
328         uxtb16          r5,  r5,  ror #8
329         pld             [r2, r3]
330         pkhbt           r4,  r9,  r6,  lsl #16
331         pkhtb           r6,  r6,  r9,  asr #16
          /* second word of each row: differences end up in r8 and r9 */
332         uxtb16          r9,  r7
333         uxtb16          r7,  r7,  ror #8
334         ssub16          r9,  r8,  r9
335         ssub16          r5,  r5,  r7
336         subs            lr,  lr,  #1
337         pkhbt           r8,  r9,  r5,  lsl #16
338         pkhtb           r9,  r5,  r9,  asr #16
339         stm             r0!, {r4,r6,r8,r9}
340         bgt             1b
341
342         pop             {r4-r9, pc}
343 endfunc
344
/*
 * ff_pix_abs16_armv6: sum of absolute differences between two blocks that
 * are 16 bytes wide.
 *
 * In:  r1 = pointer to the first block
 *      r2 = pointer to the second block
 *      r3 = byte offset between rows, applied to both pointers (stride)
 *      [sp] = number of rows, read from the stack before the push
 *             (presumably the fifth C argument -- TODO confirm)
 * The incoming r0 is never read; it is overwritten with the row count.
 * Out: r0 = sum of |a - b| over all bytes of all rows
 *
 * Two accumulators (r12 and lr) are fed alternately by usada8 and added
 * together at the end. Each row of the first block is loaded with ldm; the
 * loop loads the next row only when another row remains, so exactly the
 * requested rows are read. The row count must be at least 1: the loop is
 * left only when the counter reaches exactly zero.
 */
345 function ff_pix_abs16_armv6, export=1
346         ldr             r0,  [sp]
347         push            {r4-r9, lr}
348         mov             r12, #0
349         mov             lr,  #0
          /* first row: r4-r7 from the first block, r8 = first word of the second block */
350         ldm             r1,  {r4-r7}
351         ldr             r8,  [r2]
352 1:
353         ldr             r9,  [r2, #4]
354         pld             [r1, r3]
355         usada8          r12, r4,  r8,  r12
356         ldr             r8,  [r2, #8]
357         pld             [r2, r3]
358         usada8          lr,  r5,  r9,  lr
359         ldr             r9,  [r2, #12]
360         usada8          r12, r6,  r8,  r12
          /* the flags set here are consumed by the beq below */
361         subs            r0,  r0,  #1
362         usada8          lr,  r7,  r9,  lr
363         beq             2f
          /* more rows remain: advance both pointers and start loading the next row */
364         add             r1,  r1,  r3
365         ldm             r1,  {r4-r7}
366         add             r2,  r2,  r3
367         ldr             r8,  [r2]
368         b               1b
369 2:
370         add             r0,  r12, lr
371         pop             {r4-r9, pc}
372 endfunc
373
/*
 * ff_pix_abs16_x2_armv6: sum of absolute differences between a 16-byte-wide
 * block and a horizontally half-sample interpolated second block.
 *
 * In:  r1 = pointer to the first block (16 bytes read per row)
 *      r2 = pointer to the second block (17 bytes read per row: four words
 *           plus the byte at offset 16)
 *      r3 = byte offset between rows, applied to both pointers (stride)
 *      [sp] = number of rows, read from the stack before the push
 *             (presumably the fifth C argument -- TODO confirm)
 * The incoming r0 is never read; r0 is cleared and used as the accumulator.
 * Out: r0 = sum over all bytes of |a[x] - ((b[x] + b[x + 1] + 1) >> 1)|
 *
 * The rounded average is computed per byte lane as
 * uhadd8(p, q) + ((p ^ q) & 1), with q being the second block shifted by one
 * byte. The shifted words are built with lsr #8 / lsl #24, which corresponds
 * to little-endian byte order.
 *
 * One row is processed per iteration; the loop runs while the row counter in
 * r12 stays greater than zero after the decrement.
 */
374 function ff_pix_abs16_x2_armv6, export=1
375         ldr             r12, [sp]
376         push            {r4-r11, lr}
377         mov             r0,  #0
          /* lr = 0x01010101: mask for the low bit of every byte lane */
378         mov             lr,  #1
379         orr             lr,  lr,  lr,  lsl #8
380         orr             lr,  lr,  lr,  lsl #16
381 1:
          /* bytes 0..3: average of second-block bytes 0..3 and 1..4 against first-block word 0 */
382         ldr             r8,  [r2]
383         ldr             r9,  [r2, #4]
384         lsr             r10, r8,  #8
385         ldr             r4,  [r1]
386         lsr             r6,  r9,  #8
387         orr             r10, r10, r9,  lsl #24
388         ldr             r5,  [r2, #8]
389         eor             r11, r8,  r10
390         uhadd8          r7,  r8,  r10
391         orr             r6,  r6,  r5,  lsl #24
392         and             r11, r11, lr
393         uadd8           r7,  r7,  r11
394         ldr             r8,  [r1, #4]
395         usada8          r0,  r4,  r7,  r0
          /* bytes 4..7 */
396         eor             r7,  r9,  r6
397         lsr             r10, r5,  #8
398         and             r7,  r7,  lr
399         uhadd8          r4,  r9,  r6
400         ldr             r6,  [r2, #12]
401         uadd8           r4,  r4,  r7
402         pld             [r1, r3]
403         orr             r10, r10, r6,  lsl #24
404         usada8          r0,  r8,  r4,  r0
          /* bytes 8..11 */
405         ldr             r4,  [r1, #8]
406         eor             r11, r5,  r10
          /* 17th byte of the second-block row, needed for the last shifted word */
407         ldrb            r7,  [r2, #16]
408         and             r11, r11, lr
409         uhadd8          r8,  r5,  r10
410         ldr             r5,  [r1, #12]
411         uadd8           r8,  r8,  r11
412         pld             [r2, r3]
413         lsr             r10, r6,  #8
414         usada8          r0,  r4,  r8,  r0
          /* bytes 12..15 */
415         orr             r10, r10, r7,  lsl #24
          /* the flags set here are consumed by the bgt at the end of the loop */
416         subs            r12,  r12,  #1
417         eor             r11, r6,  r10
418         add             r1,  r1,  r3
419         uhadd8          r9,  r6,  r10
420         and             r11, r11, lr
421         uadd8           r9,  r9,  r11
422         add             r2,  r2,  r3
423         usada8          r0,  r5,  r9,  r0
424         bgt             1b
425
426         pop             {r4-r11, pc}
427 endfunc
428
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Processes one 16-byte row for a vertically interpolated SAD.
 *
 * On entry:  p0-p3 = the four words of the previous row read through r2
 *            r2    = pointer to the current row of that block
 *            r1    = pointer to the current row of the other block
 *            r3    = byte offset between rows
 *            lr    = mask ANDed with the eor results; the caller in this file
 *                    sets it to 0x01010101
 *            r0    = running sum
 * On exit:   n0-n3 = the four words of the current row read through r2, so
 *                    the next invocation can pass them as p0-p3
 *            p0-p3 = clobbered
 *            r0    = running sum plus this row's contribution
 *            r1, r2 = advanced by r3
 *
 * For each word the rounded average of the previous and current r2 rows is
 * formed as uhadd8(p, n) + ((p ^ n) & lr) and usada8 accumulates its absolute
 * difference against the matching word loaded through r1. The n and p
 * registers are reused as temporaries once their original contents are no
 * longer needed, which is why the register roles shift from step to step.
 */
429 .macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
          /* word 0 */
430         ldr             \n0, [r2]
431         eor             \n1, \p0, \n0
432         uhadd8          \p0, \p0, \n0
433         and             \n1, \n1, lr
434         ldr             \n2, [r1]
435         uadd8           \p0, \p0, \n1
436         ldr             \n1, [r2, #4]
437         usada8          r0,  \p0, \n2, r0
438         pld             [r1,  r3]
          /* word 1 */
439         eor             \n3, \p1, \n1
440         uhadd8          \p1, \p1, \n1
441         and             \n3, \n3, lr
442         ldr             \p0, [r1, #4]
443         uadd8           \p1, \p1, \n3
444         ldr             \n2, [r2, #8]
445         usada8          r0,  \p1, \p0, r0
446         pld             [r2,  r3]
          /* word 2 */
447         eor             \p0, \p2, \n2
448         uhadd8          \p2, \p2, \n2
449         and             \p0, \p0, lr
450         ldr             \p1, [r1, #8]
451         uadd8           \p2, \p2, \p0
452         ldr             \n3, [r2, #12]
453         usada8          r0,  \p2, \p1, r0
          /* word 3 */
454         eor             \p1, \p3, \n3
455         uhadd8          \p3, \p3, \n3
456         and             \p1, \p1, lr
457         ldr             \p0,  [r1, #12]
458         uadd8           \p3, \p3, \p1
459         add             r1,  r1,  r3
460         usada8          r0,  \p3, \p0,  r0
461         add             r2,  r2,  r3
462 .endm
463
/*
 * ff_pix_abs16_y2_armv6: sum of absolute differences between a 16-byte-wide
 * block and a vertically half-sample interpolated second block.
 *
 * In:  r1 = pointer to the first block
 *      r2 = pointer to the second block; one row more than the row count is
 *           read, because the first row is loaded before the loop
 *      r3 = byte offset between rows, applied to both pointers (stride)
 *      [sp] = number of rows, read from the stack before the push
 *             (presumably the fifth C argument -- TODO confirm)
 * The incoming r0 is never read; r0 is cleared and used as the accumulator.
 * Out: r0 = accumulated sum produced by the usad_y2 invocations
 *
 * Two rows are handled per iteration by invoking usad_y2 twice with the two
 * register groups r4-r7 and r8-r11 swapped, so the row loaded by one
 * invocation becomes the "previous row" of the next. The loop continues
 * while the counter in r12 is greater than zero after subtracting 2.
 */
464 function ff_pix_abs16_y2_armv6, export=1
465         pld             [r1]
466         pld             [r2]
467         ldr             r12, [sp]
468         push            {r4-r11, lr}
469         mov             r0,  #0
          /* lr = 0x01010101: rounding mask used inside usad_y2 */
470         mov             lr,  #1
471         orr             lr,  lr,  lr,  lsl #8
472         orr             lr,  lr,  lr,  lsl #16
          /* first row of the second block into r4-r7, then step to its next row */
473         ldr             r4,  [r2]
474         ldr             r5,  [r2, #4]
475         ldr             r6,  [r2, #8]
476         ldr             r7,  [r2, #12]
477         add             r2,  r2,  r3
478 1:
479         usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
          /* usad_y2 contains no flag-setting instruction, so bgt sees these flags */
480         subs            r12, r12, #2
481         usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
482         bgt             1b
483
484         pop             {r4-r11, pc}
485 endfunc
486
/*
 * ff_pix_abs8_armv6: sum of absolute differences between two blocks that are
 * 8 bytes wide.
 *
 * In:  r1 = pointer to the first block (read with ldrd_post)
 *      r2 = pointer to the second block (read with two word loads per row)
 *      r3 = byte offset between rows, applied to both pointers (stride)
 *      [sp] = number of rows, read from the stack before the push
 *             (presumably the fifth C argument -- TODO confirm)
 * The incoming r0 is never read; r0 and lr are cleared and used as two
 * accumulators that are added together at the end.
 * Out: r0 = sum of |a - b| over all bytes of all rows
 *
 * Two rows are handled per iteration and the loop is software-pipelined: the
 * first row of the first block is loaded before the loop, and the exit test
 * (beq) sits in the middle of the body so that no row beyond the requested
 * count is loaded. The counter must reach exactly zero in steps of 2.
 *
 * ldr_post and ldrd_post are macros from asm.S; presumably they load from
 * [rn] and then add the register operand to rn -- TODO confirm.
 */
487 function ff_pix_abs8_armv6, export=1
488         pld             [r2, r3]
489         ldr             r12, [sp]
490         push            {r4-r9, lr}
491         mov             r0,  #0
492         mov             lr,  #0
493         ldrd_post       r4,  r5,  r1,  r3
494 1:
          /* the flags set here are consumed by the beq below */
495         subs            r12, r12, #2
496         ldr             r7,  [r2, #4]
497         ldr_post        r6,  r2,  r3
498         ldrd_post       r8,  r9,  r1,  r3
499         usada8          r0,  r4,  r6,  r0
500         pld             [r2, r3]
501         usada8          lr,  r5,  r7,  lr
502         ldr             r7,  [r2, #4]
503         ldr_post        r6,  r2,  r3
504         beq             2f
          /* more rows remain: preload the next first-block row, then finish the second row */
505         ldrd_post       r4,  r5,  r1,  r3
506         usada8          r0,  r8,  r6,  r0
507         pld             [r2, r3]
508         usada8          lr,  r9,  r7,  lr
509         b               1b
510 2:
          /* accumulate the final row and combine the two partial sums */
511         usada8          r0,  r8,  r6,  r0
512         usada8          lr,  r9,  r7,  lr
513         add             r0,  r0,  lr
514         pop             {r4-r9, pc}
515 endfunc
516
/*
 * ff_sse16_armv6: sum of squared differences between two blocks that are
 * 16 bytes wide.
 *
 * In:  r1 = pointer to the first block
 *      r2 = pointer to the second block
 *      r3 = byte offset between rows, applied to both pointers (stride)
 *      [sp] = number of rows, read from the stack before the push
 *             (presumably the fifth C argument -- TODO confirm)
 * The incoming r0 is never read; r0 is cleared and used as the accumulator.
 * Out: r0 = sum of (a - b)^2 over all bytes of all rows
 *
 * Each word is split with uxtb16 into two halfword-lane registers (bytes 0/2
 * and, via ror #8, bytes 1/3). usub16 forms the lane-wise differences and
 * smlad adds the squares of both lanes to r0 (the differences fit in a
 * signed 16-bit lane because the inputs are zero-extended bytes).
 *
 * One row is processed per iteration; the loop runs while the row counter in
 * r12 stays greater than zero after the decrement.
 */
517 function ff_sse16_armv6, export=1
518         ldr             r12, [sp]
519         push            {r4-r9, lr}
520         mov             r0,  #0
521 1:
          /* bytes 0..7 of the row */
522         ldrd            r4,  r5,  [r1]
523         ldr             r8,  [r2]
524         uxtb16          lr,  r4
525         uxtb16          r4,  r4,  ror #8
526         uxtb16          r9,  r8
527         uxtb16          r8,  r8,  ror #8
528         ldr             r7,  [r2, #4]
529         usub16          lr,  lr,  r9
530         usub16          r4,  r4,  r8
531         smlad           r0,  lr,  lr,  r0
532         uxtb16          r6,  r5
533         uxtb16          lr,  r5,  ror #8
534         uxtb16          r8,  r7
535         uxtb16          r9,  r7,  ror #8
536         smlad           r0,  r4,  r4,  r0
          /* bytes 8..15 of the row */
537         ldrd            r4,  r5,  [r1, #8]
538         usub16          r6,  r6,  r8
539         usub16          r8,  lr,  r9
540         ldr             r7,  [r2, #8]
541         smlad           r0,  r6,  r6,  r0
542         uxtb16          lr,  r4
543         uxtb16          r4,  r4,  ror #8
544         uxtb16          r9,  r7
545         uxtb16          r7,  r7, ror #8
546         smlad           r0,  r8,  r8,  r0
547         ldr             r8,  [r2, #12]
548         usub16          lr,  lr,  r9
549         usub16          r4,  r4,  r7
550         smlad           r0,  lr,  lr,  r0
551         uxtb16          r6,  r5
552         uxtb16          r5,  r5,  ror #8
553         uxtb16          r9,  r8
554         uxtb16          r8,  r8,  ror #8
555         smlad           r0,  r4,  r4,  r0
556         usub16          r6,  r6,  r9
557         usub16          r5,  r5,  r8
558         smlad           r0,  r6,  r6,  r0
559         add             r1,  r1,  r3
560         add             r2,  r2,  r3
          /* the flags set here are consumed by the bgt below */
561         subs            r12, r12, #1
562         smlad           r0,  r5,  r5,  r0
563         bgt             1b
564
565         pop             {r4-r9, pc}
566 endfunc
567
/*
 * ff_pix_norm1_armv6: sum of the squares of all bytes in a 16x16 block.
 *
 * In:  r0 = pointer to the block
 *      r1 = byte offset between rows (stride)
 * Out: r0 = sum of p^2 over 16 rows of 16 bytes
 *
 * The row count is fixed at 16 (held in r12). Each row is loaded with one
 * ldm into r2-r5; every word is split with uxtb16 into two halfword-lane
 * registers (bytes 0/2 and, via ror #8, bytes 1/3) and smlad adds the squares
 * of both lanes to the accumulator in lr.
 *
 * r2 and r3 are overwritten as scratch; r4-r6 and lr are saved and restored.
 */
568 function ff_pix_norm1_armv6, export=1
569         push            {r4-r6, lr}
570         mov             r12, #16
571         mov             lr,  #0
572 1:
573         ldm             r0,  {r2-r5}
574         uxtb16          r6,  r2
575         uxtb16          r2,  r2,  ror #8
576         smlad           lr,  r6,  r6,  lr
577         uxtb16          r6,  r3
578         smlad           lr,  r2,  r2,  lr
579         uxtb16          r3,  r3,  ror #8
580         smlad           lr,  r6,  r6,  lr
581         uxtb16          r6,  r4
582         smlad           lr,  r3,  r3,  lr
583         uxtb16          r4,  r4,  ror #8
584         smlad           lr,  r6,  r6,  lr
585         uxtb16          r6,  r5
586         smlad           lr,  r4,  r4,  lr
587         uxtb16          r5,  r5,  ror #8
588         smlad           lr,  r6,  r6,  lr
          /* the flags set here are consumed by the bgt below */
589         subs            r12, r12, #1
590         add             r0,  r0,  r1
591         smlad           lr,  r5,  r5,  lr
592         bgt             1b
593
594         mov             r0,  lr
595         pop             {r4-r6, pc}
596 endfunc
597
/*
 * ff_pix_sum_armv6: sum of all bytes in a 16x16 block.
 *
 * In:  r0 = pointer to the block
 *      r1 = byte offset between rows (stride)
 * Out: r0 = sum of the bytes of 16 rows of 16 bytes
 *
 * The row count is fixed at 16 (held in r12). lr is kept at zero so that
 * usada8 against it adds up the four bytes of a word; two accumulators
 * (r2 and r3) are fed alternately and combined at the end.
 *
 * The first word is loaded before the loop. When the counter reaches zero
 * the beq skips the load of the next row, and the last usada8 of the row is
 * executed at label 2 instead. The bgt after the in-loop usada8 still sees
 * the flags from the subs at the top of the loop.
 *
 * ldr_pre is a macro from asm.S; presumably it adds the register operand to
 * rn before loading -- TODO confirm.
 */
598 function ff_pix_sum_armv6, export=1
599         push            {r4-r7, lr}
600         mov             r12, #16
601         mov             r2,  #0
602         mov             r3,  #0
603         mov             lr,  #0
604         ldr             r4,  [r0]
605 1:
606         subs            r12, r12, #1
607         ldr             r5,  [r0, #4]
608         usada8          r2,  r4,  lr,  r2
609         ldr             r6,  [r0, #8]
610         usada8          r3,  r5,  lr,  r3
611         ldr             r7,  [r0, #12]
612         usada8          r2,  r6,  lr,  r2
613         beq             2f
          /* more rows remain: step to the next row and load its first word */
614         ldr_pre         r4,  r0,  r1
615         usada8          r3,  r7,  lr,  r3
616         bgt             1b
617 2:
          /* last word of the final row, then combine the two partial sums */
618         usada8          r3,  r7,  lr,  r3
619         add             r0,  r2,  r3
620         pop             {r4-r7, pc}
621 endfunc
621 endfunc