]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/dsputil_arm.S
lavf: move ff_codec_get_tag() and ff_codec_get_id() definitions to internal.h
[ffmpeg] / libavcodec / arm / dsputil_arm.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of Libav.
6 @
7 @ Libav is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ Libav is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with Libav; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "libavutil/arm/asm.S"
24
25 #if HAVE_ARMV5TE
           @ ff_prefetch_arm: issue a run of pld hints.
           @   r0 = first address to prefetch
           @   r1 = byte step added to r0 after every pld
           @   r2 = number of pld instructions to issue
           @ The counter is decremented before it is tested, so the loop stops
           @ only when r2 reaches exactly zero.
           @ NOTE(review): r2 == 0 on entry would wrap around instead of returning
           @ at once - presumably callers always pass a positive count, confirm.
26 function ff_prefetch_arm, export=1
27         subs            r2,  r2,  #1             @ sets Z for the bne below
28         pld             [r0]
29         add             r0,  r0,  r1             @ no S suffix, flags from subs survive
30         bne             ff_prefetch_arm          @ loop by re-entering at the function label
31         bx              lr
32 endfunc
33 #else
           @ Fallback branch: pld is defined as the comment character, so every
           @ pld line in the rest of this file turns into a comment.
34 #define pld @
35 #endif
36
37 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
           @ Combine five source words into four destination words:
           @   Rd[i] = (Rn[i] >> shift*8) | (Rn[i+1] << (32 - shift*8)),  i = 0..3
           @ shift is a byte count (this file only passes 1, 2 or 3).
           @ With little-endian loads (assumed) Rd0-Rd3 end up holding the 16
           @ bytes that start shift bytes into Rn0.
           @ Only the Rd registers are written.
38         mov             \Rd0, \Rn0, lsr #(\shift * 8)
39         mov             \Rd1, \Rn1, lsr #(\shift * 8)
40         mov             \Rd2, \Rn2, lsr #(\shift * 8)
41         mov             \Rd3, \Rn3, lsr #(\shift * 8)
42         orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
43         orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
44         orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
45         orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
46 .endm
47 .macro  ALIGN_DWORD shift, R0, R1, R2
           @ In-place variant for two words:
           @   R0 = (R0 >> shift*8) | (R1 << (32 - shift*8))
           @   R1 = (R1 >> shift*8) | (R2 << (32 - shift*8))
           @ R0 is finished before R1 is modified, so the second step still sees
           @ the original R1 only through the value already merged into R0.
           @ R2 is read but never written.  shift is a byte count (1, 2 or 3 here).
48         mov             \R0, \R0, lsr #(\shift * 8)
49         orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
50         mov             \R1, \R1, lsr #(\shift * 8)
51         orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
52 .endm
53 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
           @ Two-word variant with separate destinations:
           @   Rdst0 = (Rsrc0 >> shift*8) | (Rsrc1 << (32 - shift*8))
           @   Rdst1 = (Rsrc1 >> shift*8) | (Rsrc2 << (32 - shift*8))
           @ Rsrc0 is read only by the first instruction, so Rdst1 may be the same
           @ register as Rsrc0 (RND_XY2_IT relies on this for align == 3).
54         mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
55         mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
56         orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
57         orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
58 .endm
59
60 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
           @ Per-byte average of two register pairs, rounding halves up:
           @ each byte of Rd is (n + m + 1) >> 1 for the matching bytes of Rn, Rm.
61         @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
62         @ Rmask = 0xFEFEFEFE
           @ (the mask clears the low bit of every byte so that the shift by one
           @  cannot move a bit into the byte below)
63         @ Rn = destroy
           @ (Rn0, Rn1 are overwritten with Rn | Rm; Rm and Rmask are only read)
64         eor             \Rd0, \Rn0, \Rm0
65         eor             \Rd1, \Rn1, \Rm1
66         orr             \Rn0, \Rn0, \Rm0
67         orr             \Rn1, \Rn1, \Rm1
68         and             \Rd0, \Rd0, \Rmask
69         and             \Rd1, \Rd1, \Rmask
70         sub             \Rd0, \Rn0, \Rd0, lsr #1
71         sub             \Rd1, \Rn1, \Rd1, lsr #1
72 .endm
73
74 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
           @ Per-byte average of two register pairs, rounding halves down:
           @ each byte of Rd is (n + m) >> 1 for the matching bytes of Rn, Rm.
75         @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
76         @ Rmask = 0xFEFEFEFE
           @ (the mask clears the low bit of every byte so that the shift by one
           @  cannot move a bit into the byte below)
77         @ Rn = destroy
           @ (Rn0, Rn1 are overwritten with Rn & Rm; Rm and Rmask are only read)
78         eor             \Rd0, \Rn0, \Rm0
79         eor             \Rd1, \Rn1, \Rm1
80         and             \Rn0, \Rn0, \Rm0
81         and             \Rn1, \Rn1, \Rm1
82         and             \Rd0, \Rd0, \Rmask
83         and             \Rd1, \Rd1, \Rmask
84         add             \Rd0, \Rn0, \Rd0, lsr #1
85         add             \Rd1, \Rn1, \Rd1, lsr #1
86 .endm
87
88 .macro  JMP_ALIGN tmp, reg
           @ Dispatch on the low two bits of a pointer.
           @   tmp = reg & 3, then reg is rounded down to a multiple of 4.
           @   Branches to local label 1, 2, 3 or 4 for an offset of 0, 1, 2 or 3;
           @   the code following the macro has to define all four labels.
           @ Clobbers tmp and the condition flags.
89         ands            \tmp, \reg, #3
90         bic             \reg, \reg, #3           @ no S suffix, Z from ands is still valid
91         beq             1f
92         subs            \tmp, \tmp, #1
93         beq             2f
94         subs            \tmp, \tmp, #1
95         beq             3f
96         b    4f
97 .endm
98
99 @ ----------------------------------------------------------------
100         .align 5
101 function ff_put_pixels16_arm, export=1
102         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
103         @ block = word aligned, pixels = unaligned
            @ Copies 16 bytes per row from pixels to block.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h.
            @ Both pointers advance by r2 per row and r3 counts down to zero.
            @ JMP_ALIGN rounds r1 down to a word boundary and selects one of the
            @ four loops below from the low two bits of the original pointer.
104         pld             [r1]
105         push            {r4-r11, lr}
106         JMP_ALIGN       r5,  r1
            @ offset 0: source is word aligned, plain four-word copy
107 1:
108         ldm             r1,  {r4-r7}
109         add             r1,  r1,  r2
110         stm             r0,  {r4-r7}
111         pld             [r1]
112         subs            r3,  r3,  #1
113         add             r0,  r0,  r2
114         bne             1b
115         pop             {r4-r11, pc}
116         .align 5
            @ offset 1: load five words, realign by one byte into r9-r12
117 2:
118         ldm             r1,  {r4-r8}
119         add             r1,  r1,  r2
120         ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
121         pld             [r1]
122         subs            r3,  r3,  #1
123         stm             r0,  {r9-r12}
124         add             r0,  r0,  r2
125         bne             2b
126         pop             {r4-r11, pc}
127         .align 5
            @ offset 2: same as above with a two-byte realignment
128 3:
129         ldm             r1,  {r4-r8}
130         add             r1,  r1,  r2
131         ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
132         pld             [r1]
133         subs            r3,  r3,  #1
134         stm             r0,  {r9-r12}
135         add             r0,  r0,  r2
136         bne             3b
137         pop             {r4-r11, pc}
138         .align 5
            @ offset 3: same as above with a three-byte realignment
139 4:
140         ldm             r1,  {r4-r8}
141         add             r1,  r1,  r2
142         ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
143         pld             [r1]
144         subs            r3,  r3,  #1
145         stm             r0,  {r9-r12}
146         add             r0,  r0,  r2
147         bne             4b
148         pop             {r4-r11,pc}
149 endfunc
150
151 @ ----------------------------------------------------------------
152         .align 5
153 function ff_put_pixels8_arm, export=1
154         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
155         @ block = word aligned, pixels = unaligned
            @ Copies 8 bytes per row from pixels to block.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h.
            @ Both pointers advance by r2 per row and r3 counts down to zero.
            @ r12 serves as the third load register in the unaligned loops and is
            @ not saved on the stack.
156         pld             [r1]
157         push            {r4-r5,lr}
158         JMP_ALIGN       r5,  r1
            @ offset 0: source is word aligned, plain two-word copy
159 1:
160         ldm             r1,  {r4-r5}
161         add             r1,  r1,  r2
162         subs            r3,  r3,  #1
163         pld             [r1]
164         stm             r0,  {r4-r5}
165         add             r0,  r0,  r2
166         bne             1b
167         pop             {r4-r5,pc}
168         .align 5
            @ offset 1: load three words, realign r4, r5 in place by one byte
169 2:
170         ldm             r1,  {r4-r5, r12}
171         add             r1,  r1,  r2
172         ALIGN_DWORD     1,   r4,  r5,  r12
173         pld             [r1]
174         subs            r3,  r3,  #1
175         stm             r0,  {r4-r5}
176         add             r0,  r0,  r2
177         bne             2b
178         pop             {r4-r5,pc}
179         .align 5
            @ offset 2: two-byte realignment
180 3:
181         ldm             r1,  {r4-r5, r12}
182         add             r1,  r1,  r2
183         ALIGN_DWORD     2,   r4,  r5,  r12
184         pld             [r1]
185         subs            r3,  r3,  #1
186         stm             r0,  {r4-r5}
187         add             r0,  r0,  r2
188         bne             3b
189         pop             {r4-r5,pc}
190         .align 5
            @ offset 3: three-byte realignment
191 4:
192         ldm             r1,  {r4-r5, r12}
193         add             r1,  r1,  r2
194         ALIGN_DWORD     3,   r4,  r5,  r12
195         pld             [r1]
196         subs            r3,  r3,  #1
197         stm             r0,  {r4-r5}
198         add             r0,  r0,  r2
199         bne             4b
200         pop             {r4-r5,pc}
201 endfunc
202
203 @ ----------------------------------------------------------------
204         .align 5
205 function ff_put_pixels8_x2_arm, export=1
206         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
207         @ block = word aligned, pixels = unaligned
            @ Horizontal half-pel: each of the 8 output bytes per row is the
            @ rounded-up average (RND_AVG32) of a source byte and the byte after it.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h,
            @            r12 = 0xfefefefe mask for RND_AVG32.
            @ Every loop loads three words (r4, r5, r10) from the rounded-down
            @ pointer and builds the two byte vectors to average from them.
208         pld             [r1]
209         push            {r4-r10,lr}
210         ldr             r12, =0xfefefefe
211         JMP_ALIGN       r5,  r1
            @ offset 0: average the loaded words r4, r5 with their shift-by-1 copy
212 1:
213         ldm             r1,  {r4-r5, r10}
214         add             r1,  r1,  r2
215         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
216         pld             [r1]
217         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
218         subs            r3,  r3,  #1
219         stm             r0,  {r8-r9}
220         add             r0,  r0,  r2
221         bne             1b
222         pop             {r4-r10,pc}
223         .align 5
            @ offset 1: average the shift-by-1 and shift-by-2 copies
224 2:
225         ldm             r1,  {r4-r5, r10}
226         add             r1,  r1,  r2
227         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
228         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
229         pld             [r1]
230         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
231         subs            r3,  r3,  #1
232         stm             r0,  {r4-r5}
233         add             r0,  r0,  r2
234         bne             2b
235         pop             {r4-r10,pc}
236         .align 5
            @ offset 2: average the shift-by-2 and shift-by-3 copies
237 3:
238         ldm             r1,  {r4-r5, r10}
239         add             r1,  r1,  r2
240         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
241         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
242         pld             [r1]
243         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
244         subs            r3,  r3,  #1
245         stm             r0,  {r4-r5}
246         add             r0,  r0,  r2
247         bne             3b
248         pop             {r4-r10,pc}
249         .align 5
            @ offset 3: the neighbour vector starts at byte 4, which is exactly
            @ the loaded words r5, r10, so only one realignment is needed
250 4:
251         ldm             r1,  {r4-r5, r10}
252         add             r1,  r1,  r2
253         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
254         pld             [r1]
255         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
256         subs            r3,  r3,  #1
257         stm             r0,  {r8-r9}
258         add             r0,  r0,  r2
259         bne             4b
260         pop             {r4-r10,pc}
261 endfunc
262
263         .align 5
264 function ff_put_no_rnd_pixels8_x2_arm, export=1
265         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
266         @ block = word aligned, pixels = unaligned
            @ Horizontal half-pel without rounding: each of the 8 output bytes per
            @ row is the rounded-down average (NO_RND_AVG32) of a source byte and
            @ the byte after it.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h,
            @            r12 = 0xfefefefe mask for NO_RND_AVG32.
            @ Every loop loads three words (r4, r5, r10) from the rounded-down
            @ pointer and builds the two byte vectors to average from them.
267         pld             [r1]
268         push            {r4-r10,lr}
269         ldr             r12, =0xfefefefe
270         JMP_ALIGN       r5,  r1
            @ offset 0: average the loaded words r4, r5 with their shift-by-1 copy
271 1:
272         ldm             r1,  {r4-r5, r10}
273         add             r1,  r1,  r2
274         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
275         pld             [r1]
276         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
277         subs            r3,  r3,  #1
278         stm             r0,  {r8-r9}
279         add             r0,  r0,  r2
280         bne             1b
281         pop             {r4-r10,pc}
282         .align 5
            @ offset 1: average the shift-by-1 and shift-by-2 copies
283 2:
284         ldm             r1,  {r4-r5, r10}
285         add             r1,  r1,  r2
286         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
287         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
288         pld             [r1]
289         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
290         subs            r3,  r3,  #1
291         stm             r0,  {r4-r5}
292         add             r0,  r0,  r2
293         bne             2b
294         pop             {r4-r10,pc}
295         .align 5
            @ offset 2: average the shift-by-2 and shift-by-3 copies
296 3:
297         ldm             r1,  {r4-r5, r10}
298         add             r1,  r1,  r2
299         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
300         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
301         pld             [r1]
302         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
303         subs            r3,  r3,  #1
304         stm             r0,  {r4-r5}
305         add             r0,  r0,  r2
306         bne             3b
307         pop             {r4-r10,pc}
308         .align 5
            @ offset 3: the neighbour vector starts at byte 4, which is exactly
            @ the loaded words r5, r10, so only one realignment is needed
309 4:
310         ldm             r1,  {r4-r5, r10}
311         add             r1,  r1,  r2
312         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
313         pld             [r1]
314         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
315         subs            r3,  r3,  #1
316         stm             r0,  {r8-r9}
317         add             r0,  r0,  r2
318         bne             4b
319         pop             {r4-r10,pc}
320 endfunc
321
322
323 @ ----------------------------------------------------------------
324         .align 5
325 function ff_put_pixels8_y2_arm, export=1
326         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
327         @ block = word aligned, pixels = unaligned
            @ Vertical half-pel: each 8-byte output row is the rounded-up byte
            @ average (RND_AVG32) of a source row and the source row below it.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size,
            @            r12 = 0xfefefefe mask for RND_AVG32.
            @ r3 is halved on entry because every pass of a loop stores two rows,
            @ so an odd h produces h - 1 rows.
            @ Each variant loads the first row before local label 6, then per pass
            @ loads two more rows and reuses the last one on the next pass.
328         pld             [r1]
329         push            {r4-r11,lr}
330         mov             r3,  r3,  lsr #1
331         ldr             r12, =0xfefefefe
332         JMP_ALIGN       r5,  r1
            @ offset 0: rows alternate between r4, r5 and r6, r7
333 1:
334         ldm             r1,  {r4-r5}
335         add             r1,  r1,  r2
336 6:      ldm             r1,  {r6-r7}
337         add             r1,  r1,  r2
338         pld             [r1]
339         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
            @ r4, r5 were destroyed by the average and are reloaded with the next row
340         ldm             r1,  {r4-r5}
341         add             r1,  r1,  r2
342         stm             r0,  {r8-r9}
343         add             r0,  r0,  r2
344         pld             [r1]
345         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
346         subs            r3,  r3,  #1
347         stm             r0,  {r8-r9}
348         add             r0,  r0,  r2
349         bne             6b
350         pop             {r4-r11,pc}
351         .align 5
            @ offset 1: rows alternate between r4, r5 and r7, r8, each realigned
            @ in place by one byte after a three-word load
352 2:
353         ldm             r1,  {r4-r6}
354         add             r1,  r1,  r2
355         pld             [r1]
356         ALIGN_DWORD     1,   r4,  r5,  r6
357 6:      ldm             r1,  {r7-r9}
358         add             r1,  r1,  r2
359         pld             [r1]
360         ALIGN_DWORD     1,   r7,  r8,  r9
361         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
362         stm             r0,  {r10-r11}
363         add             r0,  r0,  r2
364         ldm             r1,  {r4-r6}
365         add             r1,  r1,  r2
366         pld             [r1]
367         ALIGN_DWORD     1,   r4,  r5,  r6
            @ the flags set here reach the bne below, nothing in between alters them
368         subs            r3,  r3,  #1
369         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
370         stm             r0,  {r10-r11}
371         add             r0,  r0,  r2
372         bne             6b
373         pop             {r4-r11,pc}
374         .align 5
            @ offset 2: as offset 1 with a two-byte realignment
375 3:
376         ldm             r1,  {r4-r6}
377         add             r1,  r1,  r2
378         pld             [r1]
379         ALIGN_DWORD     2,   r4,  r5,  r6
380 6:      ldm             r1,  {r7-r9}
381         add             r1,  r1,  r2
382         pld             [r1]
383         ALIGN_DWORD     2,   r7,  r8,  r9
384         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
385         stm             r0,  {r10-r11}
386         add             r0,  r0,  r2
387         ldm             r1,  {r4-r6}
388         add             r1,  r1,  r2
389         pld             [r1]
390         ALIGN_DWORD     2,   r4,  r5,  r6
391         subs            r3,  r3,  #1
392         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
393         stm             r0,  {r10-r11}
394         add             r0,  r0,  r2
395         bne             6b
396         pop             {r4-r11,pc}
397         .align 5
            @ offset 3: as offset 1 with a three-byte realignment
398 4:
399         ldm             r1,  {r4-r6}
400         add             r1,  r1,  r2
401         pld             [r1]
402         ALIGN_DWORD     3,   r4,  r5,  r6
403 6:      ldm             r1,  {r7-r9}
404         add             r1,  r1,  r2
405         pld             [r1]
406         ALIGN_DWORD     3,   r7,  r8,  r9
407         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
408         stm             r0,  {r10-r11}
409         add             r0,  r0,  r2
410         ldm             r1,  {r4-r6}
411         add             r1,  r1,  r2
412         pld             [r1]
413         ALIGN_DWORD     3,   r4,  r5,  r6
414         subs            r3,  r3,  #1
415         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
416         stm             r0,  {r10-r11}
417         add             r0,  r0,  r2
418         bne             6b
419         pop             {r4-r11,pc}
420 endfunc
421
422         .align 5
423 function ff_put_no_rnd_pixels8_y2_arm, export=1
424         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
425         @ block = word aligned, pixels = unaligned
            @ Vertical half-pel without rounding: each 8-byte output row is the
            @ rounded-down byte average (NO_RND_AVG32) of a source row and the
            @ source row below it.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size,
            @            r12 = 0xfefefefe mask for NO_RND_AVG32.
            @ r3 is halved on entry because every pass of a loop stores two rows,
            @ so an odd h produces h - 1 rows.
            @ Each variant loads the first row before local label 6, then per pass
            @ loads two more rows and reuses the last one on the next pass.
426         pld             [r1]
427         push            {r4-r11,lr}
428         mov             r3,  r3,  lsr #1
429         ldr             r12, =0xfefefefe
430         JMP_ALIGN       r5,  r1
            @ offset 0: rows alternate between r4, r5 and r6, r7
431 1:
432         ldm             r1,  {r4-r5}
433         add             r1,  r1,  r2
434 6:      ldm             r1,  {r6-r7}
435         add             r1,  r1,  r2
436         pld             [r1]
437         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
            @ r4, r5 were destroyed by the average and are reloaded with the next row
438         ldm             r1,  {r4-r5}
439         add             r1,  r1,  r2
440         stm             r0,  {r8-r9}
441         add             r0,  r0,  r2
442         pld             [r1]
443         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
444         subs            r3,  r3,  #1
445         stm             r0,  {r8-r9}
446         add             r0,  r0,  r2
447         bne             6b
448         pop             {r4-r11,pc}
449         .align 5
            @ offset 1: rows alternate between r4, r5 and r7, r8, each realigned
            @ in place by one byte after a three-word load
450 2:
451         ldm             r1,  {r4-r6}
452         add             r1,  r1,  r2
453         pld             [r1]
454         ALIGN_DWORD     1,   r4,  r5,  r6
455 6:      ldm             r1,  {r7-r9}
456         add             r1,  r1,  r2
457         pld             [r1]
458         ALIGN_DWORD     1,   r7,  r8,  r9
459         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
460         stm             r0,  {r10-r11}
461         add             r0,  r0,  r2
462         ldm             r1,  {r4-r6}
463         add             r1,  r1,  r2
464         pld             [r1]
465         ALIGN_DWORD     1,   r4,  r5,  r6
            @ the flags set here reach the bne below, nothing in between alters them
466         subs            r3,  r3,  #1
467         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
468         stm             r0,  {r10-r11}
469         add             r0,  r0,  r2
470         bne             6b
471         pop             {r4-r11,pc}
472         .align 5
            @ offset 2: as offset 1 with a two-byte realignment
473 3:
474         ldm             r1,  {r4-r6}
475         add             r1,  r1,  r2
476         pld             [r1]
477         ALIGN_DWORD     2,   r4,  r5,  r6
478 6:      ldm             r1,  {r7-r9}
479         add             r1,  r1,  r2
480         pld             [r1]
481         ALIGN_DWORD     2,   r7,  r8,  r9
482         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
483         stm             r0,  {r10-r11}
484         add             r0,  r0,  r2
485         ldm             r1,  {r4-r6}
486         add             r1,  r1,  r2
487         pld             [r1]
488         ALIGN_DWORD     2,   r4,  r5,  r6
489         subs            r3,  r3,  #1
490         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
491         stm             r0,  {r10-r11}
492         add             r0,  r0,  r2
493         bne             6b
494         pop             {r4-r11,pc}
495         .align 5
            @ offset 3: as offset 1 with a three-byte realignment
496 4:
497         ldm             r1,  {r4-r6}
498         add             r1,  r1,  r2
499         pld             [r1]
500         ALIGN_DWORD     3,   r4,  r5,  r6
501 6:      ldm             r1,  {r7-r9}
502         add             r1,  r1,  r2
503         pld             [r1]
504         ALIGN_DWORD     3,   r7,  r8,  r9
505         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
506         stm             r0,  {r10-r11}
507         add             r0,  r0,  r2
508         ldm             r1,  {r4-r6}
509         add             r1,  r1,  r2
510         pld             [r1]
511         ALIGN_DWORD     3,   r4,  r5,  r6
512         subs            r3,  r3,  #1
513         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
514         stm             r0,  {r10-r11}
515         add             r0,  r0,  r2
516         bne             6b
517         pop             {r4-r11,pc}
518 endfunc
519
            @ Emit the pending literal pool here, i.e. the constants requested by
            @ the ldr rX, =value loads above.
520         .ltorg
521
522 @ ----------------------------------------------------------------
523 .macro  RND_XY2_IT align, rnd
            @ One source row of the 2x2 average.
            @ Inputs : r1 = word-aligned row pointer, r2 = line_size, r3 = row counter,
            @          align = byte offset of the pixels within the first word (0..3),
            @          rnd   = shift mnemonic (lsl or lsr) used to derive the rounding term.
            @ Effects: r1 advances by r2 and r3 is decremented with flags set (the
            @          caller branches on them).
            @ Outputs: r8,  r9  = l1 for the two words of the row (see formula below)
            @          r10, r11 = h1 for the two words of the row
            @ Clobbers r4-r7, r12 and r14.
            @ a is the 8-byte vector at the pixel pointer, b the vector one byte later.
            @ After the alignment step below they sit in r4, r5 and r6, r7 (which
            @ pair holds a and which holds b depends on align).
524         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
525         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
526 .if \align == 0
527         ldm             r1,  {r6-r8}
528 .elseif \align == 3
529         ldm             r1,  {r5-r7}
530 .else
531         ldm             r1,  {r8-r10}
532 .endif
533         add             r1,  r1,  r2
534         pld             [r1]
535 .if \align == 0
536         ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
537 .elseif \align == 1
538         ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
539         ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
540 .elseif \align == 2
541         ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
542         ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
543 .elseif \align == 3
            @ r5 is both first source and second destination, r6, r7 stay as loaded
544         ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
545 .endif
546         ldr             r14, =0x03030303
            @ Z set when the row counter is even: only those rows get the rounding term
547         tst             r3,  #1
548         and             r8,  r4,  r14
549         and             r9,  r5,  r14
550         and             r10, r6,  r14
551         and             r11, r7,  r14
552         it              eq
            @ rounding term: 0x02020202 when rnd is lsl, 0x01010101 when rnd is lsr
553         andeq           r14, r14, r14, \rnd #1
554         add             r8,  r8,  r10
555         add             r9,  r9,  r11
556         ldr             r12, =0xfcfcfcfc >> 2
557         itt             eq
558         addeq           r8,  r8,  r14
559         addeq           r9,  r9,  r14
560         and             r4,  r12, r4,  lsr #2
561         and             r5,  r12, r5,  lsr #2
562         and             r6,  r12, r6,  lsr #2
563         and             r7,  r12, r7,  lsr #2
564         add             r10, r4,  r6
565         add             r11, r5,  r7
566         subs            r3,  r3,  #1
567 .endm
568
569 .macro RND_XY2_EXPAND align, rnd
            @ Complete loop body and epilogue for one alignment case of the xy2
            @ functions.  The partial sums of the first row are computed once, then
            @ each pass at local label 6 keeps the previous row on the stack,
            @ computes the next row, and stores
            @     (((l1_prev + l1_cur) >> 2) & 0x0f0f0f0f) + (h1_prev + h1_cur)
            @ as 8 output bytes at r0, advancing r0 by r2.
            @ Ends with pop {r4-r11,pc}, so it returns from the enclosing function
            @ and expects that function to have pushed {r4-r11,lr}.
570         RND_XY2_IT      \align, \rnd
571 6:      push            {r8-r11}
572         RND_XY2_IT      \align, \rnd
            @ previous row: l1 in r4, r5 and h1 in r6, r7
573         pop             {r4-r7}
574         add             r4,  r4,  r8
575         add             r5,  r5,  r9
576         ldr             r14, =0x0f0f0f0f
577         add             r6,  r6,  r10
578         add             r7,  r7,  r11
579         and             r4,  r14, r4,  lsr #2
580         and             r5,  r14, r5,  lsr #2
581         add             r4,  r4,  r6
582         add             r5,  r5,  r7
583         stm             r0,  {r4-r5}
584         add             r0,  r0,  r2
            @ flags come from the subs at the end of RND_XY2_IT, no instruction since
            @ then sets them.  Loops until the row counter has gone negative.
585         bge             6b
586         pop             {r4-r11,pc}
587 .endm
588
589         .align 5
590 function ff_put_pixels8_xy2_arm, export=1
591         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
592         @ block = word aligned, pixels = unaligned
            @ 2x2 half-pel with rounding: every output byte combines a source byte,
            @ the byte after it and the two bytes at the same positions one row
            @ below (see RND_XY2_IT / RND_XY2_EXPAND).  Passing lsl selects the
            @ 0x02020202 rounding term.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h.
            @ Each RND_XY2_EXPAND ends by popping pc, so the four alignment cases
            @ do not fall through into one another.
593         pld             [r1]
594         push            {r4-r11,lr} @ R14 is also called LR
595         JMP_ALIGN       r5,  r1
596 1:      RND_XY2_EXPAND  0, lsl
597         .align 5
598 2:      RND_XY2_EXPAND  1, lsl
599         .align 5
600 3:      RND_XY2_EXPAND  2, lsl
601         .align 5
602 4:      RND_XY2_EXPAND  3, lsl
603 endfunc
604
605         .align 5
606 function ff_put_no_rnd_pixels8_xy2_arm, export=1
607         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
608         @ block = word aligned, pixels = unaligned
            @ 2x2 half-pel, no-rounding variant: same computation as the rounding
            @ version (see RND_XY2_IT / RND_XY2_EXPAND) except that passing lsr
            @ selects the smaller 0x01010101 term instead of 0x02020202.
            @ Registers: r0 = block, r1 = pixels, r2 = line_size, r3 = h.
            @ Each RND_XY2_EXPAND ends by popping pc, so the four alignment cases
            @ do not fall through into one another.
609         pld             [r1]
610         push            {r4-r11,lr}
611         JMP_ALIGN       r5,  r1
612 1:      RND_XY2_EXPAND  0, lsr
613         .align 5
614 2:      RND_XY2_EXPAND  1, lsr
615         .align 5
616 3:      RND_XY2_EXPAND  2, lsr
617         .align 5
618 4:      RND_XY2_EXPAND  3, lsr
619 endfunc
620
621         .align 5
622 @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
            @ Adds 8 rows of 8 signed 16-bit values from block to the bytes at dest
            @ and writes the clamped sums back to dest.
            @ Registers: r0 = block (advances 16 bytes per row), r1 = dest (advances
            @            by r2 per row), r2 = stride, r10 = row counter (8 down to 0),
            @            r4 = current dest word, r9 = packed result word.
            @ Clamp: when bit 8 of a sum is set the sum is replaced by the top byte
            @ of the inverted coefficient, which is 0xFF for a non-negative
            @ coefficient and 0x00 for a negative one (ldrsh sign-extends).
            @ NOTE(review): only bit 8 is tested, so the clamp is exact only for
            @ sums in -256..511 - confirm that callers keep coefficients in range.
            @ The instruction order is hand-interleaved. The bracketed tags [A]..[F]
            @ mark where a moved instruction logically belongs.
623 function ff_add_pixels_clamped_arm, export=1
624         push            {r4-r10}
625         mov             r10, #8
626 1:
627         ldr             r4,  [r1]               /* load dest */
628         /* block[0] and block[1]*/
629         ldrsh           r5,  [r0]
630         ldrsh           r7,  [r0, #2]
631         and             r6,  r4,  #0xFF
632         and             r8,  r4,  #0xFF00
633         add             r6,  r6,  r5
634         add             r8,  r7,  r8,  lsr #8
635         mvn             r5,  r5
636         mvn             r7,  r7
637         tst             r6,  #0x100
638         it              ne
639         movne           r6,  r5,  lsr #24
640         tst             r8,  #0x100
641         it              ne
642         movne           r8,  r7,  lsr #24
643         mov             r9,  r6
644         ldrsh           r5,  [r0, #4]           /* moved from [A] */
645         orr             r9,  r9,  r8,  lsl #8
646         /* block[2] and block[3] */
647         /* [A] */
648         ldrsh           r7,  [r0, #6]
649         and             r6,  r4,  #0xFF0000
650         and             r8,  r4,  #0xFF000000
651         add             r6,  r5,  r6,  lsr #16
652         add             r8,  r7,  r8,  lsr #24
653         mvn             r5,  r5
654         mvn             r7,  r7
655         tst             r6,  #0x100
656         it              ne
657         movne           r6,  r5,  lsr #24
658         tst             r8,  #0x100
659         it              ne
660         movne           r8,  r7,  lsr #24
661         orr             r9,  r9,  r6,  lsl #16
662         ldr             r4,  [r1, #4]           /* moved from [B] */
663         orr             r9,  r9,  r8,  lsl #24
664         /* store dest */
665         ldrsh           r5,  [r0, #8]           /* moved from [C] */
666         str             r9,  [r1]
667
668         /* load dest */
669         /* [B] */
670         /* block[4] and block[5] */
671         /* [C] */
672         ldrsh           r7,  [r0, #10]
673         and             r6,  r4,  #0xFF
674         and             r8,  r4,  #0xFF00
675         add             r6,  r6,  r5
676         add             r8,  r7,  r8,  lsr #8
677         mvn             r5,  r5
678         mvn             r7,  r7
679         tst             r6,  #0x100
680         it              ne
681         movne           r6,  r5,  lsr #24
682         tst             r8,  #0x100
683         it              ne
684         movne           r8,  r7,  lsr #24
685         mov             r9,  r6
686         ldrsh           r5,  [r0, #12]          /* moved from [D] */
687         orr             r9,  r9,  r8,  lsl #8
688         /* block[6] and block[7] */
689         /* [D] */
690         ldrsh           r7,  [r0, #14]
691         and             r6,  r4,  #0xFF0000
692         and             r8,  r4,  #0xFF000000
693         add             r6,  r5,  r6,  lsr #16
694         add             r8,  r7,  r8,  lsr #24
695         mvn             r5,  r5
696         mvn             r7,  r7
697         tst             r6,  #0x100
698         it              ne
699         movne           r6,  r5,  lsr #24
700         tst             r8,  #0x100
701         it              ne
702         movne           r8,  r7,  lsr #24
703         orr             r9,  r9,  r6,  lsl #16
704         add             r0,  r0,  #16           /* moved from [E] */
705         orr             r9,  r9,  r8,  lsl #24
706         subs            r10, r10, #1            /* moved from [F] */
707         /* store dest */
708         str             r9,  [r1, #4]
709
710         /* [E] */
711         /* [F] */
            /* str and add leave the flags alone, so bne still sees the subs result */
712         add             r1,  r1,  r2
713         bne             1b
714
715         pop             {r4-r10}
716         bx              lr
717 endfunc