]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/hpeldsp_arm.S
arm: Use the matching endfunc macro instead of the assembler directive directly
[ffmpeg] / libavcodec / arm / hpeldsp_arm.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of Libav.
6 @
7 @ Libav is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ Libav is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with Libav; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "libavutil/arm/asm.S"
24
@ Fallback for builds where HAVE_ARMV5TE_EXTERNAL is false (presumably set
@ by config.h): the preload mnemonic is redefined as the comment character,
@ so the preprocessor turns every preload-hint line below into a comment.
25 #if !HAVE_ARMV5TE_EXTERNAL
26 #define pld @
27 #endif
28
29 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
           @ Funnel-shift five source words into four destination words:
           @   Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i+1] << (32 - 8 * shift))
           @ shift is a byte count; every use in this file passes 1, 2 or 3.
           @ The Rn registers are only read. All four mov instructions run
           @ before the first orr, so no Rd may name an Rn that an orr still
           @ has to read (Rn1..Rn4).
           @ NOTE(review): the lsr/lsl pairing treats the lowest-addressed byte
           @ as least significant, i.e. little-endian loads - confirm.
30         mov             \Rd0, \Rn0, lsr #(\shift * 8)
31         mov             \Rd1, \Rn1, lsr #(\shift * 8)
32         mov             \Rd2, \Rn2, lsr #(\shift * 8)
33         mov             \Rd3, \Rn3, lsr #(\shift * 8)
34         orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
35         orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
36         orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
37         orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
38 .endm
39 .macro  ALIGN_DWORD shift, R0, R1, R2
           @ In-place funnel shift of a three-word window by shift bytes:
           @   R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
           @   R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
           @ R0 is completed first because it needs the original R1.
           @ R2 is only read. Every use in this file passes shift 1, 2 or 3.
40         mov             \R0, \R0, lsr #(\shift * 8)
41         orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
42         mov             \R1, \R1, lsr #(\shift * 8)
43         orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
44 .endm
45 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
           @ Same funnel shift as ALIGN_DWORD, but written to separate
           @ destination registers:
           @   Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
           @   Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
           @ Both mov instructions run before the orr pair, so a destination
           @ must not be Rsrc1 or Rsrc2. Rdst1 may be Rsrc0 (RND_XY2_IT relies
           @ on that for align 3), since Rsrc0 is read by the first mov only.
46         mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
47         mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
48         orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
49         orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
50 .endm
51
52 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
53         @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
54         @ Rmask = 0xFEFEFEFE
55         @ Rn = destroy
           @ Bytewise average of two word pairs, rounding halves up.
           @ Masking with Rmask clears the low bit of every byte before the
           @ shift, so no bit moves from one byte into its neighbour.
           @ Rn0 and Rn1 are overwritten with Rn | Rm; Rm0, Rm1 and Rmask are
           @ only read. No instruction here updates the condition flags, which
           @ callers rely on when a subs precedes the macro.
56         eor             \Rd0, \Rn0, \Rm0
57         eor             \Rd1, \Rn1, \Rm1
58         orr             \Rn0, \Rn0, \Rm0
59         orr             \Rn1, \Rn1, \Rm1
60         and             \Rd0, \Rd0, \Rmask
61         and             \Rd1, \Rd1, \Rmask
62         sub             \Rd0, \Rn0, \Rd0, lsr #1
63         sub             \Rd1, \Rn1, \Rd1, lsr #1
64 .endm
65
66 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
67         @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
68         @ Rmask = 0xFEFEFEFE
69         @ Rn = destroy
           @ Bytewise average of two word pairs, rounding halves down.
           @ Masking with Rmask clears the low bit of every byte before the
           @ shift, so no bit moves from one byte into its neighbour.
           @ Rn0 and Rn1 are overwritten with Rn & Rm; Rm0, Rm1 and Rmask are
           @ only read. No instruction here updates the condition flags.
70         eor             \Rd0, \Rn0, \Rm0
71         eor             \Rd1, \Rn1, \Rm1
72         and             \Rn0, \Rn0, \Rm0
73         and             \Rn1, \Rn1, \Rm1
74         and             \Rd0, \Rd0, \Rmask
75         and             \Rd1, \Rd1, \Rmask
76         add             \Rd0, \Rn0, \Rd0, lsr #1
77         add             \Rd1, \Rn1, \Rd1, lsr #1
78 .endm
79
80 .macro  JMP_ALIGN tmp, reg
           @ Dispatch on the byte offset of reg within its word.
           @ tmp receives reg & 3, reg is rounded down to a multiple of 4, and
           @ control transfers to the next local label 1, 2, 3 or 4 for an
           @ offset of 0, 1, 2 or 3. The expansion site must define those four
           @ labels after the macro. Clobbers tmp and the condition flags.
81         ands            \tmp, \reg, #3
82         bic             \reg, \reg, #3          @ does not set flags: beq below still tests the ands result
83         beq             1f
84         subs            \tmp, \tmp, #1
85         beq             2f
86         subs            \tmp, \tmp, #1
87         beq             3f
88         b    4f
89 .endm
90
91 @ ----------------------------------------------------------------
92         .align 5
93 function ff_put_pixels16_arm, export=1
94         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
95         @ block = word aligned, pixels = unaligned
           @ Copies h rows of 16 bytes from pixels to block; both pointers
           @ advance by line_size after each row.
           @ Registers: r0 = block, r1 = pixels (rounded down to a word boundary
           @ by JMP_ALIGN), r2 = line_size, r3 = rows left, r4-r8 = loaded
           @ words, r9-r12 = realigned words to store.
           @ r3 is decremented before it is tested, so h must be nonzero.
96         pld             [r1]
97         push            {r4-r11, lr}
           @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
98         JMP_ALIGN       r5,  r1
99 1:
            @ offset 0: the source is word aligned, copy four words unchanged
100         ldm             r1,  {r4-r7}
101         add             r1,  r1,  r2
102         stm             r0,  {r4-r7}
103         pld             [r1]
104         subs            r3,  r3,  #1
105         add             r0,  r0,  r2            @ plain add keeps the flags from subs
106         bne             1b
107         pop             {r4-r11, pc}            @ restore registers and return
108         .align 5
109 2:
            @ offset 1: load five words and funnel-shift them by one byte
110         ldm             r1,  {r4-r8}
111         add             r1,  r1,  r2
112         ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
113         pld             [r1]
114         subs            r3,  r3,  #1
115         stm             r0,  {r9-r12}
116         add             r0,  r0,  r2
117         bne             2b
118         pop             {r4-r11, pc}
119         .align 5
120 3:
            @ offset 2: as above with a two byte shift
121         ldm             r1,  {r4-r8}
122         add             r1,  r1,  r2
123         ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
124         pld             [r1]
125         subs            r3,  r3,  #1
126         stm             r0,  {r9-r12}
127         add             r0,  r0,  r2
128         bne             3b
129         pop             {r4-r11, pc}
130         .align 5
131 4:
            @ offset 3: as above with a three byte shift
132         ldm             r1,  {r4-r8}
133         add             r1,  r1,  r2
134         ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
135         pld             [r1]
136         subs            r3,  r3,  #1
137         stm             r0,  {r9-r12}
138         add             r0,  r0,  r2
139         bne             4b
140         pop             {r4-r11,pc}
141 endfunc
142
143 @ ----------------------------------------------------------------
144         .align 5
145 function ff_put_pixels8_arm, export=1
146         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
147         @ block = word aligned, pixels = unaligned
            @ Copies h rows of 8 bytes from pixels to block; both pointers
            @ advance by line_size after each row.
            @ Registers: r0 = block, r1 = pixels (rounded down to a word boundary
            @ by JMP_ALIGN), r2 = line_size, r3 = rows left, r4-r5 = data words,
            @ r12 = extra third word for the unaligned cases (r12 is not in the
            @ push list). r5 doubles as the JMP_ALIGN scratch register.
            @ r3 is decremented before it is tested, so h must be nonzero.
148         pld             [r1]
149         push            {r4-r5,lr}
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
150         JMP_ALIGN       r5,  r1
151 1:
            @ offset 0: the source is word aligned, copy two words unchanged
152         ldm             r1,  {r4-r5}
153         add             r1,  r1,  r2
154         subs            r3,  r3,  #1
155         pld             [r1]
156         stm             r0,  {r4-r5}
157         add             r0,  r0,  r2
158         bne             1b
159         pop             {r4-r5,pc}              @ restore registers and return
160         .align 5
161 2:
            @ offset 1: load three words and shift the window in place by one byte
162         ldm             r1,  {r4-r5, r12}
163         add             r1,  r1,  r2
164         ALIGN_DWORD     1,   r4,  r5,  r12
165         pld             [r1]
166         subs            r3,  r3,  #1
167         stm             r0,  {r4-r5}
168         add             r0,  r0,  r2
169         bne             2b
170         pop             {r4-r5,pc}
171         .align 5
172 3:
            @ offset 2: as above with a two byte shift
173         ldm             r1,  {r4-r5, r12}
174         add             r1,  r1,  r2
175         ALIGN_DWORD     2,   r4,  r5,  r12
176         pld             [r1]
177         subs            r3,  r3,  #1
178         stm             r0,  {r4-r5}
179         add             r0,  r0,  r2
180         bne             3b
181         pop             {r4-r5,pc}
182         .align 5
183 4:
            @ offset 3: as above with a three byte shift
184         ldm             r1,  {r4-r5, r12}
185         add             r1,  r1,  r2
186         ALIGN_DWORD     3,   r4,  r5,  r12
187         pld             [r1]
188         subs            r3,  r3,  #1
189         stm             r0,  {r4-r5}
190         add             r0,  r0,  r2
191         bne             4b
192         pop             {r4-r5,pc}
193 endfunc
194
195 @ ----------------------------------------------------------------
196         .align 5
197 function ff_put_pixels8_x2_arm, export=1
198         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
199         @ block = word aligned, pixels = unaligned
            @ For each of h rows, stores the bytewise rounded-up average
            @ (RND_AVG32) of the 8-byte window at pixels and the 8-byte window
            @ one byte further on. Both windows come from the same three loaded
            @ words.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = rows left, r12 = 0xfefefefe mask for RND_AVG32.
            @ r3 is decremented before it is tested, so h must be nonzero.
200         pld             [r1]
201         push            {r4-r10,lr}
202         ldr             r12, =0xfefefefe
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
203         JMP_ALIGN       r5,  r1
204 1:
            @ offset 0: r4:r5 is the first window as loaded, r6:r7 the window
            @ shifted by one byte
205         ldm             r1,  {r4-r5, r10}
206         add             r1,  r1,  r2
207         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
208         pld             [r1]
209         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
210         subs            r3,  r3,  #1
211         stm             r0,  {r8-r9}
212         add             r0,  r0,  r2
213         bne             1b
214         pop             {r4-r10,pc}             @ restore registers and return
215         .align 5
216 2:
            @ offset 1: windows at byte 1 (r6:r7) and byte 2 (r8:r9); the
            @ average is written over r4:r5, which is no longer needed
217         ldm             r1,  {r4-r5, r10}
218         add             r1,  r1,  r2
219         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
220         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
221         pld             [r1]
222         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
223         subs            r3,  r3,  #1
224         stm             r0,  {r4-r5}
225         add             r0,  r0,  r2
226         bne             2b
227         pop             {r4-r10,pc}
228         .align 5
229 3:
            @ offset 2: windows at byte 2 (r6:r7) and byte 3 (r8:r9)
230         ldm             r1,  {r4-r5, r10}
231         add             r1,  r1,  r2
232         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
233         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
234         pld             [r1]
235         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
236         subs            r3,  r3,  #1
237         stm             r0,  {r4-r5}
238         add             r0,  r0,  r2
239         bne             3b
240         pop             {r4-r10,pc}
241         .align 5
242 4:
            @ offset 3: window at byte 3 is r6:r7; the window at byte 4 is
            @ exactly the loaded words r5:r10, so no second shift is needed
243         ldm             r1,  {r4-r5, r10}
244         add             r1,  r1,  r2
245         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
246         pld             [r1]
247         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
248         subs            r3,  r3,  #1
249         stm             r0,  {r8-r9}
250         add             r0,  r0,  r2
251         bne             4b
252         pop             {r4-r10,pc}
253 endfunc
254
255         .align 5
256 function ff_put_no_rnd_pixels8_x2_arm, export=1
257         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
258         @ block = word aligned, pixels = unaligned
            @ For each of h rows, stores the bytewise rounded-down average
            @ (NO_RND_AVG32) of the 8-byte window at pixels and the 8-byte
            @ window one byte further on. Apart from the averaging macro the
            @ code mirrors ff_put_pixels8_x2_arm.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = rows left, r12 = 0xfefefefe mask.
            @ r3 is decremented before it is tested, so h must be nonzero.
259         pld             [r1]
260         push            {r4-r10,lr}
261         ldr             r12, =0xfefefefe
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
262         JMP_ALIGN       r5,  r1
263 1:
            @ offset 0: r4:r5 is the first window as loaded, r6:r7 the window
            @ shifted by one byte
264         ldm             r1,  {r4-r5, r10}
265         add             r1,  r1,  r2
266         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
267         pld             [r1]
268         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
269         subs            r3,  r3,  #1
270         stm             r0,  {r8-r9}
271         add             r0,  r0,  r2
272         bne             1b
273         pop             {r4-r10,pc}             @ restore registers and return
274         .align 5
275 2:
            @ offset 1: windows at byte 1 (r6:r7) and byte 2 (r8:r9)
276         ldm             r1,  {r4-r5, r10}
277         add             r1,  r1,  r2
278         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
279         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
280         pld             [r1]
281         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
282         subs            r3,  r3,  #1
283         stm             r0,  {r4-r5}
284         add             r0,  r0,  r2
285         bne             2b
286         pop             {r4-r10,pc}
287         .align 5
288 3:
            @ offset 2: windows at byte 2 (r6:r7) and byte 3 (r8:r9)
289         ldm             r1,  {r4-r5, r10}
290         add             r1,  r1,  r2
291         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
292         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
293         pld             [r1]
294         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
295         subs            r3,  r3,  #1
296         stm             r0,  {r4-r5}
297         add             r0,  r0,  r2
298         bne             3b
299         pop             {r4-r10,pc}
300         .align 5
301 4:
            @ offset 3: window at byte 3 is r6:r7; the window at byte 4 is
            @ exactly the loaded words r5:r10, so no second shift is needed
302         ldm             r1,  {r4-r5, r10}
303         add             r1,  r1,  r2
304         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
305         pld             [r1]
306         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
307         subs            r3,  r3,  #1
308         stm             r0,  {r8-r9}
309         add             r0,  r0,  r2
310         bne             4b
311         pop             {r4-r10,pc}
312 endfunc
313
314
315 @ ----------------------------------------------------------------
316         .align 5
317 function ff_put_pixels8_y2_arm, export=1
318         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
319         @ block = word aligned, pixels = unaligned
            @ Stores, for each output row, the bytewise rounded-up average
            @ (RND_AVG32) of one 8-byte source row and the source row that
            @ follows it (line_size bytes further on).
            @ Each pass of the inner loop (label 6) emits two output rows, so
            @ r3 is halved on entry: an odd h loses its last row, and h below 2
            @ gives a zero counter that is decremented before being tested.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = row pairs left, r12 = 0xfefefefe mask.
320         pld             [r1]
321         push            {r4-r11,lr}
322         mov             r3,  r3,  lsr #1
323         ldr             r12, =0xfefefefe
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
324         JMP_ALIGN       r5,  r1
325 1:
            @ offset 0: rows are used as loaded. r4:r5 is primed with the
            @ first row; the loop alternates between r4:r5 and r6:r7 as the
            @ newer row, because RND_AVG32 destroys its first operand pair.
326         ldm             r1,  {r4-r5}
327         add             r1,  r1,  r2
328 6:      ldm             r1,  {r6-r7}
329         add             r1,  r1,  r2
330         pld             [r1]
331         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
332         ldm             r1,  {r4-r5}
333         add             r1,  r1,  r2
334         stm             r0,  {r8-r9}
335         add             r0,  r0,  r2
336         pld             [r1]
337         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
338         subs            r3,  r3,  #1
339         stm             r0,  {r8-r9}
340         add             r0,  r0,  r2
341         bne             6b
342         pop             {r4-r11,pc}             @ restore registers and return
343         .align 5
344 2:
            @ offset 1: every row is loaded as three words and shifted in
            @ place by one byte before averaging
345         ldm             r1,  {r4-r6}
346         add             r1,  r1,  r2
347         pld             [r1]
348         ALIGN_DWORD     1,   r4,  r5,  r6
349 6:      ldm             r1,  {r7-r9}
350         add             r1,  r1,  r2
351         pld             [r1]
352         ALIGN_DWORD     1,   r7,  r8,  r9
353         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
354         stm             r0,  {r10-r11}
355         add             r0,  r0,  r2
356         ldm             r1,  {r4-r6}
357         add             r1,  r1,  r2
358         pld             [r1]
359         ALIGN_DWORD     1,   r4,  r5,  r6
360         subs            r3,  r3,  #1
            @ RND_AVG32 leaves the flags alone, so bne below tests the subs
361         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
362         stm             r0,  {r10-r11}
363         add             r0,  r0,  r2
364         bne             6b
365         pop             {r4-r11,pc}
366         .align 5
367 3:
            @ offset 2: as label 2 with a two byte shift
368         ldm             r1,  {r4-r6}
369         add             r1,  r1,  r2
370         pld             [r1]
371         ALIGN_DWORD     2,   r4,  r5,  r6
372 6:      ldm             r1,  {r7-r9}
373         add             r1,  r1,  r2
374         pld             [r1]
375         ALIGN_DWORD     2,   r7,  r8,  r9
376         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
377         stm             r0,  {r10-r11}
378         add             r0,  r0,  r2
379         ldm             r1,  {r4-r6}
380         add             r1,  r1,  r2
381         pld             [r1]
382         ALIGN_DWORD     2,   r4,  r5,  r6
383         subs            r3,  r3,  #1
384         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
385         stm             r0,  {r10-r11}
386         add             r0,  r0,  r2
387         bne             6b
388         pop             {r4-r11,pc}
389         .align 5
390 4:
            @ offset 3: as label 2 with a three byte shift
391         ldm             r1,  {r4-r6}
392         add             r1,  r1,  r2
393         pld             [r1]
394         ALIGN_DWORD     3,   r4,  r5,  r6
395 6:      ldm             r1,  {r7-r9}
396         add             r1,  r1,  r2
397         pld             [r1]
398         ALIGN_DWORD     3,   r7,  r8,  r9
399         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
400         stm             r0,  {r10-r11}
401         add             r0,  r0,  r2
402         ldm             r1,  {r4-r6}
403         add             r1,  r1,  r2
404         pld             [r1]
405         ALIGN_DWORD     3,   r4,  r5,  r6
406         subs            r3,  r3,  #1
407         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
408         stm             r0,  {r10-r11}
409         add             r0,  r0,  r2
410         bne             6b
411         pop             {r4-r11,pc}
412 endfunc
413
414         .align 5
415 function ff_put_no_rnd_pixels8_y2_arm, export=1
416         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
417         @ block = word aligned, pixels = unaligned
            @ Stores, for each output row, the bytewise rounded-down average
            @ (NO_RND_AVG32) of one 8-byte source row and the source row that
            @ follows it. Apart from the averaging macro the code mirrors
            @ ff_put_pixels8_y2_arm.
            @ Each pass of the inner loop (label 6) emits two output rows, so
            @ r3 is halved on entry: an odd h loses its last row, and h below 2
            @ gives a zero counter that is decremented before being tested.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = row pairs left, r12 = 0xfefefefe mask.
418         pld             [r1]
419         push            {r4-r11,lr}
420         mov             r3,  r3,  lsr #1
421         ldr             r12, =0xfefefefe
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
422         JMP_ALIGN       r5,  r1
423 1:
            @ offset 0: rows are used as loaded. r4:r5 is primed with the
            @ first row; the loop alternates between r4:r5 and r6:r7 as the
            @ newer row, because NO_RND_AVG32 destroys its first operand pair.
424         ldm             r1,  {r4-r5}
425         add             r1,  r1,  r2
426 6:      ldm             r1,  {r6-r7}
427         add             r1,  r1,  r2
428         pld             [r1]
429         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
430         ldm             r1,  {r4-r5}
431         add             r1,  r1,  r2
432         stm             r0,  {r8-r9}
433         add             r0,  r0,  r2
434         pld             [r1]
435         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
436         subs            r3,  r3,  #1
437         stm             r0,  {r8-r9}
438         add             r0,  r0,  r2
439         bne             6b
440         pop             {r4-r11,pc}             @ restore registers and return
441         .align 5
442 2:
            @ offset 1: every row is loaded as three words and shifted in
            @ place by one byte before averaging
443         ldm             r1,  {r4-r6}
444         add             r1,  r1,  r2
445         pld             [r1]
446         ALIGN_DWORD     1,   r4,  r5,  r6
447 6:      ldm             r1,  {r7-r9}
448         add             r1,  r1,  r2
449         pld             [r1]
450         ALIGN_DWORD     1,   r7,  r8,  r9
451         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
452         stm             r0,  {r10-r11}
453         add             r0,  r0,  r2
454         ldm             r1,  {r4-r6}
455         add             r1,  r1,  r2
456         pld             [r1]
457         ALIGN_DWORD     1,   r4,  r5,  r6
458         subs            r3,  r3,  #1
            @ NO_RND_AVG32 leaves the flags alone, so bne below tests the subs
459         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
460         stm             r0,  {r10-r11}
461         add             r0,  r0,  r2
462         bne             6b
463         pop             {r4-r11,pc}
464         .align 5
465 3:
            @ offset 2: as label 2 with a two byte shift
466         ldm             r1,  {r4-r6}
467         add             r1,  r1,  r2
468         pld             [r1]
469         ALIGN_DWORD     2,   r4,  r5,  r6
470 6:      ldm             r1,  {r7-r9}
471         add             r1,  r1,  r2
472         pld             [r1]
473         ALIGN_DWORD     2,   r7,  r8,  r9
474         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
475         stm             r0,  {r10-r11}
476         add             r0,  r0,  r2
477         ldm             r1,  {r4-r6}
478         add             r1,  r1,  r2
479         pld             [r1]
480         ALIGN_DWORD     2,   r4,  r5,  r6
481         subs            r3,  r3,  #1
482         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
483         stm             r0,  {r10-r11}
484         add             r0,  r0,  r2
485         bne             6b
486         pop             {r4-r11,pc}
487         .align 5
488 4:
            @ offset 3: as label 2 with a three byte shift
489         ldm             r1,  {r4-r6}
490         add             r1,  r1,  r2
491         pld             [r1]
492         ALIGN_DWORD     3,   r4,  r5,  r6
493 6:      ldm             r1,  {r7-r9}
494         add             r1,  r1,  r2
495         pld             [r1]
496         ALIGN_DWORD     3,   r7,  r8,  r9
497         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
498         stm             r0,  {r10-r11}
499         add             r0,  r0,  r2
500         ldm             r1,  {r4-r6}
501         add             r1,  r1,  r2
502         pld             [r1]
503         ALIGN_DWORD     3,   r4,  r5,  r6
504         subs            r3,  r3,  #1
505         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
506         stm             r0,  {r10-r11}
507         add             r0,  r0,  r2
508         bne             6b
509         pop             {r4-r11,pc}
510 endfunc
511
512         .ltorg                                  @ emit the pending literal pool (constants from the ldr = forms above) here
513
514 @ ----------------------------------------------------------------
515 .macro  RND_XY2_IT align, rnd
516         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
517         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
            @ One source-row step of the 2x2 average.
            @ Loads three words of the current row from r1 (already word
            @ aligned), advances r1 by r2, and builds in r4:r5 and r6:r7 the two
            @ 8-byte windows that start at byte offset align and align + 1.
            @ Which pair holds which window depends on align; the sums below
            @ are symmetric, so the order does not matter.
            @ Results:
            @   r8:r9   = sum of the low two bits of every byte of both windows,
            @             plus the rounding constant when r3 is even on entry
            @   r10:r11 = sum of the upper six bits (shifted down by two) of
            @             every byte of both windows
            @ rnd is the shift mnemonic that derives the rounding constant from
            @ 0x03030303: lsl yields 0x02020202, lsr yields 0x01010101.
            @ The final subs decrements r3 and leaves the flags for the caller.
            @ Clobbers r4-r12, r14 and the flags. r14 is the link register, so
            @ the enclosing function must have saved it.
518 .if \align == 0
519         ldm             r1,  {r6-r8}
520 .elseif \align == 3
521         ldm             r1,  {r5-r7}
522 .else
523         ldm             r1,  {r8-r10}
524 .endif
525         add             r1,  r1,  r2
526         pld             [r1]
527 .if \align == 0
            @ r6:r7 is the window at byte 0 as loaded, r4:r5 the one at byte 1
528         ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
529 .elseif \align == 1
530         ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
531         ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
532 .elseif \align == 2
533         ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
534         ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
535 .elseif \align == 3
            @ r4:r5 becomes the window at byte 3; the loaded r6:r7 already is
            @ the window at byte 4. r5 is both a source and a destination here.
536         ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
537 .endif
538         ldr             r14, =0x03030303
539         tst             r3,  #1                 @ eq when r3 is even; nothing before the addeq pair changes the flags
540         and             r8,  r4,  r14
541         and             r9,  r5,  r14
542         and             r10, r6,  r14
543         and             r11, r7,  r14
544         it              eq
545         andeq           r14, r14, r14, \rnd #1
546         add             r8,  r8,  r10
547         add             r9,  r9,  r11
548         ldr             r12, =0xfcfcfcfc >> 2
549         itt             eq
550         addeq           r8,  r8,  r14
551         addeq           r9,  r9,  r14
552         and             r4,  r12, r4,  lsr #2
553         and             r5,  r12, r5,  lsr #2
554         and             r6,  r12, r6,  lsr #2
555         and             r7,  r12, r7,  lsr #2
556         add             r10, r4,  r6
557         add             r11, r5,  r7
558         subs            r3,  r3,  #1
559 .endm
560
561 .macro RND_XY2_EXPAND align, rnd
            @ Complete row loop for one source alignment of the 2x2 average.
            @ The first RND_XY2_IT primes the sums for the first source row.
            @ Each pass from label 6 then:
            @   - parks the previous row sums (r8-r11) on the stack,
            @   - computes the sums of the next source row,
            @   - pops the previous sums into r4-r7 (r4:r5 low, r6:r7 high),
            @   - stores ((low total >> 2) & 0x0f0f0f0f) + high total, 8 bytes,
            @   - advances r0 by r2.
            @ The loop continues while the r3 value left by the subs inside
            @ RND_XY2_IT is not negative; none of the instructions between that
            @ subs and the bge update the flags.
            @ Ends with a pop of r4-r11 and pc, so the enclosing function must
            @ have pushed r4-r11 and lr, and control never falls out of here.
562         RND_XY2_IT      \align, \rnd
563 6:      push            {r8-r11}
564         RND_XY2_IT      \align, \rnd
565         pop             {r4-r7}
566         add             r4,  r4,  r8
567         add             r5,  r5,  r9
568         ldr             r14, =0x0f0f0f0f
569         add             r6,  r6,  r10
570         add             r7,  r7,  r11
571         and             r4,  r14, r4,  lsr #2
572         and             r5,  r14, r5,  lsr #2
573         add             r4,  r4,  r6
574         add             r5,  r5,  r7
575         stm             r0,  {r4-r5}
576         add             r0,  r0,  r2
577         bge             6b
578         pop             {r4-r11,pc}
579 .endm
580
581         .align 5
582 function ff_put_pixels8_xy2_arm, export=1
583         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
584         @ block = word aligned, pixels = unaligned
            @ 2x2 average: each output byte combines a source byte, the byte
            @ after it, and the same two positions in the following row, using
            @ the split low/high sums of RND_XY2_IT. Passing lsl selects the
            @ rounding constant 0x02020202.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = row counter.
            @ The rounding constant is added only to rows processed while r3 is
            @ even, so an even h appears to be assumed - TODO confirm.
            @ Each RND_XY2_EXPAND ends with its own return, so the four
            @ alignment cases do not fall through into one another.
585         pld             [r1]
586         push            {r4-r11,lr} @ R14 is also called LR
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
587         JMP_ALIGN       r5,  r1
588 1:      RND_XY2_EXPAND  0, lsl
589         .align 5
590 2:      RND_XY2_EXPAND  1, lsl
591         .align 5
592 3:      RND_XY2_EXPAND  2, lsl
593         .align 5
594 4:      RND_XY2_EXPAND  3, lsl
595 endfunc
596
597         .align 5
598 function ff_put_no_rnd_pixels8_xy2_arm, export=1
599         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
600         @ block = word aligned, pixels = unaligned
            @ 2x2 average with the smaller rounding constant: identical to
            @ ff_put_pixels8_xy2_arm except that lsr is passed to the macros,
            @ which selects 0x01010101 instead of 0x02020202.
            @ Registers: r0 = block, r1 = pixels (word aligned after JMP_ALIGN),
            @ r2 = line_size, r3 = row counter.
            @ The rounding constant is added only to rows processed while r3 is
            @ even, so an even h appears to be assumed - TODO confirm.
            @ Each RND_XY2_EXPAND ends with its own return, so the four
            @ alignment cases do not fall through into one another.
601         pld             [r1]
602         push            {r4-r11,lr}
            @ r5 = pixels & 3, then branch to label 1..4 for offset 0..3
603         JMP_ALIGN       r5,  r1
604 1:      RND_XY2_EXPAND  0, lsr
605         .align 5
606 2:      RND_XY2_EXPAND  1, lsr
607         .align 5
608 3:      RND_XY2_EXPAND  2, lsr
609         .align 5
610 4:      RND_XY2_EXPAND  3, lsr
611 endfunc