]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/hpeldsp_arm.S
Merge commit 'cef914e08310166112ac09567e66452a7679bfc8'
[ffmpeg] / libavcodec / arm / hpeldsp_arm.S
1 @
2 @ ARMv4-optimized halfpel functions
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "libavutil/arm/asm.S"
24
@ When HAVE_ARMV5TE_EXTERNAL is zero, pld is redefined to the comment
@ character, so every pld line below assembles to nothing.
@ NOTE(review): presumably this covers assemblers without ARMv5TE support.
25 #if !HAVE_ARMV5TE_EXTERNAL
26 #define pld @
27 #endif
28
29 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ Combine five source words into four destination words:
        @   Rd[i] = (Rn[i] >> (shift * 8)) | (Rn[i + 1] << (32 - shift * 8))
        @ Assuming little-endian word loads, Rd0..Rd3 receive the 16 bytes
        @ that start shift bytes into Rn0..Rn4.
        @ Only the Rd registers are written.  Invoked in this file with
        @ shift = 1, 2 or 3.
30         mov             \Rd0, \Rn0, lsr #(\shift * 8)
31         mov             \Rd1, \Rn1, lsr #(\shift * 8)
32         mov             \Rd2, \Rn2, lsr #(\shift * 8)
33         mov             \Rd3, \Rn3, lsr #(\shift * 8)
34         orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
35         orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
36         orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
37         orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
38 .endm
39 .macro  ALIGN_DWORD shift, R0, R1, R2
        @ In-place variant for two words:
        @   R0 = (R0 >> (shift * 8)) | (R1 << (32 - shift * 8))
        @   R1 = (R1 >> (shift * 8)) | (R2 << (32 - shift * 8))
        @ R0 and R1 are overwritten, R2 is only read.
        @ R0 is completed first because it needs the unshifted R1.
40         mov             \R0, \R0, lsr #(\shift * 8)
41         orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
42         mov             \R1, \R1, lsr #(\shift * 8)
43         orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
44 .endm
45 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        @ Two-word variant with separate destinations:
        @   Rdst0 = (Rsrc0 >> (shift * 8)) | (Rsrc1 << (32 - shift * 8))
        @   Rdst1 = (Rsrc1 >> (shift * 8)) | (Rsrc2 << (32 - shift * 8))
        @ Both destinations are written before Rsrc1 and Rsrc2 are read again
        @ by the orr instructions, so a destination must not be Rsrc1 or Rsrc2.
46         mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
47         mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
48         orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
49         orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
50 .endm
51
52 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Byte-wise average of two register pairs, rounding halves up:
        @ each result byte is (n + m + 1) >> 1.
53         @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
54         @ Rmask = 0xFEFEFEFE
55         @ Rn = destroy
        @ Rn0/Rn1 end up holding Rn | Rm; Rm0/Rm1 and Rmask are only read.
        @ Masking with Rmask clears bit 0 of every byte so that the shift
        @ right by one cannot move a bit into the neighbouring byte.
        @ Rd is written first, so it must differ from both Rn and Rm.
56         eor             \Rd0, \Rn0, \Rm0
57         eor             \Rd1, \Rn1, \Rm1
58         orr             \Rn0, \Rn0, \Rm0
59         orr             \Rn1, \Rn1, \Rm1
60         and             \Rd0, \Rd0, \Rmask
61         and             \Rd1, \Rd1, \Rmask
62         sub             \Rd0, \Rn0, \Rd0, lsr #1
63         sub             \Rd1, \Rn1, \Rd1, lsr #1
64 .endm
65
66 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Byte-wise average of two register pairs, rounding halves down:
        @ each result byte is (n + m) >> 1.
67         @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
68         @ Rmask = 0xFEFEFEFE
69         @ Rn = destroy
        @ Rn0/Rn1 end up holding Rn & Rm; Rm0/Rm1 and Rmask are only read.
        @ Masking with Rmask clears bit 0 of every byte so that the shift
        @ right by one cannot move a bit into the neighbouring byte.
        @ Rd is written first, so it must differ from both Rn and Rm.
70         eor             \Rd0, \Rn0, \Rm0
71         eor             \Rd1, \Rn1, \Rm1
72         and             \Rn0, \Rn0, \Rm0
73         and             \Rn1, \Rn1, \Rm1
74         and             \Rd0, \Rd0, \Rmask
75         and             \Rd1, \Rd1, \Rmask
76         add             \Rd0, \Rn0, \Rd0, lsr #1
77         add             \Rd1, \Rn1, \Rd1, lsr #1
78 .endm
79
80 .macro  JMP_ALIGN tmp, reg
        @ Dispatch on the byte offset of the pointer in reg within its word.
        @ tmp receives reg & 3 (and is then counted down), reg is rounded
        @ down to a multiple of 4, and control goes to the next local label
        @ 1, 2, 3 or 4 for an offset of 0, 1, 2 or 3.  The code following the
        @ macro has to provide those four labels.  Flags are clobbered.
81         ands            \tmp, \reg, #3
        @ bic leaves the flags alone, so beq still tests the ands result.
82         bic             \reg, \reg, #3
83         beq             1f
84         subs            \tmp, \tmp, #1
85         beq             2f
86         subs            \tmp, \tmp, #1
87         beq             3f
88         b    4f
89 .endm
90
91 @ ----------------------------------------------------------------
92 function ff_put_pixels16_arm, export=1, align=5
93         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
94         @ block = word aligned, pixels = unaligned
        @ Copies h rows of 16 bytes from pixels to block; both pointers are
        @ advanced by line_size after every row.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = rows left.
        @ The row counter is tested after the decrement, so h is expected to
        @ be at least 1.
95         pld             [r1]
96         push            {r4-r11, lr}
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
97         JMP_ALIGN       r5,  r1
        @ Offset 0: source is word aligned, move four words per row.
98 1:
99         ldm             r1,  {r4-r7}
100         add             r1,  r1,  r2
101         stm             r0,  {r4-r7}
102         pld             [r1]
103         subs            r3,  r3,  #1
104         add             r0,  r0,  r2
105         bne             1b
106         pop             {r4-r11, pc}
107         .align 5
        @ Offset 1: load five whole words, then shift the 16 wanted bytes
        @ into r9-r12 before storing them.
108 2:
109         ldm             r1,  {r4-r8}
110         add             r1,  r1,  r2
111         ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
112         pld             [r1]
113         subs            r3,  r3,  #1
114         stm             r0,  {r9-r12}
115         add             r0,  r0,  r2
116         bne             2b
117         pop             {r4-r11, pc}
118         .align 5
        @ Offset 2: same as above with a two-byte shift.
119 3:
120         ldm             r1,  {r4-r8}
121         add             r1,  r1,  r2
122         ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
123         pld             [r1]
124         subs            r3,  r3,  #1
125         stm             r0,  {r9-r12}
126         add             r0,  r0,  r2
127         bne             3b
128         pop             {r4-r11, pc}
129         .align 5
        @ Offset 3: same as above with a three-byte shift.
130 4:
131         ldm             r1,  {r4-r8}
132         add             r1,  r1,  r2
133         ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
134         pld             [r1]
135         subs            r3,  r3,  #1
136         stm             r0,  {r9-r12}
137         add             r0,  r0,  r2
138         bne             4b
139         pop             {r4-r11,pc}
140 endfunc
141
142 @ ----------------------------------------------------------------
143 function ff_put_pixels8_arm, export=1, align=5
144         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
145         @ block = word aligned, pixels = unaligned
        @ Copies h rows of 8 bytes from pixels to block; both pointers are
        @ advanced by line_size after every row.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = rows left,
        @ r4/r5 = row data, r12 = third source word in the unaligned cases.
        @ The row counter is tested after the decrement, so h is expected to
        @ be at least 1.
146         pld             [r1]
147         push            {r4-r5,lr}
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
148         JMP_ALIGN       r5,  r1
        @ Offset 0: source is word aligned, move two words per row.
149 1:
150         ldm             r1,  {r4-r5}
151         add             r1,  r1,  r2
152         subs            r3,  r3,  #1
153         pld             [r1]
154         stm             r0,  {r4-r5}
155         add             r0,  r0,  r2
156         bne             1b
157         pop             {r4-r5,pc}
158         .align 5
        @ Offset 1: load three whole words and shift r4/r5 in place by one byte.
159 2:
160         ldm             r1,  {r4-r5, r12}
161         add             r1,  r1,  r2
162         ALIGN_DWORD     1,   r4,  r5,  r12
163         pld             [r1]
164         subs            r3,  r3,  #1
165         stm             r0,  {r4-r5}
166         add             r0,  r0,  r2
167         bne             2b
168         pop             {r4-r5,pc}
169         .align 5
        @ Offset 2: same as above with a two-byte shift.
170 3:
171         ldm             r1,  {r4-r5, r12}
172         add             r1,  r1,  r2
173         ALIGN_DWORD     2,   r4,  r5,  r12
174         pld             [r1]
175         subs            r3,  r3,  #1
176         stm             r0,  {r4-r5}
177         add             r0,  r0,  r2
178         bne             3b
179         pop             {r4-r5,pc}
180         .align 5
        @ Offset 3: same as above with a three-byte shift.
181 4:
182         ldm             r1,  {r4-r5, r12}
183         add             r1,  r1,  r2
184         ALIGN_DWORD     3,   r4,  r5,  r12
185         pld             [r1]
186         subs            r3,  r3,  #1
187         stm             r0,  {r4-r5}
188         add             r0,  r0,  r2
189         bne             4b
190         pop             {r4-r5,pc}
191 endfunc
192
193 @ ----------------------------------------------------------------
194 function ff_put_pixels8_x2_arm, export=1, align=5
195         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
196         @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel interpolation of an 8-byte-wide block:
        @   block[x] = (pixels[x] + pixels[x + 1] + 1) >> 1  for x = 0..7,
        @ repeated for h rows with both pointers stepping by line_size.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = rows left,
        @ r12 = 0xfefefefe (mask operand of RND_AVG32).
        @ Every row loads three whole words from the rounded-down address and
        @ cuts the two 8-byte operands out of them by shifting.
        @ The row counter is tested after the decrement, so h is expected to
        @ be at least 1.
197         pld             [r1]
198         push            {r4-r10,lr}
199         ldr             r12, =0xfefefefe
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
200         JMP_ALIGN       r5,  r1
        @ Offset 0: left operand is r4/r5 as loaded, right operand is the
        @ same data shifted by one byte (r6/r7).
201 1:
202         ldm             r1,  {r4-r5, r10}
203         add             r1,  r1,  r2
204         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
205         pld             [r1]
206         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
207         subs            r3,  r3,  #1
208         stm             r0,  {r8-r9}
209         add             r0,  r0,  r2
210         bne             1b
211         pop             {r4-r10,pc}
212         .align 5
        @ Offset 1: left operand = shift by 1 (r6/r7), right operand = shift
        @ by 2 (r8/r9); the result reuses r4/r5.
213 2:
214         ldm             r1,  {r4-r5, r10}
215         add             r1,  r1,  r2
216         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
217         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
218         pld             [r1]
219         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
220         subs            r3,  r3,  #1
221         stm             r0,  {r4-r5}
222         add             r0,  r0,  r2
223         bne             2b
224         pop             {r4-r10,pc}
225         .align 5
        @ Offset 2: left operand = shift by 2, right operand = shift by 3.
226 3:
227         ldm             r1,  {r4-r5, r10}
228         add             r1,  r1,  r2
229         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
230         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
231         pld             [r1]
232         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
233         subs            r3,  r3,  #1
234         stm             r0,  {r4-r5}
235         add             r0,  r0,  r2
236         bne             3b
237         pop             {r4-r10,pc}
238         .align 5
        @ Offset 3: left operand = shift by 3 (r6/r7); the right operand is
        @ one byte further on, which is exactly the second and third loaded
        @ words (r5, r10), so no second shift is needed.
239 4:
240         ldm             r1,  {r4-r5, r10}
241         add             r1,  r1,  r2
242         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
243         pld             [r1]
244         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
245         subs            r3,  r3,  #1
246         stm             r0,  {r8-r9}
247         add             r0,  r0,  r2
248         bne             4b
249         pop             {r4-r10,pc}
250 endfunc
251
252 function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5
253         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
254         @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel interpolation of an 8-byte-wide block without
        @ the rounding bias:
        @   block[x] = (pixels[x] + pixels[x + 1]) >> 1  for x = 0..7,
        @ repeated for h rows with both pointers stepping by line_size.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = rows left,
        @ r12 = 0xfefefefe (mask operand of NO_RND_AVG32).
        @ Every row loads three whole words from the rounded-down address and
        @ cuts the two 8-byte operands out of them by shifting.
        @ The row counter is tested after the decrement, so h is expected to
        @ be at least 1.
255         pld             [r1]
256         push            {r4-r10,lr}
257         ldr             r12, =0xfefefefe
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
258         JMP_ALIGN       r5,  r1
        @ Offset 0: left operand is r4/r5 as loaded, right operand is the
        @ same data shifted by one byte (r6/r7).
259 1:
260         ldm             r1,  {r4-r5, r10}
261         add             r1,  r1,  r2
262         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
263         pld             [r1]
264         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
265         subs            r3,  r3,  #1
266         stm             r0,  {r8-r9}
267         add             r0,  r0,  r2
268         bne             1b
269         pop             {r4-r10,pc}
270         .align 5
        @ Offset 1: left operand = shift by 1 (r6/r7), right operand = shift
        @ by 2 (r8/r9); the result reuses r4/r5.
271 2:
272         ldm             r1,  {r4-r5, r10}
273         add             r1,  r1,  r2
274         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
275         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
276         pld             [r1]
277         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
278         subs            r3,  r3,  #1
279         stm             r0,  {r4-r5}
280         add             r0,  r0,  r2
281         bne             2b
282         pop             {r4-r10,pc}
283         .align 5
        @ Offset 2: left operand = shift by 2, right operand = shift by 3.
284 3:
285         ldm             r1,  {r4-r5, r10}
286         add             r1,  r1,  r2
287         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
288         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
289         pld             [r1]
290         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
291         subs            r3,  r3,  #1
292         stm             r0,  {r4-r5}
293         add             r0,  r0,  r2
294         bne             3b
295         pop             {r4-r10,pc}
296         .align 5
        @ Offset 3: left operand = shift by 3 (r6/r7); the right operand is
        @ one byte further on, which is exactly the second and third loaded
        @ words (r5, r10), so no second shift is needed.
297 4:
298         ldm             r1,  {r4-r5, r10}
299         add             r1,  r1,  r2
300         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
301         pld             [r1]
302         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
303         subs            r3,  r3,  #1
304         stm             r0,  {r8-r9}
305         add             r0,  r0,  r2
306         bne             4b
307         pop             {r4-r10,pc}
308 endfunc
309
310
311 @ ----------------------------------------------------------------
312 function ff_put_pixels8_y2_arm, export=1, align=5
313         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
314         @ block = word aligned, pixels = unaligned
        @ Vertical half-pel interpolation of an 8-byte-wide block: output
        @ row i is the byte-wise rounded-up average of source rows i and i + 1.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size,
        @ r3 = h / 2 = loop passes left, r12 = 0xfefefefe (RND_AVG32 mask).
        @ Each pass of the loop at label 6 writes two rows.  The newest source
        @ row alternates between two register sets, so every source row is
        @ loaded once; one row is read before the loop and two per pass.
        @ NOTE(review): because h is halved, an odd h leaves the last row
        @ unwritten, and h < 2 starts the post-decrement counter at zero;
        @ callers presumably always pass an even h >= 2 - confirm.
315         pld             [r1]
316         push            {r4-r11,lr}
317         mov             r3,  r3,  lsr #1
318         ldr             r12, =0xfefefefe
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
319         JMP_ALIGN       r5,  r1
        @ Offset 0: rows are loaded straight into r4/r5 and r6/r7.
320 1:
321         ldm             r1,  {r4-r5}
322         add             r1,  r1,  r2
323 6:      ldm             r1,  {r6-r7}
324         add             r1,  r1,  r2
325         pld             [r1]
        @ RND_AVG32 destroys the older row (r4/r5), which is reloaded next.
326         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
327         ldm             r1,  {r4-r5}
328         add             r1,  r1,  r2
329         stm             r0,  {r8-r9}
330         add             r0,  r0,  r2
331         pld             [r1]
        @ Second row of the pass; r4/r5 survive as the old row of the next pass.
332         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
333         subs            r3,  r3,  #1
334         stm             r0,  {r8-r9}
335         add             r0,  r0,  r2
336         bne             6b
337         pop             {r4-r11,pc}
338         .align 5
        @ Offset 1: every row is loaded as three words and shifted in place
        @ by one byte; rows alternate between r4/r5 and r7/r8.
339 2:
340         ldm             r1,  {r4-r6}
341         add             r1,  r1,  r2
342         pld             [r1]
343         ALIGN_DWORD     1,   r4,  r5,  r6
344 6:      ldm             r1,  {r7-r9}
345         add             r1,  r1,  r2
346         pld             [r1]
347         ALIGN_DWORD     1,   r7,  r8,  r9
348         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
349         stm             r0,  {r10-r11}
350         add             r0,  r0,  r2
351         ldm             r1,  {r4-r6}
352         add             r1,  r1,  r2
353         pld             [r1]
354         ALIGN_DWORD     1,   r4,  r5,  r6
        @ The flags set here are consumed by the bne at the end of the pass;
        @ nothing in between modifies them.
355         subs            r3,  r3,  #1
356         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
357         stm             r0,  {r10-r11}
358         add             r0,  r0,  r2
359         bne             6b
360         pop             {r4-r11,pc}
361         .align 5
        @ Offset 2: same as offset 1 with a two-byte shift.
362 3:
363         ldm             r1,  {r4-r6}
364         add             r1,  r1,  r2
365         pld             [r1]
366         ALIGN_DWORD     2,   r4,  r5,  r6
367 6:      ldm             r1,  {r7-r9}
368         add             r1,  r1,  r2
369         pld             [r1]
370         ALIGN_DWORD     2,   r7,  r8,  r9
371         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
372         stm             r0,  {r10-r11}
373         add             r0,  r0,  r2
374         ldm             r1,  {r4-r6}
375         add             r1,  r1,  r2
376         pld             [r1]
377         ALIGN_DWORD     2,   r4,  r5,  r6
378         subs            r3,  r3,  #1
379         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
380         stm             r0,  {r10-r11}
381         add             r0,  r0,  r2
382         bne             6b
383         pop             {r4-r11,pc}
384         .align 5
        @ Offset 3: same as offset 1 with a three-byte shift.
385 4:
386         ldm             r1,  {r4-r6}
387         add             r1,  r1,  r2
388         pld             [r1]
389         ALIGN_DWORD     3,   r4,  r5,  r6
390 6:      ldm             r1,  {r7-r9}
391         add             r1,  r1,  r2
392         pld             [r1]
393         ALIGN_DWORD     3,   r7,  r8,  r9
394         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
395         stm             r0,  {r10-r11}
396         add             r0,  r0,  r2
397         ldm             r1,  {r4-r6}
398         add             r1,  r1,  r2
399         pld             [r1]
400         ALIGN_DWORD     3,   r4,  r5,  r6
401         subs            r3,  r3,  #1
402         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
403         stm             r0,  {r10-r11}
404         add             r0,  r0,  r2
405         bne             6b
406         pop             {r4-r11,pc}
407 endfunc
408
409 function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5
410         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
411         @ block = word aligned, pixels = unaligned
        @ Vertical half-pel interpolation of an 8-byte-wide block without the
        @ rounding bias: output row i is the byte-wise rounded-down average
        @ of source rows i and i + 1.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size,
        @ r3 = h / 2 = loop passes left, r12 = 0xfefefefe (NO_RND_AVG32 mask).
        @ Each pass of the loop at label 6 writes two rows.  The newest source
        @ row alternates between two register sets, so every source row is
        @ loaded once; one row is read before the loop and two per pass.
        @ NOTE(review): because h is halved, an odd h leaves the last row
        @ unwritten, and h < 2 starts the post-decrement counter at zero;
        @ callers presumably always pass an even h >= 2 - confirm.
412         pld             [r1]
413         push            {r4-r11,lr}
414         mov             r3,  r3,  lsr #1
415         ldr             r12, =0xfefefefe
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
416         JMP_ALIGN       r5,  r1
        @ Offset 0: rows are loaded straight into r4/r5 and r6/r7.
417 1:
418         ldm             r1,  {r4-r5}
419         add             r1,  r1,  r2
420 6:      ldm             r1,  {r6-r7}
421         add             r1,  r1,  r2
422         pld             [r1]
        @ NO_RND_AVG32 destroys the older row (r4/r5), which is reloaded next.
423         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
424         ldm             r1,  {r4-r5}
425         add             r1,  r1,  r2
426         stm             r0,  {r8-r9}
427         add             r0,  r0,  r2
428         pld             [r1]
        @ Second row of the pass; r4/r5 survive as the old row of the next pass.
429         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
430         subs            r3,  r3,  #1
431         stm             r0,  {r8-r9}
432         add             r0,  r0,  r2
433         bne             6b
434         pop             {r4-r11,pc}
435         .align 5
        @ Offset 1: every row is loaded as three words and shifted in place
        @ by one byte; rows alternate between r4/r5 and r7/r8.
436 2:
437         ldm             r1,  {r4-r6}
438         add             r1,  r1,  r2
439         pld             [r1]
440         ALIGN_DWORD     1,   r4,  r5,  r6
441 6:      ldm             r1,  {r7-r9}
442         add             r1,  r1,  r2
443         pld             [r1]
444         ALIGN_DWORD     1,   r7,  r8,  r9
445         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
446         stm             r0,  {r10-r11}
447         add             r0,  r0,  r2
448         ldm             r1,  {r4-r6}
449         add             r1,  r1,  r2
450         pld             [r1]
451         ALIGN_DWORD     1,   r4,  r5,  r6
        @ The flags set here are consumed by the bne at the end of the pass;
        @ nothing in between modifies them.
452         subs            r3,  r3,  #1
453         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
454         stm             r0,  {r10-r11}
455         add             r0,  r0,  r2
456         bne             6b
457         pop             {r4-r11,pc}
458         .align 5
        @ Offset 2: same as offset 1 with a two-byte shift.
459 3:
460         ldm             r1,  {r4-r6}
461         add             r1,  r1,  r2
462         pld             [r1]
463         ALIGN_DWORD     2,   r4,  r5,  r6
464 6:      ldm             r1,  {r7-r9}
465         add             r1,  r1,  r2
466         pld             [r1]
467         ALIGN_DWORD     2,   r7,  r8,  r9
468         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
469         stm             r0,  {r10-r11}
470         add             r0,  r0,  r2
471         ldm             r1,  {r4-r6}
472         add             r1,  r1,  r2
473         pld             [r1]
474         ALIGN_DWORD     2,   r4,  r5,  r6
475         subs            r3,  r3,  #1
476         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
477         stm             r0,  {r10-r11}
478         add             r0,  r0,  r2
479         bne             6b
480         pop             {r4-r11,pc}
481         .align 5
        @ Offset 3: same as offset 1 with a three-byte shift.
482 4:
483         ldm             r1,  {r4-r6}
484         add             r1,  r1,  r2
485         pld             [r1]
486         ALIGN_DWORD     3,   r4,  r5,  r6
487 6:      ldm             r1,  {r7-r9}
488         add             r1,  r1,  r2
489         pld             [r1]
490         ALIGN_DWORD     3,   r7,  r8,  r9
491         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
492         stm             r0,  {r10-r11}
493         add             r0,  r0,  r2
494         ldm             r1,  {r4-r6}
495         add             r1,  r1,  r2
496         pld             [r1]
497         ALIGN_DWORD     3,   r4,  r5,  r6
498         subs            r3,  r3,  #1
499         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
500         stm             r0,  {r10-r11}
501         add             r0,  r0,  r2
502         bne             6b
503         pop             {r4-r11,pc}
504 endfunc
505
@ Emit the pending literal pool here, i.e. the constants requested by the
@ ldr rX, =value pseudo-instructions in the functions above.
506         .ltorg
507
508 @ ----------------------------------------------------------------
509 .macro  RND_XY2_IT align, rnd
        @ One row step of the 2-D half-pel filter.  Loads a source row from
        @ r1, advances r1 by r2 and forms a = the 8 bytes at the source
        @ position (r4/r5) and b = the 8 bytes one position to the right
        @ (r6/r7).  Results, split so that byte sums cannot overflow:
        @   r8/r9   = low two bits of a + low two bits of b (+ rounding)
        @   r10/r11 = upper six bits of a + upper six bits of b
        @ The rounding constant is added only when r3 is even on entry, so
        @ of any two consecutive rows exactly one carries it.  With
        @ rnd = lsl it is 0x02020202, with rnd = lsr it is 0x01010101.
        @ Finally r3 is decremented with flags set for the user of the macro.
        @ Clobbers r4-r12, r14 and the flags.
510         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
511         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ Pick load registers so that the words usable without shifting are
        @ already in place: a for offset 0 (r6/r7 are swapped into the b role
        @ below since the average is symmetric), b for offset 3.
512 .if \align == 0
513         ldm             r1,  {r6-r8}
514 .elseif \align == 3
515         ldm             r1,  {r5-r7}
516 .else
517         ldm             r1,  {r8-r10}
518 .endif
519         add             r1,  r1,  r2
520         pld             [r1]
521 .if \align == 0
522         ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
523 .elseif \align == 1
524         ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
525         ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
526 .elseif \align == 2
527         ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
528         ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
529 .elseif \align == 3
        @ r5 is both first source and second destination here; the macro
        @ reads it before it overwrites it.
530         ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
531 .endif
532         ldr             r14, =0x03030303
        @ Row parity test; the flags stay live until the addeq pair below
        @ because the and/add/ldr instructions in between leave them alone.
533         tst             r3,  #1
534         and             r8,  r4,  r14
535         and             r9,  r5,  r14
536         and             r10, r6,  r14
537         and             r11, r7,  r14
        @ Turn the 0x03030303 mask into the rounding constant (even rows only).
        @ NOTE(review): the it/itt lines are presumably there for Thumb-2 builds.
538         it              eq
539         andeq           r14, r14, r14, \rnd #1
540         add             r8,  r8,  r10
541         add             r9,  r9,  r11
        @ r12 = 0x3f3f3f3f, mask for the upper six bits after the shift.
542         ldr             r12, =0xfcfcfcfc >> 2
543         itt             eq
544         addeq           r8,  r8,  r14
545         addeq           r9,  r9,  r14
546         and             r4,  r12, r4,  lsr #2
547         and             r5,  r12, r5,  lsr #2
548         and             r6,  r12, r6,  lsr #2
549         and             r7,  r12, r7,  lsr #2
550         add             r10, r4,  r6
551         add             r11, r5,  r7
552         subs            r3,  r3,  #1
553 .endm
554
555 .macro RND_XY2_EXPAND align, rnd
        @ Complete loop body and epilogue of the xy2 functions for one source
        @ alignment.  The first RND_XY2_IT handles source row 0; each pass of
        @ the loop at label 6 handles the next source row, adds its partial
        @ sums to those of the previous row and stores one output row:
        @   out = (high sums) + (((low sums) >> 2) & 0x0f0f0f0f)
        @ The previous row is kept on the stack across the second RND_XY2_IT
        @ because that macro overwrites r8-r11.
        @ Ends with the pop that returns from the calling function, which
        @ must have pushed {r4-r11,lr}.
556         RND_XY2_IT      \align, \rnd
557 6:      push            {r8-r11}
558         RND_XY2_IT      \align, \rnd
        @ r4-r7 = previous row (low, low, high, high); r8-r11 = current row.
559         pop             {r4-r7}
560         add             r4,  r4,  r8
561         add             r5,  r5,  r9
562         ldr             r14, =0x0f0f0f0f
563         add             r6,  r6,  r10
564         add             r7,  r7,  r11
565         and             r4,  r14, r4,  lsr #2
566         and             r5,  r14, r5,  lsr #2
567         add             r4,  r4,  r6
568         add             r5,  r5,  r7
569         stm             r0,  {r4-r5}
570         add             r0,  r0,  r2
        @ Tests the flags of the subs that ends RND_XY2_IT; nothing since
        @ then has changed them.  The loop stops once r3 has gone negative.
571         bge             6b
572         pop             {r4-r11,pc}
573 .endm
574
575 function ff_put_pixels8_xy2_arm, export=1, align=5
576         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
577         @ block = word aligned, pixels = unaligned
        @ 2-D half-pel interpolation of an 8-byte-wide block: every output
        @ byte combines the four source bytes at (x, y), (x + 1, y),
        @ (x, y + 1) and (x + 1, y + 1) as (sum + 2) >> 2.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = row counter.
        @ Each RND_XY2_EXPAND below contains its own loop and return, so the
        @ four alignment cases never fall through into each other.
        @ Passing lsl selects the rounding constant 0x02020202.
578         pld             [r1]
579         push            {r4-r11,lr} @ R14 is also called LR
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
580         JMP_ALIGN       r5,  r1
581 1:      RND_XY2_EXPAND  0, lsl
582         .align 5
583 2:      RND_XY2_EXPAND  1, lsl
584         .align 5
585 3:      RND_XY2_EXPAND  2, lsl
586         .align 5
587 4:      RND_XY2_EXPAND  3, lsl
588 endfunc
589
590 function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5
591         @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
592         @ block = word aligned, pixels = unaligned
        @ 2-D half-pel interpolation of an 8-byte-wide block with the smaller
        @ rounding bias: every output byte combines the four source bytes at
        @ (x, y), (x + 1, y), (x, y + 1) and (x + 1, y + 1) as (sum + 1) >> 2.
        @ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = row counter.
        @ Each RND_XY2_EXPAND below contains its own loop and return, so the
        @ four alignment cases never fall through into each other.
        @ Passing lsr selects the rounding constant 0x01010101.
593         pld             [r1]
        @ lr (r14) is saved because the expansion uses r14 as a scratch register.
594         push            {r4-r11,lr}
        @ r1 is rounded down to a word boundary; continue at label 1, 2, 3
        @ or 4 for a source byte offset of 0, 1, 2 or 3 (r5 is scratch).
595         JMP_ALIGN       r5,  r1
596 1:      RND_XY2_EXPAND  0, lsr
597         .align 5
598 2:      RND_XY2_EXPAND  1, lsr
599         .align 5
600 3:      RND_XY2_EXPAND  2, lsr
601         .align 5
602 4:      RND_XY2_EXPAND  3, lsr
603 endfunc