]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/dsputil_arm.S
ARM: clean up file/function naming conventions
[ffmpeg] / libavcodec / arm / dsputil_arm.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "asm.S"
24
25         preserve8
26
27 #if !HAVE_PLD
28 .macro pld reg                  @ empty stand-in: in this configuration every pld used below expands to nothing
29 .endm
30 #endif
31
32 #if HAVE_ARMV5TE
33 function ff_prefetch_arm, export=1     @ one preload hint per row: r0 = address, r1 = byte step between rows, r2 = row count
34 1:      subs            r2,  r2,  #1            @ count is tested after the decrement, so it must be >= 1 on entry
35         pld             [r0]                    @ cache hint only, no register is written
36         add             r0,  r0,  r1            @ advance to the next row (flags from subs are untouched)
37         bne             1b                      @ loop through a local label, like every other loop in this file, not the exported symbol
38         bx              lr
39         .endfunc
40 #endif
41
42 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 @ Rd0-Rd3 = the four words found shift bytes into the five-word window Rn0-Rn4 (byte offsets assume little-endian loads)
43         mov             \Rd0, \Rn0, lsr #(\shift * 8)           @ start each result with word i shifted down by shift bytes
44         mov             \Rd1, \Rn1, lsr #(\shift * 8)
45         mov             \Rd2, \Rn2, lsr #(\shift * 8)
46         mov             \Rd3, \Rn3, lsr #(\shift * 8)           @ all Rd are written before Rn1-Rn4 are read again, so Rd must be distinct from Rn1-Rn4
47         orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) @ then merge in the low bytes of word i+1 at the top
48         orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
49         orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
50         orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) @ sources are only read. This file uses shift = 1, 2 or 3
51 .endm
52 .macro  ALIGN_DWORD shift, R0, R1, R2 @ in place: R0 and R1 become the two words found shift bytes into the window R0, R1, R2
53         mov             \R0, \R0, lsr #(\shift * 8)
54         orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)   @ uses the old R1 before it is overwritten below
55         mov             \R1, \R1, lsr #(\shift * 8)
56         orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)   @ R2 is only read
57 .endm
58 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 @ Rdst0 and Rdst1 = the two words found shift bytes into the window Rsrc0, Rsrc1, Rsrc2
59         mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)       @ only read of Rsrc0, so Rdst1 may reuse that register (RND_XY2_IT does this)
60         mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
61         orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) @ Rsrc1 and Rsrc2 are read after the writes above, so they must not be destinations
62         orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
63 .endm
64
65 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
66         @ Per-byte average, rounded up, of two word pairs: Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
67         @ Rmask must hold 0xFEFEFEFE (= ~0x01010101) so the shift cannot carry a bit into the neighbouring byte
68         @ Rn0/Rn1 are destroyed (left holding Rn | Rm), Rm0/Rm1 and Rmask are only read
69         eor             \Rd0, \Rn0, \Rm0                @ bits where the two inputs differ
70         eor             \Rd1, \Rn1, \Rm1
71         orr             \Rn0, \Rn0, \Rm0                @ upper bound of the average, overwrites Rn
72         orr             \Rn1, \Rn1, \Rm1
73         and             \Rd0, \Rd0, \Rmask              @ drop bit 0 of every byte before halving
74         and             \Rd1, \Rd1, \Rmask
75         sub             \Rd0, \Rn0, \Rd0, lsr #1
76         sub             \Rd1, \Rn1, \Rd1, lsr #1
77 .endm
78
79 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
80         @ Per-byte average, rounded down, of two word pairs: Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
81         @ Rmask must hold 0xFEFEFEFE (= ~0x01010101) so the shift cannot carry a bit into the neighbouring byte
82         @ Rn0/Rn1 are destroyed (left holding Rn & Rm), Rm0/Rm1 and Rmask are only read
83         eor             \Rd0, \Rn0, \Rm0                @ bits where the two inputs differ
84         eor             \Rd1, \Rn1, \Rm1
85         and             \Rn0, \Rn0, \Rm0                @ lower bound of the average, overwrites Rn
86         and             \Rn1, \Rn1, \Rm1
87         and             \Rd0, \Rd0, \Rmask              @ drop bit 0 of every byte before halving
88         and             \Rd1, \Rd1, \Rmask
89         add             \Rd0, \Rn0, \Rd0, lsr #1        @ note the add: this is the only difference from RND_AVG32 besides and/orr
90         add             \Rd1, \Rn1, \Rd1, lsr #1
91 .endm
92
93 .macro  JMP_ALIGN tmp, reg @ word-align reg and branch to the following local label 1, 2, 3 or 4 for an original misalignment of 0, 1, 2 or 3 bytes. Clobbers tmp and flags
94         ands            \tmp, \reg, #3          @ tmp = low two address bits, Z set when already aligned
95         bic             \reg, \reg, #3          @ round reg down to a word boundary (bic without s keeps the flags from ands)
96         beq             1f
97         subs            \tmp, \tmp, #1
98         beq             2f
99         subs            \tmp, \tmp, #1
100         beq             3f
101         b    4f
102 .endm
103
104 @ ----------------------------------------------------------------
105         .align 5
106 function put_pixels16_arm, export=1
107         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108         @ block = word aligned, pixels = unaligned. Copies 16 bytes per row for h rows, both pointers step by line_size (r0 = block, r1 = pixels, r2 = line_size, r3 = h)
109         pld             [r1]
110         push            {r4-r11, lr}
111         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
112 1:                                              @ source word aligned: straight copy
113         ldm             r1,  {r4-r7}
114         add             r1,  r1,  r2
115         stm             r0,  {r4-r7}
116         pld             [r1]
117         subs            r3,  r3,  #1            @ row count is tested after the first row, so h must be >= 1
118         add             r0,  r0,  r2
119         bne             1b
120         pop             {r4-r11, pc}
121         .align 5
122 2:                                              @ source was 1 byte past a word boundary
123         ldm             r1,  {r4-r8}            @ five words (20 bytes) from the rounded-down address cover the 16 wanted bytes
124         add             r1,  r1,  r2
125         ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
126         pld             [r1]
127         subs            r3,  r3,  #1
128         stm             r0,  {r9-r12}
129         add             r0,  r0,  r2
130         bne             2b
131         pop             {r4-r11, pc}
132         .align 5
133 3:                                              @ source was 2 bytes past a word boundary
134         ldm             r1,  {r4-r8}
135         add             r1,  r1,  r2
136         ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
137         pld             [r1]
138         subs            r3,  r3,  #1
139         stm             r0,  {r9-r12}
140         add             r0,  r0,  r2
141         bne             3b
142         pop             {r4-r11, pc}
143         .align 5
144 4:                                              @ source was 3 bytes past a word boundary
145         ldm             r1,  {r4-r8}
146         add             r1,  r1,  r2
147         ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
148         pld             [r1]
149         subs            r3,  r3,  #1
150         stm             r0,  {r9-r12}
151         add             r0,  r0,  r2
152         bne             4b
153         pop             {r4-r11,pc}
154         .endfunc
155
156 @ ----------------------------------------------------------------
157         .align 5
158 function put_pixels8_arm, export=1
159         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160         @ block = word aligned, pixels = unaligned. Copies 8 bytes per row for h rows, both pointers step by line_size (r0 = block, r1 = pixels, r2 = line_size, r3 = h)
161         pld             [r1]
162         push            {r4-r5,lr}
163         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
164 1:                                              @ source word aligned: straight copy
165         ldm             r1,  {r4-r5}
166         add             r1,  r1,  r2
167         subs            r3,  r3,  #1            @ row count is tested after the first row, so h must be >= 1
168         pld             [r1]
169         stm             r0,  {r4-r5}
170         add             r0,  r0,  r2
171         bne             1b
172         pop             {r4-r5,pc}
173         .align 5
174 2:                                              @ source was 1 byte past a word boundary
175         ldm             r1,  {r4-r5, r12}       @ three words (12 bytes) from the rounded-down address, r12 is scratch and not saved
176         add             r1,  r1,  r2
177         ALIGN_DWORD     1,   r4,  r5,  r12
178         pld             [r1]
179         subs            r3,  r3,  #1
180         stm             r0,  {r4-r5}
181         add             r0,  r0,  r2
182         bne             2b
183         pop             {r4-r5,pc}
184         .align 5
185 3:                                              @ source was 2 bytes past a word boundary
186         ldm             r1,  {r4-r5, r12}
187         add             r1,  r1,  r2
188         ALIGN_DWORD     2,   r4,  r5,  r12
189         pld             [r1]
190         subs            r3,  r3,  #1
191         stm             r0,  {r4-r5}
192         add             r0,  r0,  r2
193         bne             3b
194         pop             {r4-r5,pc}
195         .align 5
196 4:                                              @ source was 3 bytes past a word boundary
197         ldm             r1,  {r4-r5, r12}
198         add             r1,  r1,  r2
199         ALIGN_DWORD     3,   r4,  r5,  r12
200         pld             [r1]
201         subs            r3,  r3,  #1
202         stm             r0,  {r4-r5}
203         add             r0,  r0,  r2
204         bne             4b
205         pop             {r4-r5,pc}
206         .endfunc
207
208 @ ----------------------------------------------------------------
209         .align 5
210 function put_pixels8_x2_arm, export=1
211         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212         @ block = word aligned, pixels = unaligned. Each of the 8 output bytes per row is the rounded-up average of a source byte and the byte after it
213         pld             [r1]
214         push            {r4-r10,lr}
215         ldr             r12, =0xfefefefe        @ mask required by RND_AVG32
216         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
217 1:                                              @ source word aligned
218         ldm             r1,  {r4-r5, r10}       @ three words supply the 9 source bytes needed per row
219         add             r1,  r1,  r2
220         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10  @ r6,r7 = source shifted by one byte, r4,r5 = source itself
221         pld             [r1]
222         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
223         subs            r3,  r3,  #1            @ row count is tested after the first row, so h must be >= 1
224         stm             r0,  {r8-r9}
225         add             r0,  r0,  r2
226         bne             1b
227         pop             {r4-r10,pc}
228         .align 5
229 2:                                              @ source was 1 byte past a word boundary: average offsets 1 and 2
230         ldm             r1,  {r4-r5, r10}
231         add             r1,  r1,  r2
232         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
233         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
234         pld             [r1]
235         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
236         subs            r3,  r3,  #1
237         stm             r0,  {r4-r5}
238         add             r0,  r0,  r2
239         bne             2b
240         pop             {r4-r10,pc}
241         .align 5
242 3:                                              @ source was 2 bytes past a word boundary: average offsets 2 and 3
243         ldm             r1,  {r4-r5, r10}
244         add             r1,  r1,  r2
245         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
246         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
247         pld             [r1]
248         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
249         subs            r3,  r3,  #1
250         stm             r0,  {r4-r5}
251         add             r0,  r0,  r2
252         bne             3b
253         pop             {r4-r10,pc}
254         .align 5
255 4:                                              @ source was 3 bytes past a word boundary: average offset 3 with offset 4
256         ldm             r1,  {r4-r5, r10}
257         add             r1,  r1,  r2
258         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
259         pld             [r1]
260         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12  @ offset 4 is simply the second and third loaded words
261         subs            r3,  r3,  #1
262         stm             r0,  {r8-r9}
263         add             r0,  r0,  r2
264         bne             4b
265         pop             {r4-r10,pc}
266         .endfunc
267
268         .align 5
269 function put_no_rnd_pixels8_x2_arm, export=1
270         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271         @ block = word aligned, pixels = unaligned. Each of the 8 output bytes per row is the rounded-down average of a source byte and the byte after it
272         pld             [r1]
273         push            {r4-r10,lr}
274         ldr             r12, =0xfefefefe        @ mask required by NO_RND_AVG32
275         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
276 1:                                              @ source word aligned
277         ldm             r1,  {r4-r5, r10}       @ three words supply the 9 source bytes needed per row
278         add             r1,  r1,  r2
279         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10  @ r6,r7 = source shifted by one byte, r4,r5 = source itself
280         pld             [r1]
281         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
282         subs            r3,  r3,  #1            @ row count is tested after the first row, so h must be >= 1
283         stm             r0,  {r8-r9}
284         add             r0,  r0,  r2
285         bne             1b
286         pop             {r4-r10,pc}
287         .align 5
288 2:                                              @ source was 1 byte past a word boundary: average offsets 1 and 2
289         ldm             r1,  {r4-r5, r10}
290         add             r1,  r1,  r2
291         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
292         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
293         pld             [r1]
294         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
295         subs            r3,  r3,  #1
296         stm             r0,  {r4-r5}
297         add             r0,  r0,  r2
298         bne             2b
299         pop             {r4-r10,pc}
300         .align 5
301 3:                                              @ source was 2 bytes past a word boundary: average offsets 2 and 3
302         ldm             r1,  {r4-r5, r10}
303         add             r1,  r1,  r2
304         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
305         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
306         pld             [r1]
307         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
308         subs            r3,  r3,  #1
309         stm             r0,  {r4-r5}
310         add             r0,  r0,  r2
311         bne             3b
312         pop             {r4-r10,pc}
313         .align 5
314 4:                                              @ source was 3 bytes past a word boundary: average offset 3 with offset 4
315         ldm             r1,  {r4-r5, r10}
316         add             r1,  r1,  r2
317         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
318         pld             [r1]
319         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12  @ offset 4 is simply the second and third loaded words
320         subs            r3,  r3,  #1
321         stm             r0,  {r8-r9}
322         add             r0,  r0,  r2
323         bne             4b
324         pop             {r4-r10,pc}
325         .endfunc
326
327
328 @ ----------------------------------------------------------------
329         .align 5
330 function put_pixels8_y2_arm, export=1
331         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332         @ block = word aligned, pixels = unaligned. Each 8-byte output row is the rounded-up byte average of a source row and the row below it
333         pld             [r1]
334         push            {r4-r11,lr}
335         mov             r3,  r3,  lsr #1        @ every loop pass emits two rows, so count pairs (an odd h drops its last row, h < 2 makes the count wrap)
336         ldr             r12, =0xfefefefe        @ mask required by RND_AVG32
337         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
338 1:                                              @ source word aligned
339         ldm             r1,  {r4-r5}            @ prime r4,r5 with the first source row
340         add             r1,  r1,  r2
341 6:      ldm             r1,  {r6-r7}            @ next row into r6,r7
342         add             r1,  r1,  r2
343         pld             [r1]
344         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12  @ destroys r4,r5 and keeps r6,r7 for the second half
345         ldm             r1,  {r4-r5}            @ following row, which also seeds the next pass
346         add             r1,  r1,  r2
347         stm             r0,  {r8-r9}
348         add             r0,  r0,  r2
349         pld             [r1]
350         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12  @ destroys r6,r7 and keeps r4,r5
351         subs            r3,  r3,  #1
352         stm             r0,  {r8-r9}
353         add             r0,  r0,  r2
354         bne             6b
355         pop             {r4-r11,pc}
356         .align 5
357 2:                                              @ source was 1 byte past a word boundary
358         ldm             r1,  {r4-r6}            @ three words per row, realigned in place to two
359         add             r1,  r1,  r2
360         pld             [r1]
361         ALIGN_DWORD     1,   r4,  r5,  r6
362 6:      ldm             r1,  {r7-r9}
363         add             r1,  r1,  r2
364         pld             [r1]
365         ALIGN_DWORD     1,   r7,  r8,  r9
366         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12  @ destroys r4,r5 and keeps r7,r8
367         stm             r0,  {r10-r11}
368         add             r0,  r0,  r2
369         ldm             r1,  {r4-r6}
370         add             r1,  r1,  r2
371         pld             [r1]
372         ALIGN_DWORD     1,   r4,  r5,  r6
373         subs            r3,  r3,  #1            @ flags survive to the bne: the macro and stm/add below do not set them
374         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12  @ destroys r7,r8 and keeps r4,r5 for the next pass
375         stm             r0,  {r10-r11}
376         add             r0,  r0,  r2
377         bne             6b
378         pop             {r4-r11,pc}
379         .align 5
380 3:                                              @ source was 2 bytes past a word boundary
381         ldm             r1,  {r4-r6}
382         add             r1,  r1,  r2
383         pld             [r1]
384         ALIGN_DWORD     2,   r4,  r5,  r6
385 6:      ldm             r1,  {r7-r9}
386         add             r1,  r1,  r2
387         pld             [r1]
388         ALIGN_DWORD     2,   r7,  r8,  r9
389         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
390         stm             r0,  {r10-r11}
391         add             r0,  r0,  r2
392         ldm             r1,  {r4-r6}
393         add             r1,  r1,  r2
394         pld             [r1]
395         ALIGN_DWORD     2,   r4,  r5,  r6
396         subs            r3,  r3,  #1
397         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
398         stm             r0,  {r10-r11}
399         add             r0,  r0,  r2
400         bne             6b
401         pop             {r4-r11,pc}
402         .align 5
403 4:                                              @ source was 3 bytes past a word boundary
404         ldm             r1,  {r4-r6}
405         add             r1,  r1,  r2
406         pld             [r1]
407         ALIGN_DWORD     3,   r4,  r5,  r6
408 6:      ldm             r1,  {r7-r9}
409         add             r1,  r1,  r2
410         pld             [r1]
411         ALIGN_DWORD     3,   r7,  r8,  r9
412         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
413         stm             r0,  {r10-r11}
414         add             r0,  r0,  r2
415         ldm             r1,  {r4-r6}
416         add             r1,  r1,  r2
417         pld             [r1]
418         ALIGN_DWORD     3,   r4,  r5,  r6
419         subs            r3,  r3,  #1
420         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
421         stm             r0,  {r10-r11}
422         add             r0,  r0,  r2
423         bne             6b
424         pop             {r4-r11,pc}
425         .endfunc
426
427         .align 5
428 function put_no_rnd_pixels8_y2_arm, export=1
429         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430         @ block = word aligned, pixels = unaligned. Each 8-byte output row is the rounded-down byte average of a source row and the row below it
431         pld             [r1]
432         push            {r4-r11,lr}
433         mov             r3,  r3,  lsr #1        @ every loop pass emits two rows, so count pairs (an odd h drops its last row, h < 2 makes the count wrap)
434         ldr             r12, =0xfefefefe        @ mask required by NO_RND_AVG32
435         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
436 1:                                              @ source word aligned
437         ldm             r1,  {r4-r5}            @ prime r4,r5 with the first source row
438         add             r1,  r1,  r2
439 6:      ldm             r1,  {r6-r7}            @ next row into r6,r7
440         add             r1,  r1,  r2
441         pld             [r1]
442         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12  @ destroys r4,r5 and keeps r6,r7 for the second half
443         ldm             r1,  {r4-r5}            @ following row, which also seeds the next pass
444         add             r1,  r1,  r2
445         stm             r0,  {r8-r9}
446         add             r0,  r0,  r2
447         pld             [r1]
448         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12  @ destroys r6,r7 and keeps r4,r5
449         subs            r3,  r3,  #1
450         stm             r0,  {r8-r9}
451         add             r0,  r0,  r2
452         bne             6b
453         pop             {r4-r11,pc}
454         .align 5
455 2:                                              @ source was 1 byte past a word boundary
456         ldm             r1,  {r4-r6}            @ three words per row, realigned in place to two
457         add             r1,  r1,  r2
458         pld             [r1]
459         ALIGN_DWORD     1,   r4,  r5,  r6
460 6:      ldm             r1,  {r7-r9}
461         add             r1,  r1,  r2
462         pld             [r1]
463         ALIGN_DWORD     1,   r7,  r8,  r9
464         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12  @ destroys r4,r5 and keeps r7,r8
465         stm             r0,  {r10-r11}
466         add             r0,  r0,  r2
467         ldm             r1,  {r4-r6}
468         add             r1,  r1,  r2
469         pld             [r1]
470         ALIGN_DWORD     1,   r4,  r5,  r6
471         subs            r3,  r3,  #1            @ flags survive to the bne: the macro and stm/add below do not set them
472         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12  @ destroys r7,r8 and keeps r4,r5 for the next pass
473         stm             r0,  {r10-r11}
474         add             r0,  r0,  r2
475         bne             6b
476         pop             {r4-r11,pc}
477         .align 5
478 3:                                              @ source was 2 bytes past a word boundary
479         ldm             r1,  {r4-r6}
480         add             r1,  r1,  r2
481         pld             [r1]
482         ALIGN_DWORD     2,   r4,  r5,  r6
483 6:      ldm             r1,  {r7-r9}
484         add             r1,  r1,  r2
485         pld             [r1]
486         ALIGN_DWORD     2,   r7,  r8,  r9
487         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
488         stm             r0,  {r10-r11}
489         add             r0,  r0,  r2
490         ldm             r1,  {r4-r6}
491         add             r1,  r1,  r2
492         pld             [r1]
493         ALIGN_DWORD     2,   r4,  r5,  r6
494         subs            r3,  r3,  #1
495         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
496         stm             r0,  {r10-r11}
497         add             r0,  r0,  r2
498         bne             6b
499         pop             {r4-r11,pc}
500         .align 5
501 4:                                              @ source was 3 bytes past a word boundary
502         ldm             r1,  {r4-r6}
503         add             r1,  r1,  r2
504         pld             [r1]
505         ALIGN_DWORD     3,   r4,  r5,  r6
506 6:      ldm             r1,  {r7-r9}
507         add             r1,  r1,  r2
508         pld             [r1]
509         ALIGN_DWORD     3,   r7,  r8,  r9
510         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
511         stm             r0,  {r10-r11}
512         add             r0,  r0,  r2
513         ldm             r1,  {r4-r6}
514         add             r1,  r1,  r2
515         pld             [r1]
516         ALIGN_DWORD     3,   r4,  r5,  r6
517         subs            r3,  r3,  #1
518         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
519         stm             r0,  {r10-r11}
520         add             r0,  r0,  r2
521         bne             6b
522         pop             {r4-r11,pc}
523         .endfunc
524
525         .ltorg                                  @ emit the literal pool here for the ldr =constant loads in the functions above
526
527 @ ----------------------------------------------------------------
528 .macro  RND_XY2_IT align, rnd @ one source row of the 2x2 average: leaves horizontal pair sums in r8,r9 (low 2 bits of each byte) and r10,r11 (high 6 bits), decrements r3 and sets flags
529         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
530         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531 .if \align == 0
532         ldm             r1,  {r6-r8}            @ a = r6,r7 as loaded, b is built in r4,r5 below
533 .elseif \align == 3
534         ldm             r1,  {r5-r7}            @ b = r6,r7 as loaded (offset 4), a is built in r4,r5 below
535 .else
536         ldm             r1,  {r8-r10}           @ both a and b need realigning, so load into scratch
537 .endif
538         add             r1,  r1,  r2
539         pld             [r1]
540 .if \align == 0
541         ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
542 .elseif \align == 1
543         ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
544         ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
545 .elseif \align == 2
546         ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
547         ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
548 .elseif \align == 3
549         ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
550 .endif
551         ldr             r14, =0x03030303        @ r14 (lr) is free scratch because the callers pushed it
552         tst             r3,  #1                 @ Z set on rows where r3 is even: only those rows receive the rounding term
553         and             r8,  r4,  r14
554         and             r9,  r5,  r14
555         and             r10, r6,  r14
556         and             r11, r7,  r14
557         andeq           r14, r14, r14, \rnd #1  @ rnd = lsl gives 0x02020202, rnd = lsr gives 0x01010101
558         add             r8,  r8,  r10
559         add             r9,  r9,  r11
560         ldr             r12, =0xfcfcfcfc >> 2   @ 0x3f3f3f3f, applied after the shift below
561         addeq           r8,  r8,  r14           @ every output mixes one even and one odd row, so the term is added once per output
562         addeq           r9,  r9,  r14
563         and             r4,  r12, r4,  lsr #2
564         and             r5,  r12, r5,  lsr #2
565         and             r6,  r12, r6,  lsr #2
566         and             r7,  r12, r7,  lsr #2
567         add             r10, r4,  r6
568         add             r11, r5,  r7
569         subs            r3,  r3,  #1            @ flags are consumed by the bge in RND_XY2_EXPAND
570 .endm
571
572 .macro RND_XY2_EXPAND align, rnd @ full row loop for one source alignment, including the function return. Expects r4-r11 and lr already pushed by the caller
573         RND_XY2_IT      \align, \rnd            @ prime with the sums of the first source row
574 6:      push            {r8-r11}                @ park the previous row sums, the next expansion overwrites r4-r12 and r14
575         RND_XY2_IT      \align, \rnd
576         pop             {r4-r7}                 @ previous row: r4,r5 = low-bit sums, r6,r7 = high-bit sums
577         add             r4,  r4,  r8
578         add             r5,  r5,  r9
579         ldr             r14, =0x0f0f0f0f
580         add             r6,  r6,  r10
581         add             r7,  r7,  r11
582         and             r4,  r14, r4,  lsr #2   @ divide the low-bit total by 4, masking off bits shifted in from the next byte
583         and             r5,  r14, r5,  lsr #2
584         add             r4,  r4,  r6            @ high-bit sums were already scaled by 1/4 before adding
585         add             r5,  r5,  r7
586         stm             r0,  {r4-r5}
587         add             r0,  r0,  r2
588         bge             6b                      @ tests the subs from RND_XY2_IT: nothing in between sets flags. Runs until r3 goes negative
589         pop             {r4-r11,pc}
590 .endm
591
592         .align 5
593 function put_pixels8_xy2_arm, export=1
594         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595         @ block = word aligned, pixels = unaligned. Each of the 8 output bytes per row is the average of a 2x2 source neighbourhood, with rounding term 2 before the divide by 4
596         pld             [r1]
597         push            {r4-r11,lr} @ R14 is also called LR
598         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
599 1:                                              @ each expansion below loops over all rows and returns by itself
600         RND_XY2_EXPAND  0, lsl
601
602         .align 5
603 2:
604         RND_XY2_EXPAND  1, lsl
605
606         .align 5
607 3:
608         RND_XY2_EXPAND  2, lsl
609
610         .align 5
611 4:
612         RND_XY2_EXPAND  3, lsl
613         .endfunc
614
615         .align 5
616 function put_no_rnd_pixels8_xy2_arm, export=1
617         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
618         @ block = word aligned, pixels = unaligned. Each of the 8 output bytes per row is the average of a 2x2 source neighbourhood, with rounding term 1 before the divide by 4
619         pld             [r1]
620         push            {r4-r11,lr}
621         JMP_ALIGN       r5,  r1                 @ r1 is rounded down to a word, then dispatch on its former low two bits
622 1:                                              @ each expansion below loops over all rows and returns by itself
623         RND_XY2_EXPAND  0, lsr
624
625         .align 5
626 2:
627         RND_XY2_EXPAND  1, lsr
628
629         .align 5
630 3:
631         RND_XY2_EXPAND  2, lsr
632
633         .align 5
634 4:
635         RND_XY2_EXPAND  3, lsr
636         .endfunc
637
638         .align 5
639 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
640 function ff_add_pixels_clamped_ARM, export=1
641         push            {r4-r10}                /* saved scratch, restored before the bx lr */
642         mov             r10, #8                 /* row counter: 8 rows of 8 pixels */
643 1:
644         ldr             r4,  [r1]               /* load dest: word access, dest presumably 4-byte aligned, low byte treated as pixel 0 */
645         /* block[0] and block[1]*/
646         ldrsh           r5,  [r0]
647         ldrsh           r7,  [r0, #2]
648         and             r6,  r4,  #0xFF
649         and             r8,  r4,  #0xFF00
650         add             r6,  r5,  r6
651         add             r8,  r7,  r8,  lsr #8
652         mvn             r5,  r5                 /* top byte of ~coefficient is 0x00 for a negative coefficient, 0xFF otherwise */
653         mvn             r7,  r7
654         tst             r6,  #0x100             /* bit 8 set means the sum left 0..255. NOTE(review): only reliable for sums in -256..511, confirm coefficient range */
655         movne           r6,  r5,  lsr #24       /* saturate to 0 or 255 according to the coefficient sign */
656         tst             r8,  #0x100
657         movne           r8,  r7,  lsr #24
658         mov             r9,  r6
659         ldrsh           r5,  [r0, #4]           /* moved from [A] */
660         orr             r9,  r9,  r8,  lsl #8
661         /* block[2] and block[3] */
662         /* [A] */
663         ldrsh           r7,  [r0, #6]
664         and             r6,  r4,  #0xFF0000
665         and             r8,  r4,  #0xFF000000
666         add             r6,  r5,  r6,  lsr #16
667         add             r8,  r7,  r8,  lsr #24
668         mvn             r5,  r5
669         mvn             r7,  r7
670         tst             r6,  #0x100
671         movne           r6,  r5,  lsr #24
672         tst             r8,  #0x100
673         movne           r8,  r7,  lsr #24
674         orr             r9,  r9,  r6,  lsl #16
675         ldr             r4,  [r1, #4]           /* moved from [B] */
676         orr             r9,  r9,  r8,  lsl #24
677         /* store dest */
678         ldrsh           r5,  [r0, #8]           /* moved from [C] */
679         str             r9,  [r1]
680
681         /* load dest */
682         /* [B] */
683         /* block[4] and block[5] */
684         /* [C] */
685         ldrsh           r7,  [r0, #10]
686         and             r6,  r4,  #0xFF
687         and             r8,  r4,  #0xFF00
688         add             r6,  r5,  r6
689         add             r8,  r7,  r8,  lsr #8
690         mvn             r5,  r5
691         mvn             r7,  r7
692         tst             r6,  #0x100
693         movne           r6,  r5,  lsr #24
694         tst             r8,  #0x100
695         movne           r8,  r7,  lsr #24
696         mov             r9,  r6
697         ldrsh           r5,  [r0, #12]          /* moved from [D] */
698         orr             r9,  r9,  r8,  lsl #8
699         /* block[6] and block[7] */
700         /* [D] */
701         ldrsh           r7,  [r0, #14]
702         and             r6,  r4,  #0xFF0000
703         and             r8,  r4,  #0xFF000000
704         add             r6,  r5,  r6,  lsr #16
705         add             r8,  r7,  r8,  lsr #24
706         mvn             r5,  r5
707         mvn             r7,  r7
708         tst             r6,  #0x100
709         movne           r6,  r5,  lsr #24
710         tst             r8,  #0x100
711         movne           r8,  r7,  lsr #24
712         orr             r9,  r9,  r6,  lsl #16
713         add             r0,  r0,  #16           /* moved from [E] */
714         orr             r9,  r9,  r8,  lsl #24
715         subs            r10, r10, #1            /* moved from [F] */
716         /* store dest */
717         str             r9,  [r1, #4]
718
719         /* [E] */
720         /* [F] */
721         add             r1,  r1,  r2            /* next dest row. str and add leave the flags from subs intact for the bne */
722         bne             1b
723
724         pop             {r4-r10}
725         bx              lr
726         .endfunc