]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/dsputil_arm.S
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / arm / dsputil_arm.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "asm.S"
24
25         preserve8
26
27 #if HAVE_ARMV5TE
28 function ff_prefetch_arm, export=1
        @ Issues one pld per iteration: r0 is the address to prefetch, r1 is
        @ added to r0 after every pld, r2 is the iteration count.
        @ NOTE(review): the C prototype is not visible here; presumably
        @ (mem, stride, h) -- confirm against the header.
        @ A count of 0 is not special-cased: subs wraps and the loop goes on.
29         subs            r2,  r2,  #1    @ count down; Z feeds the bne below
30         pld             [r0]
31         add             r0,  r0,  r1    @ step to the next address
32         bne             ff_prefetch_arm @ loop by branching back to the entry label
33         bx              lr
34 endfunc
35 #else
36 #define pld @
/* Fallback when HAVE_ARMV5TE is 0: each pld line in this file then begins
   with the assembler comment character, so the whole line is ignored. */
37 #endif
38
39 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ Rd0..Rd3 = the 16 bytes that start shift bytes into the five words
        @ Rn0..Rn4 (Rn0 is the lowest-addressed word; the lsr/lsl pairing
        @ assumes little-endian byte order). Rn0..Rn4 are only read.
        @ shift is a byte count; the callers visible here pass 1, 2 or 3.
40         mov             \Rd0, \Rn0, lsr #(\shift * 8)        @ drop the leading shift bytes of each word
41         mov             \Rd1, \Rn1, lsr #(\shift * 8)
42         mov             \Rd2, \Rn2, lsr #(\shift * 8)
43         mov             \Rd3, \Rn3, lsr #(\shift * 8)
44         orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)     @ fill the top from the following word
45         orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
46         orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
47         orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
48 .endm
49 .macro  ALIGN_DWORD shift, R0, R1, R2
        @ In-place variant: R0, R1 = the 8 bytes that start shift bytes into
        @ the three words R0, R1, R2 (R0 lowest-addressed). R2 is only read.
        @ shift is a byte count; the callers visible here pass 1, 2 or 3.
50         mov             \R0, \R0, lsr #(\shift * 8)
51         orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)        @ must read R1 before it is shifted below
52         mov             \R1, \R1, lsr #(\shift * 8)
53         orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
54 .endm
55 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        @ Rdst0, Rdst1 = the 8 bytes that start shift bytes into the three
        @ words Rsrc0, Rsrc1, Rsrc2 (Rsrc0 lowest-addressed).
        @ Rsrc0 is read only by the first mov, so Rdst1 may be the same
        @ register as Rsrc0 (RND_XY2_IT relies on this for align 3).
56         mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
57         mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
58         orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
59         orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
60 .endm
61
62 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Byte-wise average of Rn and Rm, rounding halves up, done on two
        @ words (8 bytes) at once.
63         @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
64         @ Rmask = 0xFEFEFEFE (must be loaded by the caller)
65         @ Rn = destroyed (overwritten with Rn | Rm); Rm is only read
66         eor             \Rd0, \Rn0, \Rm0
67         eor             \Rd1, \Rn1, \Rm1
68         orr             \Rn0, \Rn0, \Rm0
69         orr             \Rn1, \Rn1, \Rm1
70         and             \Rd0, \Rd0, \Rmask      @ clear bit 0 of every byte so lsr #1 cannot leak across bytes
71         and             \Rd1, \Rd1, \Rmask
72         sub             \Rd0, \Rn0, \Rd0, lsr #1
73         sub             \Rd1, \Rn1, \Rd1, lsr #1
74 .endm
75
76 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Byte-wise average of Rn and Rm, rounding halves down, done on two
        @ words (8 bytes) at once.
77         @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
78         @ Rmask = 0xFEFEFEFE (must be loaded by the caller)
79         @ Rn = destroyed (overwritten with Rn & Rm); Rm is only read
80         eor             \Rd0, \Rn0, \Rm0
81         eor             \Rd1, \Rn1, \Rm1
82         and             \Rn0, \Rn0, \Rm0
83         and             \Rn1, \Rn1, \Rm1
84         and             \Rd0, \Rd0, \Rmask      @ clear bit 0 of every byte so lsr #1 cannot leak across bytes
85         and             \Rd1, \Rd1, \Rmask
86         add             \Rd0, \Rn0, \Rd0, lsr #1
87         add             \Rd1, \Rn1, \Rd1, lsr #1
88 .endm
89
90 .macro  JMP_ALIGN tmp, reg
        @ Dispatch on the low two bits of reg: branch forward to local label
        @ 1, 2, 3 or 4 for a misalignment of 0, 1, 2 or 3 bytes.
        @ reg is rounded down to a word boundary; tmp is clobbered.
        @ The code following the macro has to define the labels 1: to 4:.
91         ands            \tmp, \reg, #3
92         bic             \reg, \reg, #3          @ bic leaves the flags from ands intact
93         beq             1f
94         subs            \tmp, \tmp, #1
95         beq             2f
96         subs            \tmp, \tmp, #1
97         beq             3f
98         b    4f
99 .endm
100
101 @ ----------------------------------------------------------------
102         .align 5
103 function ff_put_pixels16_arm, export=1
104         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
105         @ block = word aligned, pixels = unaligned
        @ Copies h rows of 16 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h (counted down to 0).
        @ h = 0 is not special-cased: subs would wrap and the loop would go on.
        @ JMP_ALIGN rounds r1 down to a word boundary and picks one of the
        @ four loops below (labels 1-4) from the low two bits of pixels.
        @ r12 is used as scratch in the misaligned loops and is not saved.
106         pld             [r1]
107         push            {r4-r11, lr}
108         JMP_ALIGN       r5,  r1
109 1:                                      @ pixels word aligned: straight copy
110         ldm             r1,  {r4-r7}
111         add             r1,  r1,  r2
112         stm             r0,  {r4-r7}
113         pld             [r1]            @ prefetch the next source row
114         subs            r3,  r3,  #1
115         add             r0,  r0,  r2    @ add leaves the flags from subs intact
116         bne             1b
117         pop             {r4-r11, pc}
118         .align 5
119 2:                                      @ pixels 1 byte past a word boundary
120         ldm             r1,  {r4-r8}    @ five words cover the 16 wanted bytes
121         add             r1,  r1,  r2
122         ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
123         pld             [r1]
124         subs            r3,  r3,  #1
125         stm             r0,  {r9-r12}
126         add             r0,  r0,  r2
127         bne             2b
128         pop             {r4-r11, pc}
129         .align 5
130 3:                                      @ pixels 2 bytes past a word boundary
131         ldm             r1,  {r4-r8}
132         add             r1,  r1,  r2
133         ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
134         pld             [r1]
135         subs            r3,  r3,  #1
136         stm             r0,  {r9-r12}
137         add             r0,  r0,  r2
138         bne             3b
139         pop             {r4-r11, pc}
140         .align 5
141 4:                                      @ pixels 3 bytes past a word boundary
142         ldm             r1,  {r4-r8}
143         add             r1,  r1,  r2
144         ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
145         pld             [r1]
146         subs            r3,  r3,  #1
147         stm             r0,  {r9-r12}
148         add             r0,  r0,  r2
149         bne             4b
150         pop             {r4-r11,pc}
151 endfunc
152
153 @ ----------------------------------------------------------------
154         .align 5
155 function ff_put_pixels8_arm, export=1
156         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
157         @ block = word aligned, pixels = unaligned
        @ Copies h rows of 8 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h (counted down to 0).
        @ h = 0 is not special-cased: subs would wrap and the loop would go on.
        @ JMP_ALIGN rounds r1 down to a word boundary and picks one of the
        @ four loops below (labels 1-4) from the low two bits of pixels.
        @ r12 is used as scratch in the misaligned loops and is not saved.
158         pld             [r1]
159         push            {r4-r5,lr}
160         JMP_ALIGN       r5,  r1
161 1:                                      @ pixels word aligned: straight copy
162         ldm             r1,  {r4-r5}
163         add             r1,  r1,  r2
164         subs            r3,  r3,  #1
165         pld             [r1]            @ prefetch the next source row
166         stm             r0,  {r4-r5}
167         add             r0,  r0,  r2    @ add leaves the flags from subs intact
168         bne             1b
169         pop             {r4-r5,pc}
170         .align 5
171 2:                                      @ pixels 1 byte past a word boundary
172         ldm             r1,  {r4-r5, r12}       @ three words cover the 8 wanted bytes
173         add             r1,  r1,  r2
174         ALIGN_DWORD     1,   r4,  r5,  r12
175         pld             [r1]
176         subs            r3,  r3,  #1
177         stm             r0,  {r4-r5}
178         add             r0,  r0,  r2
179         bne             2b
180         pop             {r4-r5,pc}
181         .align 5
182 3:                                      @ pixels 2 bytes past a word boundary
183         ldm             r1,  {r4-r5, r12}
184         add             r1,  r1,  r2
185         ALIGN_DWORD     2,   r4,  r5,  r12
186         pld             [r1]
187         subs            r3,  r3,  #1
188         stm             r0,  {r4-r5}
189         add             r0,  r0,  r2
190         bne             3b
191         pop             {r4-r5,pc}
192         .align 5
193 4:                                      @ pixels 3 bytes past a word boundary
194         ldm             r1,  {r4-r5, r12}
195         add             r1,  r1,  r2
196         ALIGN_DWORD     3,   r4,  r5,  r12
197         pld             [r1]
198         subs            r3,  r3,  #1
199         stm             r0,  {r4-r5}
200         add             r0,  r0,  r2
201         bne             4b
202         pop             {r4-r5,pc}
203 endfunc
204
205 @ ----------------------------------------------------------------
206         .align 5
207 function ff_put_pixels8_x2_arm, export=1
208         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
209         @ block = word aligned, pixels = unaligned
        @ For each of h rows, stores 8 bytes: the rounded-up average of
        @ pixels[x] and pixels[x + 1] for x = 0..7 (see RND_AVG32).
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h (counted down to 0),
        @ r12 = 0xfefefefe mask for RND_AVG32.
        @ h = 0 is not special-cased: subs would wrap and the loop would go on.
        @ Every path loads three words per row because 9 source bytes are needed.
210         pld             [r1]
211         push            {r4-r10,lr}
212         ldr             r12, =0xfefefefe
213         JMP_ALIGN       r5,  r1
214 1:                                      @ misalignment 0: r4,r5 = bytes 0..7
215         ldm             r1,  {r4-r5, r10}
216         add             r1,  r1,  r2
217         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = bytes 1..8
218         pld             [r1]
219         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
220         subs            r3,  r3,  #1
221         stm             r0,  {r8-r9}
222         add             r0,  r0,  r2    @ add leaves the flags from subs intact
223         bne             1b
224         pop             {r4-r10,pc}
225         .align 5
226 2:                                      @ misalignment 1
227         ldm             r1,  {r4-r5, r10}
228         add             r1,  r1,  r2
229         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = pixels[0..7]
230         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10    @ r8,r9 = pixels[1..8]
231         pld             [r1]
232         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12       @ r4,r5 are free again here
233         subs            r3,  r3,  #1
234         stm             r0,  {r4-r5}
235         add             r0,  r0,  r2
236         bne             2b
237         pop             {r4-r10,pc}
238         .align 5
239 3:                                      @ misalignment 2
240         ldm             r1,  {r4-r5, r10}
241         add             r1,  r1,  r2
242         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
243         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
244         pld             [r1]
245         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
246         subs            r3,  r3,  #1
247         stm             r0,  {r4-r5}
248         add             r0,  r0,  r2
249         bne             3b
250         pop             {r4-r10,pc}
251         .align 5
252 4:                                      @ misalignment 3
253         ldm             r1,  {r4-r5, r10}
254         add             r1,  r1,  r2
255         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = pixels[0..7]
256         pld             [r1]
257         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12       @ pixels[1..8] is exactly r5,r10
258         subs            r3,  r3,  #1
259         stm             r0,  {r8-r9}
260         add             r0,  r0,  r2
261         bne             4b
262         pop             {r4-r10,pc}
263 endfunc
264
265         .align 5
266 function ff_put_no_rnd_pixels8_x2_arm, export=1
267         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
268         @ block = word aligned, pixels = unaligned
        @ For each of h rows, stores 8 bytes: the rounded-down average of
        @ pixels[x] and pixels[x + 1] for x = 0..7 (see NO_RND_AVG32).
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h (counted down to 0),
        @ r12 = 0xfefefefe mask for NO_RND_AVG32.
        @ h = 0 is not special-cased: subs would wrap and the loop would go on.
        @ Every path loads three words per row because 9 source bytes are needed.
269         pld             [r1]
270         push            {r4-r10,lr}
271         ldr             r12, =0xfefefefe
272         JMP_ALIGN       r5,  r1
273 1:                                      @ misalignment 0: r4,r5 = bytes 0..7
274         ldm             r1,  {r4-r5, r10}
275         add             r1,  r1,  r2
276         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = bytes 1..8
277         pld             [r1]
278         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
279         subs            r3,  r3,  #1
280         stm             r0,  {r8-r9}
281         add             r0,  r0,  r2    @ add leaves the flags from subs intact
282         bne             1b
283         pop             {r4-r10,pc}
284         .align 5
285 2:                                      @ misalignment 1
286         ldm             r1,  {r4-r5, r10}
287         add             r1,  r1,  r2
288         ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = pixels[0..7]
289         ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10    @ r8,r9 = pixels[1..8]
290         pld             [r1]
291         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12       @ r4,r5 are free again here
292         subs            r3,  r3,  #1
293         stm             r0,  {r4-r5}
294         add             r0,  r0,  r2
295         bne             2b
296         pop             {r4-r10,pc}
297         .align 5
298 3:                                      @ misalignment 2
299         ldm             r1,  {r4-r5, r10}
300         add             r1,  r1,  r2
301         ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
302         ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
303         pld             [r1]
304         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
305         subs            r3,  r3,  #1
306         stm             r0,  {r4-r5}
307         add             r0,  r0,  r2
308         bne             3b
309         pop             {r4-r10,pc}
310         .align 5
311 4:                                      @ misalignment 3
312         ldm             r1,  {r4-r5, r10}
313         add             r1,  r1,  r2
314         ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10    @ r6,r7 = pixels[0..7]
315         pld             [r1]
316         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12       @ pixels[1..8] is exactly r5,r10
317         subs            r3,  r3,  #1
318         stm             r0,  {r8-r9}
319         add             r0,  r0,  r2
320         bne             4b
321         pop             {r4-r10,pc}
322 endfunc
323
324
325 @ ----------------------------------------------------------------
326         .align 5
327 function ff_put_pixels8_y2_arm, export=1
328         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
329         @ block = word aligned, pixels = unaligned
        @ Each output row is the byte-wise rounded-up average (RND_AVG32) of
        @ a source row and the source row below it, 8 bytes wide.
        @ r0 = block, r1 = pixels, r2 = line_size, r12 = 0xfefefefe mask.
        @ r3 is halved on entry and every loop pass writes two rows, so
        @ 2 * (h >> 1) rows are produced. h < 2 gives a count of 0, which is
        @ not special-cased (subs would wrap).
        @ In every RND_AVG32 call the destroyed Rn operand is the older row,
        @ so the newer row survives in Rm for the following average.
330         pld             [r1]
331         push            {r4-r11,lr}
332         mov             r3,  r3,  lsr #1
333         ldr             r12, =0xfefefefe
334         JMP_ALIGN       r5,  r1
335 1:                                      @ pixels word aligned
336         ldm             r1,  {r4-r5}    @ first source row, loaded once before the loop
337         add             r1,  r1,  r2
338 6:      ldm             r1,  {r6-r7}
339         add             r1,  r1,  r2
340         pld             [r1]
341         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
342         ldm             r1,  {r4-r5}    @ this row is reused at the top of the next pass
343         add             r1,  r1,  r2
344         stm             r0,  {r8-r9}
345         add             r0,  r0,  r2
346         pld             [r1]
347         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
348         subs            r3,  r3,  #1
349         stm             r0,  {r8-r9}
350         add             r0,  r0,  r2    @ add leaves the flags from subs intact
351         bne             6b
352         pop             {r4-r11,pc}
353         .align 5
354 2:                                      @ pixels 1 byte past a word boundary
355         ldm             r1,  {r4-r6}
356         add             r1,  r1,  r2
357         pld             [r1]
358         ALIGN_DWORD     1,   r4,  r5,  r6       @ r4,r5 = first source row
359 6:      ldm             r1,  {r7-r9}
360         add             r1,  r1,  r2
361         pld             [r1]
362         ALIGN_DWORD     1,   r7,  r8,  r9
363         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
364         stm             r0,  {r10-r11}
365         add             r0,  r0,  r2
366         ldm             r1,  {r4-r6}
367         add             r1,  r1,  r2
368         pld             [r1]
369         ALIGN_DWORD     1,   r4,  r5,  r6
370         subs            r3,  r3,  #1
371         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12       @ sets no flags, so bne still sees subs
372         stm             r0,  {r10-r11}
373         add             r0,  r0,  r2
374         bne             6b
375         pop             {r4-r11,pc}
376         .align 5
377 3:                                      @ pixels 2 bytes past a word boundary
378         ldm             r1,  {r4-r6}
379         add             r1,  r1,  r2
380         pld             [r1]
381         ALIGN_DWORD     2,   r4,  r5,  r6
382 6:      ldm             r1,  {r7-r9}
383         add             r1,  r1,  r2
384         pld             [r1]
385         ALIGN_DWORD     2,   r7,  r8,  r9
386         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
387         stm             r0,  {r10-r11}
388         add             r0,  r0,  r2
389         ldm             r1,  {r4-r6}
390         add             r1,  r1,  r2
391         pld             [r1]
392         ALIGN_DWORD     2,   r4,  r5,  r6
393         subs            r3,  r3,  #1
394         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
395         stm             r0,  {r10-r11}
396         add             r0,  r0,  r2
397         bne             6b
398         pop             {r4-r11,pc}
399         .align 5
400 4:                                      @ pixels 3 bytes past a word boundary
401         ldm             r1,  {r4-r6}
402         add             r1,  r1,  r2
403         pld             [r1]
404         ALIGN_DWORD     3,   r4,  r5,  r6
405 6:      ldm             r1,  {r7-r9}
406         add             r1,  r1,  r2
407         pld             [r1]
408         ALIGN_DWORD     3,   r7,  r8,  r9
409         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
410         stm             r0,  {r10-r11}
411         add             r0,  r0,  r2
412         ldm             r1,  {r4-r6}
413         add             r1,  r1,  r2
414         pld             [r1]
415         ALIGN_DWORD     3,   r4,  r5,  r6
416         subs            r3,  r3,  #1
417         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
418         stm             r0,  {r10-r11}
419         add             r0,  r0,  r2
420         bne             6b
421         pop             {r4-r11,pc}
422 endfunc
423
424         .align 5
425 function ff_put_no_rnd_pixels8_y2_arm, export=1
426         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
427         @ block = word aligned, pixels = unaligned
        @ Each output row is the byte-wise rounded-down average (NO_RND_AVG32)
        @ of a source row and the source row below it, 8 bytes wide.
        @ r0 = block, r1 = pixels, r2 = line_size, r12 = 0xfefefefe mask.
        @ r3 is halved on entry and every loop pass writes two rows, so
        @ 2 * (h >> 1) rows are produced. h < 2 gives a count of 0, which is
        @ not special-cased (subs would wrap).
        @ In every NO_RND_AVG32 call the destroyed Rn operand is the older
        @ row, so the newer row survives in Rm for the following average.
428         pld             [r1]
429         push            {r4-r11,lr}
430         mov             r3,  r3,  lsr #1
431         ldr             r12, =0xfefefefe
432         JMP_ALIGN       r5,  r1
433 1:                                      @ pixels word aligned
434         ldm             r1,  {r4-r5}    @ first source row, loaded once before the loop
435         add             r1,  r1,  r2
436 6:      ldm             r1,  {r6-r7}
437         add             r1,  r1,  r2
438         pld             [r1]
439         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
440         ldm             r1,  {r4-r5}    @ this row is reused at the top of the next pass
441         add             r1,  r1,  r2
442         stm             r0,  {r8-r9}
443         add             r0,  r0,  r2
444         pld             [r1]
445         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
446         subs            r3,  r3,  #1
447         stm             r0,  {r8-r9}
448         add             r0,  r0,  r2    @ add leaves the flags from subs intact
449         bne             6b
450         pop             {r4-r11,pc}
451         .align 5
452 2:                                      @ pixels 1 byte past a word boundary
453         ldm             r1,  {r4-r6}
454         add             r1,  r1,  r2
455         pld             [r1]
456         ALIGN_DWORD     1,   r4,  r5,  r6       @ r4,r5 = first source row
457 6:      ldm             r1,  {r7-r9}
458         add             r1,  r1,  r2
459         pld             [r1]
460         ALIGN_DWORD     1,   r7,  r8,  r9
461         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
462         stm             r0,  {r10-r11}
463         add             r0,  r0,  r2
464         ldm             r1,  {r4-r6}
465         add             r1,  r1,  r2
466         pld             [r1]
467         ALIGN_DWORD     1,   r4,  r5,  r6
468         subs            r3,  r3,  #1
469         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12       @ sets no flags, so bne still sees subs
470         stm             r0,  {r10-r11}
471         add             r0,  r0,  r2
472         bne             6b
473         pop             {r4-r11,pc}
474         .align 5
475 3:                                      @ pixels 2 bytes past a word boundary
476         ldm             r1,  {r4-r6}
477         add             r1,  r1,  r2
478         pld             [r1]
479         ALIGN_DWORD     2,   r4,  r5,  r6
480 6:      ldm             r1,  {r7-r9}
481         add             r1,  r1,  r2
482         pld             [r1]
483         ALIGN_DWORD     2,   r7,  r8,  r9
484         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
485         stm             r0,  {r10-r11}
486         add             r0,  r0,  r2
487         ldm             r1,  {r4-r6}
488         add             r1,  r1,  r2
489         pld             [r1]
490         ALIGN_DWORD     2,   r4,  r5,  r6
491         subs            r3,  r3,  #1
492         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
493         stm             r0,  {r10-r11}
494         add             r0,  r0,  r2
495         bne             6b
496         pop             {r4-r11,pc}
497         .align 5
498 4:                                      @ pixels 3 bytes past a word boundary
499         ldm             r1,  {r4-r6}
500         add             r1,  r1,  r2
501         pld             [r1]
502         ALIGN_DWORD     3,   r4,  r5,  r6
503 6:      ldm             r1,  {r7-r9}
504         add             r1,  r1,  r2
505         pld             [r1]
506         ALIGN_DWORD     3,   r7,  r8,  r9
507         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
508         stm             r0,  {r10-r11}
509         add             r0,  r0,  r2
510         ldm             r1,  {r4-r6}
511         add             r1,  r1,  r2
512         pld             [r1]
513         ALIGN_DWORD     3,   r4,  r5,  r6
514         subs            r3,  r3,  #1
515         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
516         stm             r0,  {r10-r11}
517         add             r0,  r0,  r2
518         bne             6b
519         pop             {r4-r11,pc}
520 endfunc
521
522         .ltorg
523
524 @ ----------------------------------------------------------------
525 .macro  RND_XY2_IT align, rnd
        @ One source row of the xy2 filter. With a = pixels[0..7] and
        @ b = pixels[1..8] of the row at r1 (r1 already rounded down to a
        @ word boundary, align = original misalignment 0..3):
526         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
527         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ Out: r8,r9 = l1, r10,r11 = h1, r1 advanced by r2, r3 decremented
        @ with the flags of that final subs left set for the caller.
        @ The optional rounder is added only when r3 is even on entry; it is
        @ 0x02020202 for rnd = lsl and 0x01010101 for rnd = lsr.
        @ Clobbers r4-r7, r12 and r14.
.if \align == 0
529         ldm             r1,  {r6-r8}
.elseif \align == 3
531         ldm             r1,  {r5-r7}
.else
533         ldm             r1,  {r8-r10}
.endif
535         add             r1,  r1,  r2
536         pld             [r1]
        @ After this group r4,r5 and r6,r7 hold the two byte vectors that are
        @ one pixel apart (which of them is a or b does not matter for the sums).
.if \align == 0
538         ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
.elseif \align == 1
540         ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
541         ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
.elseif \align == 2
543         ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
544         ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
.elseif \align == 3
546         ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
.endif
548         ldr             r14, =0x03030303
549         tst             r3,  #1         @ Z set when r3 is even; used by the eq-conditional adds below
550         and             r8,  r4,  r14
551         and             r9,  r5,  r14
552         and             r10, r6,  r14
553         and             r11, r7,  r14
554         it              eq
555         andeq           r14, r14, r14, \rnd #1  @ turn the 0x03 mask into the per-byte rounder
556         add             r8,  r8,  r10
557         add             r9,  r9,  r11
558         ldr             r12, =0xfcfcfcfc >> 2
559         itt             eq
560         addeq           r8,  r8,  r14
561         addeq           r9,  r9,  r14
562         and             r4,  r12, r4,  lsr #2
563         and             r5,  r12, r5,  lsr #2
564         and             r6,  r12, r6,  lsr #2
565         and             r7,  r12, r7,  lsr #2
566         add             r10, r4,  r6
567         add             r11, r5,  r7
568         subs            r3,  r3,  #1    @ last flag-setting instruction of the macro
569 .endm
570
571 .macro RND_XY2_EXPAND align, rnd
        @ Complete xy2 loop for one misalignment case. Each stored row is
        @   (((l_prev + l_cur) >> 2) & 0x0f0f0f0f) + (h_prev + h_cur)
        @ where l and h are the partial sums produced by RND_XY2_IT for the
        @ previous and the current source row.
        @ Of two consecutive RND_XY2_IT runs exactly one sees an even r3, so
        @ the rounder enters each l_prev + l_cur sum exactly once.
        @ The macro returns by itself: its final pop matches the
        @ push {r4-r11,lr} done by the functions that expand it.
572         RND_XY2_IT      \align, \rnd
573 6:      push            {r8-r11}        @ keep the previous row sums while the next row is computed
574         RND_XY2_IT      \align, \rnd
575         pop             {r4-r7}         @ r4,r5 = l_prev, r6,r7 = h_prev
576         add             r4,  r4,  r8
577         add             r5,  r5,  r9
578         ldr             r14, =0x0f0f0f0f
579         add             r6,  r6,  r10
580         add             r7,  r7,  r11
581         and             r4,  r14, r4,  lsr #2
582         and             r5,  r14, r5,  lsr #2
583         add             r4,  r4,  r6
584         add             r5,  r5,  r7
585         stm             r0,  {r4-r5}
586         add             r0,  r0,  r2
587         bge             6b              @ tests the subs at the end of RND_XY2_IT; nothing after it sets flags
588         pop             {r4-r11,pc}
589 .endm
590
591         .align 5
592 function ff_put_pixels8_xy2_arm, export=1
593         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
594         @ block = word aligned, pixels = unaligned
        @ 8-byte-wide xy2 filter: each output byte combines pixels[x] and
        @ pixels[x + 1] of two consecutive source rows, using the lsl
        @ (0x02020202) rounder of RND_XY2_IT.
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h.
        @ Each RND_XY2_EXPAND below contains its own loop and return, so
        @ control never falls through from one label to the next.
595         pld             [r1]
596         push            {r4-r11,lr} @ R14 is also called LR; RND_XY2_IT uses it as scratch
597         JMP_ALIGN       r5,  r1
598 1:      RND_XY2_EXPAND  0, lsl
599         .align 5
600 2:      RND_XY2_EXPAND  1, lsl
601         .align 5
602 3:      RND_XY2_EXPAND  2, lsl
603         .align 5
604 4:      RND_XY2_EXPAND  3, lsl
605 endfunc
606
607         .align 5
608 function ff_put_no_rnd_pixels8_xy2_arm, export=1
609         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
610         @ block = word aligned, pixels = unaligned
        @ 8-byte-wide xy2 filter: each output byte combines pixels[x] and
        @ pixels[x + 1] of two consecutive source rows, using the lsr
        @ (0x01010101) rounder of RND_XY2_IT.
        @ r0 = block, r1 = pixels, r2 = line_size, r3 = h.
        @ Each RND_XY2_EXPAND below contains its own loop and return, so
        @ control never falls through from one label to the next.
611         pld             [r1]
612         push            {r4-r11,lr}     @ lr (r14) is saved because RND_XY2_IT uses it as scratch
613         JMP_ALIGN       r5,  r1
614 1:      RND_XY2_EXPAND  0, lsr
615         .align 5
616 2:      RND_XY2_EXPAND  1, lsr
617         .align 5
618 3:      RND_XY2_EXPAND  2, lsr
619         .align 5
620 4:      RND_XY2_EXPAND  3, lsr
621 endfunc
622
623         .align 5
624 @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
625 function ff_add_pixels_clamped_arm, export=1
        /*
         * Adds 8 rows of 8 signed 16-bit block values to the bytes at dest
         * and writes the saturated byte results back to dest.
         * r0 = block (advanced 16 bytes per row), r1 = dest (advanced by
         * stride per row), r2 = stride, r10 = row counter (starts at 8).
         * dest is read and written with word accesses.
         *
         * Clamp trick: a sum with bit 8 set is replaced by the top byte of
         * the inverted block value, i.e. 0 for a negative block value and
         * 255 for a non-negative one.
         * NOTE(review): only bit 8 of the sum is tested, so this presumably
         * relies on every sum lying in -256..511 -- confirm with the callers.
         */
626         push            {r4-r10}
627         mov             r10, #8
628 1:
629         ldr             r4,  [r1]               /* load dest */
630         /* block[0] and block[1]*/
631         ldrsh           r5,  [r0]
632         ldrsh           r7,  [r0, #2]
633         and             r6,  r4,  #0xFF
634         and             r8,  r4,  #0xFF00
635         add             r6,  r5,  r6
636         add             r8,  r7,  r8,  lsr #8
637         mvn             r5,  r5                 /* ~block: top byte is 0x00 or 0xFF by sign */
638         mvn             r7,  r7
639         tst             r6,  #0x100             /* sum outside 0..255? */
640         it              ne
641         movne           r6,  r5,  lsr #24       /* saturate to 0 or 255 */
642         tst             r8,  #0x100
643         it              ne
644         movne           r8,  r7,  lsr #24
645         mov             r9,  r6
646         ldrsh           r5,  [r0, #4]           /* moved from [A] */
647         orr             r9,  r9,  r8,  lsl #8
648         /* block[2] and block[3] */
649         /* [A] */
650         ldrsh           r7,  [r0, #6]
651         and             r6,  r4,  #0xFF0000
652         and             r8,  r4,  #0xFF000000
653         add             r6,  r5,  r6,  lsr #16
654         add             r8,  r7,  r8,  lsr #24
655         mvn             r5,  r5
656         mvn             r7,  r7
657         tst             r6,  #0x100
658         it              ne
659         movne           r6,  r5,  lsr #24
660         tst             r8,  #0x100
661         it              ne
662         movne           r8,  r7,  lsr #24
663         orr             r9,  r9,  r6,  lsl #16
664         ldr             r4,  [r1, #4]           /* moved from [B] */
665         orr             r9,  r9,  r8,  lsl #24
666         /* store dest */
667         ldrsh           r5,  [r0, #8]           /* moved from [C] */
668         str             r9,  [r1]
669
670         /* load dest */
671         /* [B] */
672         /* block[4] and block[5] */
673         /* [C] */
674         ldrsh           r7,  [r0, #10]
675         and             r6,  r4,  #0xFF
676         and             r8,  r4,  #0xFF00
677         add             r6,  r5,  r6
678         add             r8,  r7,  r8,  lsr #8
679         mvn             r5,  r5
680         mvn             r7,  r7
681         tst             r6,  #0x100
682         it              ne
683         movne           r6,  r5,  lsr #24
684         tst             r8,  #0x100
685         it              ne
686         movne           r8,  r7,  lsr #24
687         mov             r9,  r6
688         ldrsh           r5,  [r0, #12]          /* moved from [D] */
689         orr             r9,  r9,  r8,  lsl #8
690         /* block[6] and block[7] */
691         /* [D] */
692         ldrsh           r7,  [r0, #14]
693         and             r6,  r4,  #0xFF0000
694         and             r8,  r4,  #0xFF000000
695         add             r6,  r5,  r6,  lsr #16
696         add             r8,  r7,  r8,  lsr #24
697         mvn             r5,  r5
698         mvn             r7,  r7
699         tst             r6,  #0x100
700         it              ne
701         movne           r6,  r5,  lsr #24
702         tst             r8,  #0x100
703         it              ne
704         movne           r8,  r7,  lsr #24
705         orr             r9,  r9,  r6,  lsl #16
706         add             r0,  r0,  #16           /* moved from [E] */
707         orr             r9,  r9,  r8,  lsl #24
708         subs            r10, r10, #1            /* moved from [F] */
709         /* store dest */
710         str             r9,  [r1, #4]
711
712         /* [E] */
713         /* [F] */
714         add             r1,  r1,  r2            /* next dest row; flags from subs are kept */
715         bne             1b
716
717         pop             {r4-r10}
718         bx              lr
719 endfunc