]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/dsputil_armv6.S
dsputil: Move pix_sum, pix_norm1, shrink function pointers to mpegvideoenc
[ffmpeg] / libavcodec / arm / dsputil_armv6.S
1 /*
2  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/arm/asm.S"
22
/*
 * ff_get_pixels_armv6: widen an 8x8 block of bytes to 16-bit values.
 *
 * Register use, as read from the code below:
 *   r0  destination, written sequentially (16 bytes per row, post-incremented)
 *   r1  source pointer, advanced once per row by ldrd_post
 *   r2  per-row step applied to r1 (also used as the prefetch offset)
 *   lr  row counter, counts down from 8
 * r4-r8 and lr are saved and restored; r12 is used as scratch without saving.
 * NOTE(review): ldrd_post comes from libavutil/arm/asm.S, which is not
 * visible here; presumably it loads r4/r5 from [r1] and then adds r2 to
 * r1 - confirm.
 */
23 function ff_get_pixels_armv6, export=1
24         pld             [r1, r2]
25         push            {r4-r8, lr}
26         mov             lr,  #8                        /* 8 rows */
27 1:
28         ldrd_post       r4,  r5,  r1,  r2              /* r4,r5 = 8 source bytes of this row */
29         subs            lr,  lr,  #1                   /* flags are consumed by the bgt at the end of the loop */
30         uxtb16          r6,  r4                        /* r6  = bytes 0 and 2 of r4 as halfwords */
31         uxtb16          r4,  r4,  ror #8               /* r4  = bytes 1 and 3 of r4 as halfwords */
32         uxtb16          r12, r5                        /* r12 = bytes 0 and 2 of r5 as halfwords */
33         uxtb16          r8,  r5,  ror #8               /* r8  = bytes 1 and 3 of r5 as halfwords */
34         pld             [r1, r2]
           /* Re-interleave so the halfwords end up in source byte order. */
35         pkhbt           r5,  r6,  r4,  lsl #16         /* r5  = byte1:byte0 */
36         pkhtb           r6,  r4,  r6,  asr #16         /* r6  = byte3:byte2 */
37         pkhbt           r7,  r12, r8,  lsl #16         /* r7  = byte5:byte4 */
38         pkhtb           r12, r8,  r12, asr #16         /* r12 = byte7:byte6 */
39         stm             r0!, {r5,r6,r7,r12}            /* store 8 halfwords, advance r0 by 16 */
40         bgt             1b
41
42         pop             {r4-r8, pc}
43 endfunc
44
/*
 * ff_diff_pixels_armv6: per-byte difference of two 8x8 blocks, stored as
 * 16-bit values.
 *
 * Register use, as read from the code below:
 *   r0  destination, written sequentially (16 bytes per row, post-incremented)
 *   r1  first source (minuend), advanced once per row by ldrd_post
 *   r2  second source (subtrahend), advanced once per row by ldrd_post
 *   r3  per-row step applied to both r1 and r2
 *   lr  row counter, counts down from 8
 * r4-r9 and lr are saved and restored.
 * NOTE(review): ldrd_post comes from libavutil/arm/asm.S, which is not
 * visible here; presumably it loads a register pair from the base and then
 * adds the step to the base - confirm.
 */
45 function ff_diff_pixels_armv6, export=1
46         pld             [r1, r3]
47         pld             [r2, r3]
48         push            {r4-r9, lr}
49         mov             lr,  #8                        /* 8 rows */
50 1:
51         ldrd_post       r4,  r5,  r1,  r3              /* r4,r5 = 8 bytes from the first source */
52         ldrd_post       r6,  r7,  r2,  r3              /* r6,r7 = 8 bytes from the second source */
           /* First word: split both sources into even/odd bytes and subtract. */
53         uxtb16          r8,  r4
54         uxtb16          r4,  r4,  ror #8
55         uxtb16          r9,  r6
56         uxtb16          r6,  r6,  ror #8
57         pld             [r1, r3]
58         ssub16          r9,  r8,  r9                   /* even bytes: first - second */
59         ssub16          r6,  r4,  r6                   /* odd bytes:  first - second */
60         uxtb16          r8,  r5
61         uxtb16          r5,  r5,  ror #8
62         pld             [r2, r3]
63         pkhbt           r4,  r9,  r6,  lsl #16         /* r4 = diff1:diff0 */
64         pkhtb           r6,  r6,  r9,  asr #16         /* r6 = diff3:diff2 */
           /* Second word: same treatment for bytes 4..7. */
65         uxtb16          r9,  r7
66         uxtb16          r7,  r7,  ror #8
67         ssub16          r9,  r8,  r9
68         ssub16          r5,  r5,  r7
69         subs            lr,  lr,  #1                   /* flags are consumed by the bgt below */
70         pkhbt           r8,  r9,  r5,  lsl #16         /* r8 = diff5:diff4 */
71         pkhtb           r9,  r5,  r9,  asr #16         /* r9 = diff7:diff6 */
72         stm             r0!, {r4,r6,r8,r9}             /* store 8 halfwords, advance r0 by 16 */
73         bgt             1b
74
75         pop             {r4-r9, pc}
76 endfunc
77
/*
 * ff_pix_abs16_armv6: sum of absolute byte differences over rows of
 * 16 bytes.
 *
 * Register use, as read from the code below:
 *   r0  on entry: ignored (overwritten immediately); on return: the sum
 *   r1  first source, read with ldm (four words per row)
 *   r2  second source, read with four single-word loads per row
 *   r3  per-row step added to both r1 and r2
 *   [sp] on entry: row count, loaded into r0
 *        (presumably the fifth C argument - confirm against the prototype)
 *   r12, lr  two independent partial sums, added together at the end
 * r4-r9 and lr are saved and restored.
 * NOTE(review): the loop body runs before the count is tested and exits
 * only when the count reaches exactly zero, so a row count of 0 would not
 * terminate promptly; callers are assumed to pass a count >= 1.
 */
78 function ff_pix_abs16_armv6, export=1
79         ldr             r0,  [sp]                      /* r0 = row count (read before push moves sp) */
80         push            {r4-r9, lr}
81         mov             r12, #0
82         mov             lr,  #0
83         ldm             r1,  {r4-r7}                   /* first row of the first source */
84         ldr             r8,  [r2]                      /* first word of the second source */
85 1:
86         ldr             r9,  [r2, #4]
87         pld             [r1, r3]
88         usada8          r12, r4,  r8,  r12             /* bytes 0..3 */
89         ldr             r8,  [r2, #8]
90         pld             [r2, r3]
91         usada8          lr,  r5,  r9,  lr              /* bytes 4..7 */
92         ldr             r9,  [r2, #12]
93         usada8          r12, r6,  r8,  r12             /* bytes 8..11 */
94         subs            r0,  r0,  #1
95         usada8          lr,  r7,  r9,  lr              /* bytes 12..15; does not alter the flags from subs */
96         beq             2f                             /* last row done: skip loading a further row */
97         add             r1,  r1,  r3
98         ldm             r1,  {r4-r7}                   /* next row of the first source */
99         add             r2,  r2,  r3
100         ldr             r8,  [r2]                      /* first word of the next second-source row */
101         b               1b
102 2:
103         add             r0,  r12, lr                   /* combine the two partial sums */
104         pop             {r4-r9, pc}
105 endfunc
106
/*
 * ff_pix_abs16_x2_armv6: sum of absolute byte differences over rows of
 * 16 bytes, where the second source is first averaged horizontally:
 * each of its bytes is replaced by the rounded-up mean of itself and its
 * right-hand neighbour, (a + b + 1) >> 1.
 *
 * Register use, as read from the code below:
 *   r0  on entry: ignored (zeroed); on return: the sum
 *   r1  first source, 16 bytes read per row
 *   r2  second source, 17 bytes read per row (four words plus one ldrb)
 *   r3  per-row step added to both r1 and r2
 *   [sp] on entry: row count, loaded into r12
 *        (presumably the fifth C argument - confirm against the prototype)
 *   lr  constant 0x01010101, the low bit of every byte lane
 * r4-r11 and lr are saved and restored.
 *
 * Rounded average per word: uhadd8 yields (a + b) >> 1 in each byte,
 * (a ^ b) & 1 is the bit that the halving dropped, and uadd8 adds it back.
 * The neighbour word is built as (w >> 8) | (next_w << 24), i.e. the four
 * bytes starting one byte further on.
 */
107 function ff_pix_abs16_x2_armv6, export=1
108         ldr             r12, [sp]                      /* r12 = row count (read before push moves sp) */
109         push            {r4-r11, lr}
110         mov             r0,  #0
111         mov             lr,  #1
112         orr             lr,  lr,  lr,  lsl #8
113         orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101 */
114 1:
            /* Word 0: second-source bytes 0..3 averaged with bytes 1..4. */
115         ldr             r8,  [r2]
116         ldr             r9,  [r2, #4]
117         lsr             r10, r8,  #8
118         ldr             r4,  [r1]
119         lsr             r6,  r9,  #8
120         orr             r10, r10, r9,  lsl #24          /* r10 = second-source bytes 1..4 */
121         ldr             r5,  [r2, #8]
122         eor             r11, r8,  r10
123         uhadd8          r7,  r8,  r10
124         orr             r6,  r6,  r5,  lsl #24          /* r6 = second-source bytes 5..8 */
125         and             r11, r11, lr
126         uadd8           r7,  r7,  r11                   /* r7 = rounded average, bytes 0..3 */
127         ldr             r8,  [r1, #4]
128         usada8          r0,  r4,  r7,  r0
            /* Word 1: bytes 4..7 averaged with bytes 5..8. */
129         eor             r7,  r9,  r6
130         lsr             r10, r5,  #8
131         and             r7,  r7,  lr
132         uhadd8          r4,  r9,  r6
133         ldr             r6,  [r2, #12]
134         uadd8           r4,  r4,  r7                    /* r4 = rounded average, bytes 4..7 */
135         pld             [r1, r3]
136         orr             r10, r10, r6,  lsl #24          /* r10 = second-source bytes 9..12 */
137         usada8          r0,  r8,  r4,  r0
            /* Word 2: bytes 8..11 averaged with bytes 9..12. */
138         ldr             r4,  [r1, #8]
139         eor             r11, r5,  r10
140         ldrb            r7,  [r2, #16]                  /* 17th byte: right neighbour of byte 15 */
141         and             r11, r11, lr
142         uhadd8          r8,  r5,  r10
143         ldr             r5,  [r1, #12]
144         uadd8           r8,  r8,  r11                   /* r8 = rounded average, bytes 8..11 */
145         pld             [r2, r3]
146         lsr             r10, r6,  #8
147         usada8          r0,  r4,  r8,  r0
            /* Word 3: bytes 12..15 averaged with bytes 13..16. */
148         orr             r10, r10, r7,  lsl #24          /* r10 = second-source bytes 13..16 */
149         subs            r12,  r12,  #1                  /* flags consumed by bgt; nothing below sets flags */
150         eor             r11, r6,  r10
151         add             r1,  r1,  r3
152         uhadd8          r9,  r6,  r10
153         and             r11, r11, lr
154         uadd8           r9,  r9,  r11                   /* r9 = rounded average, bytes 12..15 */
155         add             r2,  r2,  r3
156         usada8          r0,  r5,  r9,  r0
157         bgt             1b
158
159         pop             {r4-r11, pc}
160 endfunc
161
/*
 * usad_y2: process one row of 16 bytes for the vertically averaged SAD.
 *
 * Parameters (all registers):
 *   p0-p3  on entry: the four words of the previous second-source row;
 *          overwritten during the macro (used as scratch once consumed)
 *   n0-n3  on exit: the four words of the second-source row at r2, as it
 *          was on entry, i.e. the row just read
 * Fixed registers used by the body:
 *   r0  running sum, accumulated with usada8
 *   r1  first source, 16 bytes read, then advanced by r3
 *   r2  second source, 16 bytes read, then advanced by r3
 *   lr  must hold 0x01010101 (low bit of every byte lane)
 *
 * For each word the previous and current second-source rows are averaged
 * with rounding up: uhadd8 gives (a + b) >> 1 per byte, (a ^ b) & 1 is
 * the bit lost by the halving, and uadd8 adds it back.  The result is
 * compared with the matching word from r1.
 * Because the n registers leave the macro holding the row just read,
 * consecutive invocations swap the p and n register sets.
 */
162 .macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
            /* Word 0 (n1 and n2 serve as temporaries here). */
163         ldr             \n0, [r2]
164         eor             \n1, \p0, \n0
165         uhadd8          \p0, \p0, \n0
166         and             \n1, \n1, lr
167         ldr             \n2, [r1]
168         uadd8           \p0, \p0, \n1
169         ldr             \n1, [r2, #4]
170         usada8          r0,  \p0, \n2, r0
171         pld             [r1,  r3]
            /* Word 1 (n3 and p0 serve as temporaries here). */
172         eor             \n3, \p1, \n1
173         uhadd8          \p1, \p1, \n1
174         and             \n3, \n3, lr
175         ldr             \p0, [r1, #4]
176         uadd8           \p1, \p1, \n3
177         ldr             \n2, [r2, #8]
178         usada8          r0,  \p1, \p0, r0
179         pld             [r2,  r3]
            /* Word 2 (p0 and p1 serve as temporaries here). */
180         eor             \p0, \p2, \n2
181         uhadd8          \p2, \p2, \n2
182         and             \p0, \p0, lr
183         ldr             \p1, [r1, #8]
184         uadd8           \p2, \p2, \p0
185         ldr             \n3, [r2, #12]
186         usada8          r0,  \p2, \p1, r0
            /* Word 3 (p1 and p0 serve as temporaries here). */
187         eor             \p1, \p3, \n3
188         uhadd8          \p3, \p3, \n3
189         and             \p1, \p1, lr
190         ldr             \p0,  [r1, #12]
191         uadd8           \p3, \p3, \p1
192         add             r1,  r1,  r3
193         usada8          r0,  \p3, \p0,  r0
194         add             r2,  r2,  r3
195 .endm
196
/*
 * ff_pix_abs16_y2_armv6: sum of absolute byte differences over rows of
 * 16 bytes, where each second-source row is first averaged with the row
 * that follows it (the per-row work is done by the usad_y2 macro).
 *
 * Register use, as read from the code below:
 *   r0  on entry: ignored (zeroed); on return: the sum
 *   r1  first source
 *   r2  second source; one row is preloaded before the loop, so one more
 *       row of r2 than of r1 is read in total
 *   r3  per-row step
 *   [sp] on entry: row count, loaded into r12
 *        (presumably the fifth C argument - confirm against the prototype)
 *   lr  constant 0x01010101, required by usad_y2
 * r4-r11 and lr are saved and restored.
 * NOTE(review): two rows are handled per iteration and the count drops by
 * 2 each time, so an odd count would process one row more than requested;
 * callers are assumed to pass an even count.
 */
197 function ff_pix_abs16_y2_armv6, export=1
198         pld             [r1]
199         pld             [r2]
200         ldr             r12, [sp]                      /* r12 = row count (read before push moves sp) */
201         push            {r4-r11, lr}
202         mov             r0,  #0
203         mov             lr,  #1
204         orr             lr,  lr,  lr,  lsl #8
205         orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101 */
            /* Preload the first second-source row as the initial previous row. */
206         ldr             r4,  [r2]
207         ldr             r5,  [r2, #4]
208         ldr             r6,  [r2, #8]
209         ldr             r7,  [r2, #12]
210         add             r2,  r2,  r3
211 1:
            /* previous row in r4-r7, row just read is left in r8-r11 */
212         usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
213         subs            r12, r12, #2                   /* flags consumed by bgt; the macro body sets no flags */
            /* roles swapped: previous row in r8-r11, row just read left in r4-r7 */
214         usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
215         bgt             1b
216
217         pop             {r4-r11, pc}
218 endfunc
219
/*
 * ff_pix_abs8_armv6: sum of absolute byte differences over rows of
 * 8 bytes, two rows per loop iteration.
 *
 * Register use, as read from the code below:
 *   r0  on entry: ignored (zeroed); on return: the sum
 *   r1  first source, read and advanced by ldrd_post
 *   r2  second source, read with ldr [r2, #4] followed by ldr_post
 *   r3  per-row step
 *   [sp] on entry: row count, loaded into r12
 *        (presumably the fifth C argument - confirm against the prototype)
 *   r0, lr  two independent partial sums (low and high word of each row),
 *           added together at the end
 * r4-r9 and lr are saved and restored.
 * NOTE(review): ldrd_post and ldr_post come from libavutil/arm/asm.S,
 * which is not visible here; presumably they load from the base register,
 * then add the step to it, and leave the condition flags untouched - the
 * beq below relies on the flags set by the subs at the top of the loop.
 * NOTE(review): the loop exits only when the count reaches exactly zero
 * after subtracting 2, so callers are assumed to pass an even count >= 2.
 */
220 function ff_pix_abs8_armv6, export=1
221         pld             [r2, r3]
222         ldr             r12, [sp]                      /* r12 = row count (read before push moves sp) */
223         push            {r4-r9, lr}
224         mov             r0,  #0
225         mov             lr,  #0
226         ldrd_post       r4,  r5,  r1,  r3              /* first row of the first source */
227 1:
228         subs            r12, r12, #2                   /* flags are tested by the beq further down */
229         ldr             r7,  [r2, #4]
230         ldr_post        r6,  r2,  r3                   /* r6,r7 = second-source row for r4,r5 */
231         ldrd_post       r8,  r9,  r1,  r3              /* next row of the first source */
232         usada8          r0,  r4,  r6,  r0
233         pld             [r2, r3]
234         usada8          lr,  r5,  r7,  lr
235         ldr             r7,  [r2, #4]
236         ldr_post        r6,  r2,  r3                   /* r6,r7 = second-source row for r8,r9 */
237         beq             2f                             /* last pair: finish without loading another row */
238         ldrd_post       r4,  r5,  r1,  r3              /* first-source row for the next iteration */
239         usada8          r0,  r8,  r6,  r0
240         pld             [r2, r3]
241         usada8          lr,  r9,  r7,  lr
242         b               1b
243 2:
            /* Accumulate the second row of the final pair and combine. */
244         usada8          r0,  r8,  r6,  r0
245         usada8          lr,  r9,  r7,  lr
246         add             r0,  r0,  lr
247         pop             {r4-r9, pc}
248 endfunc
249
/*
 * ff_sse16_armv6: sum of squared byte differences over rows of 16 bytes.
 *
 * Register use, as read from the code below:
 *   r0  on entry: ignored (zeroed); on return: the sum
 *   r1  first source, read with two ldrd per row
 *   r2  second source, read with four single-word loads per row
 *   r3  per-row step added to both r1 and r2
 *   [sp] on entry: row count, loaded into r12
 *        (presumably the fifth C argument - confirm against the prototype)
 * r4-r9 and lr are saved and restored.
 *
 * Per word: uxtb16 splits each source into even and odd bytes as
 * halfwords, usub16 forms the per-halfword differences (modulo 2^16), and
 * smlad adds lo*lo + hi*hi to r0, treating each halfword as signed.
 */
250 function ff_sse16_armv6, export=1
251         ldr             r12, [sp]                      /* r12 = row count (read before push moves sp) */
252         push            {r4-r9, lr}
253         mov             r0,  #0
254 1:
            /* Bytes 0..3. */
255         ldrd            r4,  r5,  [r1]
256         ldr             r8,  [r2]
257         uxtb16          lr,  r4
258         uxtb16          r4,  r4,  ror #8
259         uxtb16          r9,  r8
260         uxtb16          r8,  r8,  ror #8
261         ldr             r7,  [r2, #4]
262         usub16          lr,  lr,  r9
263         usub16          r4,  r4,  r8
264         smlad           r0,  lr,  lr,  r0
            /* Bytes 4..7. */
265         uxtb16          r6,  r5
266         uxtb16          lr,  r5,  ror #8
267         uxtb16          r8,  r7
268         uxtb16          r9,  r7,  ror #8
269         smlad           r0,  r4,  r4,  r0               /* odd bytes of word 0 */
270         ldrd            r4,  r5,  [r1, #8]
271         usub16          r6,  r6,  r8
272         usub16          r8,  lr,  r9
273         ldr             r7,  [r2, #8]
274         smlad           r0,  r6,  r6,  r0
            /* Bytes 8..11. */
275         uxtb16          lr,  r4
276         uxtb16          r4,  r4,  ror #8
277         uxtb16          r9,  r7
278         uxtb16          r7,  r7, ror #8
279         smlad           r0,  r8,  r8,  r0               /* odd bytes of word 1 */
280         ldr             r8,  [r2, #12]
281         usub16          lr,  lr,  r9
282         usub16          r4,  r4,  r7
283         smlad           r0,  lr,  lr,  r0
            /* Bytes 12..15. */
284         uxtb16          r6,  r5
285         uxtb16          r5,  r5,  ror #8
286         uxtb16          r9,  r8
287         uxtb16          r8,  r8,  ror #8
288         smlad           r0,  r4,  r4,  r0               /* odd bytes of word 2 */
289         usub16          r6,  r6,  r9
290         usub16          r5,  r5,  r8
291         smlad           r0,  r6,  r6,  r0
292         add             r1,  r1,  r3
293         add             r2,  r2,  r3
294         subs            r12, r12, #1
295         smlad           r0,  r5,  r5,  r0               /* odd bytes of word 3; no S suffix, so the flags from subs are kept */
296         bgt             1b
297
298         pop             {r4-r9, pc}
299 endfunc
299 endfunc