]> git.sesse.net Git - ffmpeg/blob - libavcodec/arm/hpeldsp_armv6.S
arm: hpeldsp: prevent overreads in armv6 asm
[ffmpeg] / libavcodec / arm / hpeldsp_armv6.S
1 /*
2  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/arm/asm.S"
22
/*
 * call_2x_pixels type, subp
 *
 * Defines the exported function ff_<type>_pixels16<subp>_armv6 by running
 * the matching 8-pixel-wide function ff_<type>_pixels8<subp>_armv6 twice:
 * once on the block addressed by r0/r1 and once on the block 8 bytes
 * further along both pointers.
 *
 * r0-r3 and lr are saved around the first call so that the second call
 * starts from the original arguments.  The second call is a tail call
 * (plain branch), so the 8-pixel function returns straight to the caller
 * of the 16-pixel wrapper.
 *
 * NOTE(review): the push moves sp by 20 bytes, which is not a multiple
 * of 8 -- confirm that no callee depends on 8-byte stack alignment.
 */
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}             /* keep arguments and return address */
        bl              X(ff_\type\()_pixels8\subp\()_armv6)
        pop             {r0-r3, lr}             /* restore the original arguments */
        add             r1,  r1,  #8            /* second half: 8 bytes further on */
        add             r0,  r0,  #8
        b               X(ff_\type\()_pixels8\subp\()_armv6)
endfunc
.endm

/* 16-pixel-wide wrappers built from the 8-pixel-wide functions. */
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
39
/*
 * ff_put_pixels16_armv6: copy a 16-byte-wide block, two rows per iteration.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 *
 * Every row is read with four word loads and written with two doubleword
 * stores.  The second row's word at offset 4 is fetched before the first
 * row's low half is stored, exactly as in the original ordering of memory
 * accesses.
 */
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        /* first row -> r4-r7 */
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr             r5,  [r1, #4]
        ldr_post        r4,  r1,  r2
        subs            r3,  r3,  #2            /* flags consumed by the bne below */
        strd            r6,  r7,  [r0, #8]
        ldr             r9,  [r1, #4]           /* second row, bytes 4..7 */
        strd_post       r4,  r5,  r0,  r2
        /* second row -> r8-r11 */
        ldr             r11, [r1, #12]
        ldr             r10, [r1, #8]
        ldr_post        r8,  r1,  r2
        strd            r10, r11, [r0, #8]
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
61
/*
 * ff_put_pixels8_armv6: copy an 8-byte-wide block, two rows per iteration.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 */
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        subs            r3,  r3,  #2            /* flags consumed by the bne below */
        ldr             r5,  [r1, #4]           /* first row, bytes 4..7 */
        ldr_post        r4,  r1,  r2            /* first row, bytes 0..3 */
        ldr             r7,  [r1, #4]           /* second row, bytes 4..7 */
        strd_post       r4,  r5,  r0,  r2
        ldr_post        r6,  r1,  r2            /* second row, bytes 0..3 */
        strd_post       r6,  r7,  r0,  r2
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc
77
/*
 * ff_put_pixels8_x2_armv6: horizontal half-pel interpolation of an
 * 8-byte-wide block, rounding up, two rows per iteration.
 *
 * Each output byte is (s[i] + s[i+1] + 1) >> 1 for i = 0..7, so nine
 * source bytes are consumed per row.  They are fetched with word loads at
 * offsets 0, 4 and 5; the word for bytes 1..4 is assembled as
 * (w0 >> 8) | (w4 << 24), which corresponds to little-endian byte order.
 *
 * uhadd8 gives (a + b) >> 1 per byte; (a ^ b) & 0x01 is the bit lost by
 * that truncation and is added back with uadd8.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 *   r12 0x01010101
 *   r14 scratch, which is why lr is saved and the return goes via pop {pc}
 *
 * As in the original ordering, every load of an iteration is issued
 * before the first store of that iteration.
 */
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  /* r12 = 0x01010101 */
1:
        subs            r3,  r3,  #2            /* flags consumed by the bne below */

        /* fetch nine bytes from each of the two rows */
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        ldr_pre         r8,  r1,  r2
        ldr             r9,  [r1, #4]
        ldr             r11, [r1, #5]
        add             r1,  r1,  r2

        /* first row: r4 = bytes 0..3, r6 = bytes 1..4, r5 = 4..7, r7 = 5..8 */
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24
        eor             r14, r4,  r6
        and             r14, r14, r12
        uhadd8          r4,  r4,  r6
        uadd8           r4,  r4,  r14
        eor             r6,  r5,  r7
        and             r6,  r6,  r12
        uhadd8          r5,  r5,  r7
        uadd8           r5,  r5,  r6

        /* second row: r8 = bytes 0..3, r10 = bytes 1..4, r9 = 4..7, r11 = 5..8 */
        lsr             r10, r8,  #8
        orr             r10, r10, r9,  lsl #24
        eor             r14, r8,  r10
        and             r14, r14, r12
        uhadd8          r8,  r8,  r10
        uadd8           r8,  r8,  r14
        eor             r6,  r9,  r11
        and             r6,  r6,  r12
        uhadd8          r9,  r9,  r11
        uadd8           r9,  r9,  r6

        strd_post       r4,  r5,  r0,  r2
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11, pc}
endfunc
118
/*
 * ff_put_pixels8_y2_armv6: vertical half-pel interpolation of an
 * 8-byte-wide block, rounding up, two rows per iteration.
 *
 * Each output byte is (a + b + 1) >> 1 where a and b are the source bytes
 * at the same column in two consecutive rows.  uhadd8 gives (a + b) >> 1
 * per byte; (a ^ b) & 0x01 is the bit lost by that truncation and is
 * added back with uadd8.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every stored row
 *   r1  source pointer, advanced by r2 for every fetched row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 *   r12 0x01010101
 *
 * Producing r3 output rows needs r3 + 1 source rows.  On loop entry
 * r4:r5 hold source row 2k and r6:r7 hold row 2k+1.  Row 2k+2 is needed
 * for the second output row of every iteration, including the last one,
 * so it is always loaded.  Row 2k+3 only feeds the next iteration and is
 * therefore skipped on the final pass, which keeps the function from
 * reading beyond the last row it actually uses.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  /* r12 = 0x01010101 */
        ldr             r4,  [r1]               /* row 0 */
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            /* row 1 */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            /* Z set on the final pass */
        uhadd8          r8,  r4,  r6            /* output 2k = avg(row 2k, row 2k+1) */
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr_pre         r4,  r1,  r2            /* row 2k+2: always required */
        uadd8           r8,  r8,  r10
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6            /* output 2k+1 = avg(row 2k+2, row 2k+1) */
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
        ldrc_pre        ne,  r6,  r1,  r2       /* row 2k+3: only if another pass follows */
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2
        it              ne
        ldrne           r7,  [r1, #4]
        strd_post       r10, r11, r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
159
/*
 * ff_put_pixels8_x2_no_rnd_armv6: horizontal half-pel interpolation of an
 * 8-byte-wide block without rounding, two rows per iteration.
 *
 * Each output byte is (s[i] + s[i+1]) >> 1 for i = 0..7 (uhadd8), so nine
 * source bytes are consumed per row.  They are fetched with word loads at
 * offsets 0, 4 and 5; the word for bytes 1..4 is assembled as
 * (w0 >> 8) | (w4 << 24), which corresponds to little-endian byte order.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 *   r12, r14 scratch; lr is saved and the return goes via pop {pc}
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        /* fetch nine bytes from each of the two rows */
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        ldr_pre         r8,  r1,  r2
        ldr             r9,  [r1, #4]
        ldr             r14, [r1, #5]
        add             r1,  r1,  r2
        subs            r3,  r3,  #2            /* flags consumed by the bne below */

        /* first row */
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24  /* r6 = bytes 1..4 */
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7

        /* second row */
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24  /* r12 = bytes 1..4 */
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14

        stm             r0,  {r4,r5}
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
187
/*
 * ff_put_pixels8_y2_no_rnd_armv6: vertical half-pel interpolation of an
 * 8-byte-wide block without rounding, two rows per iteration.
 *
 * Each output byte is (a + b) >> 1 (uhadd8) where a and b are the source
 * bytes at the same column in two consecutive rows.
 *
 * Registers as used by the code:
 *   r0  destination pointer, advanced by r2 after every stored row
 *   r1  source pointer, advanced by r2 for every fetched row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 per iteration and tested for zero,
 *       so it has to be a positive even number
 *   r12, r14 hold the second output row; lr is saved and the return goes
 *       via pop {pc}
 *
 * Producing r3 output rows needs r3 + 1 source rows.  On loop entry
 * r4:r5 hold source row 2k and r6:r7 hold row 2k+1.  Row 2k+2 is needed
 * for the second output row of every iteration, including the last one,
 * so it is always loaded.  Row 2k+3 only feeds the next iteration and is
 * therefore skipped on the final pass, which keeps the function from
 * reading beyond the last row it actually uses.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]               /* row 0 */
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            /* row 1 */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            /* Z set on the final pass */
        uhadd8          r8,  r4,  r6            /* output 2k = avg(row 2k, row 2k+1) */
        ldr_pre         r4,  r1,  r2            /* row 2k+2: always required */
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6            /* output 2k+1 = avg(row 2k+2, row 2k+1) */
        ldrc_pre        ne,  r6,  r1,  r2       /* row 2k+3: only if another pass follows */
        uhadd8          r14, r5,  r7
        it              ne
        ldrne           r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
214
/*
 * ff_avg_pixels8_armv6: average an 8-byte-wide source block into the
 * destination with rounding up, two rows per loop iteration.
 *
 * Each destination byte d is replaced by (d + s + 1) >> 1, s being the
 * source byte at the same position.  uhadd8 gives (d + s) >> 1 per byte;
 * (d ^ s) & 0x01 is the bit lost by that truncation and is added back
 * with uadd8.
 *
 * Registers as used by the code:
 *   r0  destination pointer (read and written), advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row
 *   r2  byte stride applied to both pointers
 *   r3  row count; decremented by 2 and tested for zero, so it has to be
 *       a positive even number
 *   lr  0x01010101; it is saved on entry and the return goes via pop {pc}
 *
 * ldr_post, ldrd_reg and strd_post are helper macros defined outside this
 * file; the comments below assume post-indexed and register-offset
 * addressing as their names suggest -- TODO confirm against asm.S.
 *
 * The loop is software pipelined: while one row is being finished the
 * destination and source words of the following row are already being
 * fetched.  The final pass leaves through label 2 without fetching
 * anything further.
 */
215 function ff_avg_pixels8_armv6, export=1
216         pld             [r1, r2]
217         push            {r4-r10, lr}
/* lr = 0x01010101: low bit of every byte lane, used as rounding mask */
218         mov             lr,  #1
219         orr             lr,  lr,  lr,  lsl #8
220         orr             lr,  lr,  lr,  lsl #16
/* prime the pipeline: r4:r5 = first destination row, r9:r10 = first source row */
221         ldrd            r4,  r5,  [r0]
222         ldr             r10, [r1, #4]
223         ldr_post        r9,  r1,  r2
/* Z is set when only two rows remain; no explicitly flag-setting
   instruction appears between here and the beq below */
224         subs            r3,  r3,  #2
225 1:
226         pld             [r1, r2]
/* first row of the pair: r4:r5 (destination) averaged with r9:r10 (source);
   r8/r12 carry the rounding bits */
227         eor             r8,  r4,  r9
228         uhadd8          r4,  r4,  r9
229         eor             r12, r5,  r10
/* second row of the pair: destination words into r6:r7 */
230         ldrd_reg        r6,  r7,  r0,  r2
231         uhadd8          r5,  r5,  r10
232         and             r8,  r8,  lr
/* second row of the pair: source words into r9:r10 (r10 is free again) */
233         ldr             r10, [r1, #4]
234         and             r12, r12, lr
235         uadd8           r4,  r4,  r8
236         ldr_post        r9,  r1,  r2
237         eor             r8,  r6,  r9
238         uadd8           r5,  r5,  r12
239         pld             [r1, r2,  lsl #1]
240         eor             r12, r7,  r10
241         uhadd8          r6,  r6,  r9
/* store the finished first row */
242         strd_post       r4,  r5,  r0,  r2
243         uhadd8          r7,  r7,  r10
/* final pass: finish the second row at label 2 without further loads */
244         beq             2f
/* more rows follow: finish the second row while fetching the destination
   (r4:r5) and source (r9:r10) words of the next pair's first row */
245         and             r8,  r8,  lr
246         ldrd_reg        r4,  r5,  r0,  r2
247         uadd8           r6,  r6,  r8
248         ldr             r10, [r1, #4]
249         and             r12, r12, lr
/* sets the flags tested by the beq of the next iteration */
250         subs            r3,  r3,  #2
251         uadd8           r7,  r7,  r12
252         ldr_post        r9,  r1,  r2
253         strd_post       r6,  r7,  r0,  r2
254         b               1b
255 2:
/* tail: add the rounding bits to the last row and store it */
256         and             r8,  r8,  lr
257         and             r12, r12, lr
258         uadd8           r6,  r6,  r8
259         uadd8           r7,  r7,  r12
260         strd_post       r6,  r7,  r0,  r2
261
262         pop             {r4-r10, pc}
263 endfunc