/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/arm/asm.S"

/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
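/*
 * All variants below evaluate the same bilinear chroma filter, only
 * vectorized and specialized.  For reference, a minimal scalar C sketch
 * of the H.264 computation (the name chroma_mc_ref and its loop
 * structure are illustrative, not part of this file; w is 8, 4 or 2
 * depending on the variant):
 *
 *     #include <stdint.h>
 *
 *     static void chroma_mc_ref(uint8_t *dst, const uint8_t *src,
 *                               int stride, int h, int w, int x, int y)
 *     {
 *         // x and y are eighth-pel fractional offsets in 0..7
 *         const int A = (8 - x) * (8 - y);
 *         const int B = x * (8 - y);
 *         const int C = (8 - x) * y;
 *         const int D = x * y;
 *
 *         for (int i = 0; i < h; i++) {
 *             for (int j = 0; j < w; j++)
 *                 dst[j] = (A * src[j]          + B * src[j + 1] +
 *                           C * src[j + stride] + D * src[j + stride + 1] +
 *                           32) >> 6;
 *             dst += stride;
 *             src += stride;
 *         }
 *     }
 */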
.macro  h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y (stacked arguments)
        add             r6,  r6,  r7,  lsl #3   @ step to the (y >> 1) row of the bias table
        add             r6,  r6,  r7,  lsl #1   @ step to the (x >> 1) entry within the row
        vld1.16         {d22[],d23[]}, [r6,:16] @ splat the rv40 rounding bias into q11
        rsb             r6,  r7,  r5,  lsl #3   @ r6  = 8*y - x*y = (8-x)*y = C
        rsb             r12, r7,  r4,  lsl #3   @ r12 = 8*x - x*y = x*(8-y) = B
        sub             r4,  r7,  r4,  lsl #3   @ r4  = x*y - 8*x
        sub             r4,  r4,  r5,  lsl #3   @ r4  = x*y - 8*x - 8*y = A - 64
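/* With r4 = x, r5 = y and r7 = x*y from the preceding multiply, the four
 * instructions above derive all bilinear weights without extra multiplies:
 *     r6  = (y << 3) - x*y = (8 - x) * y = C
 *     r12 = (x << 3) - x*y = x * (8 - y) = B
 *     r4  = x*y - 8*x - 8*y = (8 - x) * (8 - y) - 64 = A - 64
 * so that after the subsequent add of 64, r4 holds A, while D = x*y is
 * already in r7. */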
        vld1.8          {d4, d5}, [r1], r2      @ prime the loop with the first source row
1:      vld1.8          {d6, d7}, [r1], r2      @ 2D loop: load the next source row
        vld1.8          {d4, d5}, [r1], r2      @ and the row after (two rows per iteration)
        vrshrn.u16      d16, q8,  #6            @ (sum + 32) >> 6, narrow to 8 bits
        vrshrn.u16      d17, q9,  #6
        vld1.8          {d20}, [lr,:64], r2     @ avg only: fetch the existing dst rows
        vld1.8          {d21}, [lr,:64], r2
        vst1.8          {d16}, [r0,:64], r2     @ store two output rows
        vst1.8          {d17}, [r0,:64], r2
        vld1.8          {d4}, [r1], r2          @ vertical-only path: prime with row 0
3:      vld1.8          {d6}, [r1], r2          @ 1D loop: next row
        vld1.8          {d4}, [r1], r2          @ and the one after
        vrshrn.u16      d16, q8,  #6            @ h264: rounding (+32) narrowing shift
        vrshrn.u16      d17, q9,  #6
        vshrn.u16       d16, q8,  #6            @ rv40: plain shift, bias added beforehand
        vshrn.u16       d17, q9,  #6
        vld1.8          {d20}, [lr,:64], r2     @ avg only: fetch dst rows
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10           @ avg: (result + dst + 1) >> 1
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
4:      vld1.8          {d4, d5}, [r1], r2      @ horizontal-only path: load two rows
        vld1.8          {d6, d7}, [r1], r2
        vext.8          d5,  d4,  d5,  #1       @ same rows shifted left by one pixel
        vext.8          d7,  d6,  d7,  #1
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
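/* When x or y is zero the 2D filter degenerates into a two-tap 1D filter,
 * which is what the paths at labels 3: (vertical) and 4: (horizontal)
 * implement.  With e the remaining nonzero offset and k either stride or 1,
 * each output pixel reduces to
 *
 *     dst[j] = ((64 - 8*e) * src[j] + 8*e * src[j + k] + 32) >> 6;
 *
 * so only two weights, 64 - 8*e and 8*e, are needed per pixel. */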
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro  h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y
        add             r6,  r6,  r7,  lsl #3
        add             r6,  r6,  r7,  lsl #1
        vld1.16         {d22[],d23[]}, [r6,:16]
        rsb             r6,  r7,  r5,  lsl #3   @ same A/B/C/D factorization as in mc8
        rsb             r12, r7,  r4,  lsl #3
        sub             r4,  r7,  r4,  lsl #3
        sub             r4,  r4,  r5,  lsl #3
        vld1.8          {d4}, [r1], r2          @ prime the loop with the first row
        vext.8          d5,  d4,  d5,  #1       @ and its one-pixel-shifted copy
1:      vld1.8          {d6}, [r1], r2
        vext.8          d7,  d6,  d7,  #1
        vld1.8          {d4}, [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vadd.i16        d16, d16, d17           @ fold the two interleaved halves of the sum
        vadd.i16        d17, d18, d19
        vrshrn.u16      d16, q8,  #6            @ h264: rounding shift
        vshrn.u16       d16, q8,  #6            @ rv40: plain shift after bias
        vld1.32         {d20[0]}, [lr,:32], r2  @ avg only: two 4-byte dst rows
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
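/* The mc4 variant packs a 4-pixel row and its one-pixel-shifted copy into
 * a single 64-bit d register, with the A/B (and C/D) weights interleaved
 * to match (the interleaving setup itself lies outside this excerpt), so
 * the same vmull/vmlal pattern as mc8 serves half-width blocks; the
 * vadd.i16 pair in the loop folds the two interleaved halves of each
 * widened sum back together before the narrowing shift. */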
        vext.32         d1,  d0,  d1,  #1       @ pack the 1D coefficients for two rows
        vld1.32         {d4[0]}, [r1], r2       @ vertical-only path: prime with row 0
3:      vld1.32         {d4[1]}, [r1], r2
        vld1.32         {d4[0]}, [r1], r2
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        vrshrn.u16      d16, q8,  #6
        vshrn.u16       d16, q8,  #6
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
4:      vld1.8          {d4}, [r1], r2          @ horizontal-only path
        vld1.8          {d6}, [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vext.8          d7,  d6,  d7,  #1
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        vrshrn.u16      d16, q8,  #6
        vshrn.u16       d16, q8,  #6
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
.macro  h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
        rsb             r6,  r5,  lr,  lsl #3   @ r6  = 8*y - x*y = C (here r5 = x*y, lr = y)
        rsb             r12, r5,  r4,  lsl #3   @ r12 = 8*x - x*y = B
        sub             r4,  r5,  r4,  lsl #3
        sub             r4,  r4,  lr,  lsl #3   @ r4  = A - 64
        vld1.32         {d4[0]}, [r1], r2       @ gather three 2-pixel rows into q2
        vld1.32         {d4[1]}, [r1], r2
        vld1.32         {d5[1]}, [r1]
        vext.8          q3,  q2,  q2,  #1       @ one-byte-shifted copy for the x+1 taps
        vld1.16         {d18[0]}, [r0,:16], r2  @ avg only: existing dst pixels
        vld1.16         {d18[1]}, [r0,:16]
        vadd.i16        d16, d16, d17           @ sum the partial products per pixel
        vrshrn.u16      d16, q8,  #6
        vrhadd.u8       d16, d16, d18
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
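/* The mc2 variant goes one step further: q2 holds the 2-pixel rows and q3
 * their one-byte-shifted copies, so a single multiply-accumulate chain
 * evaluates all four taps for two output pixels at once, and
 * vadd.i16 d16, d16, d17 collapses the partial sums before rounding. */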
        vld1.16         {d16[0]}, [r1], r2      @ x == y == 0: straight copy (or avg)
        vld1.16         {d16[1]}, [r1], r2
        vld1.16         {d18[0]}, [r0,:16], r2
        vld1.16         {d18[1]}, [r0,:16]
        vrhadd.u8       d16, d16, d18
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
#if CONFIG_H264_DECODER

#if CONFIG_RV40_DECODER
        .short          32, 28, 32, 28
        .short          32, 28, 32, 28
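/* RV40 rounding differs from H.264: instead of adding a fixed 32 before
 * the >> 6 (vrshrn), the filter sum gets a bias looked up from this table
 * by the coarse subpel position, followed by a plain shift (the vadd +
 * vshrn pairs in the macros above).  The lookup amounts to
 *
 *     bias = rv40bias[(y >> 1) * 4 + (x >> 1)];  // rows of four .short
 *
 * which is what the "add r6, r6, r7, lsl #3" (row: 4 entries * 2 bytes)
 * and "add r6, r6, r7, lsl #1" (column) address arithmetic computes. */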
        h264_chroma_mc8 put, rv40
        h264_chroma_mc8 avg, rv40
        h264_chroma_mc4 put, rv40
        h264_chroma_mc4 avg, rv40