2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
23 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
@ NOTE(review): this is an elided excerpt — many instructions of the macro body
@ (and the closing endfunc/.endm) are not visible here. Comments below describe
@ only what the visible instructions establish.
@ \type is put or avg; \codec selects h264/rv40/vc1 variants (see instantiations
@ at the bottom of the file). Register roles per the prototype above:
@   r0 = dst, r1 = src, r2 = stride, r3 = h; x/y arrive on the stack.
24 .macro h264_chroma_mc8 type, codec=h264
25 function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
27 ldrd r4, r5, [sp, #20]         @ r4 = x, r5 = y (stack args; #20 presumably skips pushed regs — TODO confirm)
37 add r6, r6, r7, lsl #3         @ table/index address arithmetic (setup of r6/r7 elided from this view)
39 add r6, r6, r7, lsl #1
40 vld1.16 {d22[],d23[]}, [r6,:16]  @ load a 16-bit per-codec bias and replicate it to every lane of q11
@ The rsb/sub sequence below appears to derive the four bilinear weights from
@ x and y (classic A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy scheme) — the r7
@ setup is elided, so TODO confirm against the full source.
49 rsb r6, r7, r5, lsl #3
50 rsb r12, r7, r4, lsl #3
51 sub r4, r7, r4, lsl #3
52 sub r4, r4, r5, lsl #3
@ Full 2-D (x!=0 && y!=0) path: 16 source bytes per row so d5 can supply the
@ x+1 neighbours; loads post-increment r1 by the stride.
59 vld1.8 {d4, d5}, [r1], r2
64 1: vld1.8 {d6, d7}, [r1], r2  @ main loop: next source row (accumulation insns elided)
68 vld1.8 {d4, d5}, [r1], r2
80 vrshrn.u16 d16, q8, #6        @ round and narrow: >>6 matches the 64-sum of bilinear weights
81 vrshrn.u16 d17, q9, #6
89 vld1.8 {d20}, [lr,:64], r2    @ avg variant only: load existing dst rows (lr presumably mirrors r0 — elided setup)
90 vld1.8 {d21}, [lr,:64], r2
93 vst1.8 {d16}, [r0,:64], r2    @ store two 8-byte output rows, 8-byte aligned
94 vst1.8 {d17}, [r0,:64], r2
@ 1-D path (label 3): only 8 bytes per row are loaded, i.e. no horizontal
@ neighbour needed — presumably the x==0 (vertical-only) case; TODO confirm.
106 vld1.8 {d4}, [r1], r2
108 3: vld1.8 {d6}, [r1], r2
111 vld1.8 {d4}, [r1], r2
116 vrshrn.u16 d16, q8, #6       @ rounding narrow (plain h264 variant)
117 vrshrn.u16 d17, q9, #6
121 vshrn.u16 d16, q8, #6        @ truncating narrow — alternate path, presumably when the q11 bias already supplies rounding (rv40/vc1); TODO confirm
122 vshrn.u16 d17, q9, #6
126 vld1.8 {d20}, [lr,:64], r2   @ avg variant: fetch current dst
127 vld1.8 {d21}, [lr,:64], r2
128 vrhadd.u8 q8, q8, q10        @ avg: rounding halving add of result with dst
131 vst1.8 {d16}, [r0,:64], r2
132 vst1.8 {d17}, [r0,:64], r2
@ Other 1-D path (label 4): vext builds the x+1 neighbour in d5/d7 —
@ presumably the y==0 (horizontal-only) case; TODO confirm.
137 4: vld1.8 {d4, d5}, [r1], r2
138 vld1.8 {d6, d7}, [r1], r2
139 vext.8 d5, d4, d5, #1        @ d5 = src row shifted left one byte (pixel x+1)
140 vext.8 d7, d6, d7, #1
149 vrshrn.u16 d16, q8, #6       @ rounding narrow path
150 vrshrn.u16 d17, q9, #6
154 vshrn.u16 d16, q8, #6        @ truncating narrow path (bias-based rounding)
155 vshrn.u16 d17, q9, #6
158 vld1.8 {d20}, [lr,:64], r2   @ avg variant: load dst rows
159 vld1.8 {d21}, [lr,:64], r2
160 vrhadd.u8 q8, q8, q10        @ avg with existing dst
162 vst1.8 {d16}, [r0,:64], r2
163 vst1.8 {d17}, [r0,:64], r2
170 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
@ 4-pixel-wide variant of the chroma MC above; same register roles
@ (r0=dst, r1=src, r2=stride, r3=h, x/y on the stack).
@ NOTE(review): excerpt is elided — accumulation instructions, branches and
@ the closing endfunc/.endm are not visible in this view.
171 .macro h264_chroma_mc4 type, codec=h264
172 function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
174 ldrd r4, r5, [sp, #20]        @ r4 = x, r5 = y (stack args; offset assumption as in mc8 — TODO confirm)
184 add r6, r6, r7, lsl #3        @ bias-table address arithmetic (r6/r7 setup elided)
186 add r6, r6, r7, lsl #1
187 vld1.16 {d22[],d23[]}, [r6,:16]  @ replicate 16-bit per-codec bias into q11
@ Same apparent bilinear-weight derivation as mc8 (A/B/C/D from x,y) —
@ r7 setup elided; TODO confirm.
196 rsb r6, r7, r5, lsl #3
197 rsb r12, r7, r4, lsl #3
198 sub r4, r7, r4, lsl #3
199 sub r4, r4, r5, lsl #3
@ Full 2-D path: 8 bytes loaded per row so vext can form the x+1 neighbour.
206 vld1.8 {d4}, [r1], r2
210 vext.8 d5, d4, d5, #1         @ d5 = row shifted by one pixel
216 1: vld1.8 {d6}, [r1], r2     @ main loop: next row + its shifted copy
217 vext.8 d7, d6, d7, #1
221 vld1.8 {d4}, [r1], r2
222 vext.8 d5, d4, d5, #1
227 vadd.i16 d16, d16, d17        @ pairwise combine of partial sums (multiplies elided from view)
228 vadd.i16 d17, d18, d19
230 vrshrn.u16 d16, q8, #6        @ round+narrow (>>6, weights sum to 64)
233 vshrn.u16 d16, q8, #6         @ truncating alternative — presumably bias-rounded codecs; TODO confirm
238 vld1.32 {d20[0]}, [lr,:32], r2  @ avg variant: two 4-byte dst rows into one d-reg
239 vld1.32 {d20[1]}, [lr,:32], r2
240 vrhadd.u8 d16, d16, d20       @ avg: rounding average with dst
242 vst1.32 {d16[0]}, [r0,:32], r2  @ store two 4-byte rows
243 vst1.32 {d16[1]}, [r0,:32], r2
@ 1-D path (label 3): 4-byte loads, two rows packed per d-register.
256 vext.32 d1, d0, d1, #1        @ rotate weight words (exact role unclear from excerpt — TODO confirm)
257 vld1.32 {d4[0]}, [r1], r2
259 3: vld1.32 {d4[1]}, [r1], r2
261 vld1.32 {d4[0]}, [r1], r2
263 vadd.i16 d16, d16, d17        @ combine partial sums
264 vadd.i16 d17, d18, d19
267 vrshrn.u16 d16, q8, #6        @ rounding narrow path
270 vshrn.u16 d16, q8, #6         @ truncating narrow path
273 vld1.32 {d20[0]}, [lr,:32], r2  @ avg variant: load dst
274 vld1.32 {d20[1]}, [lr,:32], r2
275 vrhadd.u8 d16, d16, d20
279 vst1.32 {d16[0]}, [r0,:32], r2
280 vst1.32 {d16[1]}, [r0,:32], r2
@ Other 1-D path (label 4): vext supplies the horizontal neighbour —
@ presumably the y==0 case; TODO confirm.
285 4: vld1.8 {d4}, [r1], r2
286 vld1.8 {d6}, [r1], r2
287 vext.8 d5, d4, d5, #1
288 vext.8 d7, d6, d7, #1
294 vadd.i16 d16, d16, d17        @ combine partial sums
295 vadd.i16 d17, d18, d19
298 vrshrn.u16 d16, q8, #6        @ rounding narrow
301 vshrn.u16 d16, q8, #6         @ truncating narrow
304 vld1.32 {d20[0]}, [lr,:32], r2  @ avg variant
305 vld1.32 {d20[1]}, [lr,:32], r2
306 vrhadd.u8 d16, d16, d20
309 vst1.32 {d16[0]}, [r0,:32], r2
310 vst1.32 {d16[1]}, [r0,:32], r2
@ 2-pixel-wide chroma MC, h264 only (no codec parameter — no per-codec bias
@ table is loaded in the visible lines).
@ NOTE(review): elided excerpt — prologue, multiplies, branches and the
@ closing endfunc/.endm are not visible here.
317 .macro h264_chroma_mc2 type
318 function ff_\type\()_h264_chroma_mc2_neon, export=1
@ Apparent bilinear-weight derivation; here lr seems to hold one of x/y
@ (unlike mc8/mc4 which use r4/r5) — register setup elided; TODO confirm.
328 rsb r6, r5, lr, lsl #3
329 rsb r12, r5, r4, lsl #3
330 sub r4, r5, r4, lsl #3
331 sub r4, r4, lr, lsl #3
339 vld1.32 {d4[0]}, [r1], r2    @ pack several 4-byte source rows into q2
340 vld1.32 {d4[1]}, [r1], r2
342 vld1.32 {d5[1]}, [r1]        @ last row: no post-increment (end of the strip)
343 vext.8 q3, q2, q2, #1        @ q3 = q2 shifted one byte: the x+1 neighbours
348 vld1.16 {d18[0]}, [r0,:16], r2  @ avg variant: load two 2-byte dst rows
349 vld1.16 {d18[1]}, [r0,:16]
353 vadd.i16 d16, d16, d17       @ combine partial sums (multiplies elided)
354 vrshrn.u16 d16, q8, #6       @ round+narrow by 6 (64-sum weights)
356 vrhadd.u8 d16, d16, d18      @ avg: rounding average with dst
358 vst1.16 {d16[0]}, [r0,:16], r2  @ store two 2-byte rows
359 vst1.16 {d16[1]}, [r0,:16], r2
@ Copy path (visible loads do no filtering): presumably the x==0 && y==0
@ case — straight copy, plus vrhadd for the avg variant; TODO confirm.
370 vld1.16 {d16[0]}, [r1], r2
371 vld1.16 {d16[1]}, [r1], r2
372 vld1.16 {d18[0]}, [r0,:16], r2  @ avg variant: existing dst
373 vld1.16 {d18[1]}, [r0,:16]
375 vrhadd.u8 d16, d16, d18
376 vst1.16 {d16[0]}, [r0,:16], r2
377 vst1.16 {d16[1]}, [r0,:16], r2
@ Per-codec instantiations of the macros above. The h264 instantiations and
@ the closing #endif directives are elided from this excerpt.
392 #if CONFIG_RV40_DECODER
@ RV40 rounding-bias constants consumed via the q11 bias load in the macros
@ (label/alignment directives for this table are elided from this view).
395 .short 32, 28, 32, 28
397 .short 32, 28, 32, 28
400 h264_chroma_mc8 put, rv40
401 h264_chroma_mc8 avg, rv40
402 h264_chroma_mc4 put, rv40
403 h264_chroma_mc4 avg, rv40
406 #if CONFIG_VC1_DECODER
407 h264_chroma_mc8 put, vc1
408 h264_chroma_mc8 avg, vc1
409 h264_chroma_mc4 put, vc1
410 h264_chroma_mc4 avg, vc1