2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * ldcol.8 rd, rs, rt, n=8, hi=0
 *
 * Loads single bytes into individual lanes of \rd, one vld1.8 per lane.
 * Every load is post-indexed, so \rs is advanced by \rt after each byte.
 *   n == 8          : lanes 0-7 are loaded (both groups below assemble)
 *   n != 8, hi == 0 : only lanes 0-3 are loaded
 *   n != 8, hi == 1 : only lanes 4-7 are loaded
 * Presumably \rt is a row stride so that \rs walks down a pixel column;
 * verify against the callers.
 * NOTE(review): no closing .endif/.endm is visible for this macro here.
 */
23 .macro ldcol.8 rd, rs, rt, n=8, hi=0
/* Lanes 0-3: assembled for the full 8-lane case or the low-half case. */
24 .if \n == 8 || \hi == 0
25 vld1.8 {\rd[0]}, [\rs], \rt
26 vld1.8 {\rd[1]}, [\rs], \rt
27 vld1.8 {\rd[2]}, [\rs], \rt
28 vld1.8 {\rd[3]}, [\rs], \rt
/* Lanes 4-7: assembled for the full 8-lane case or the high-half case. */
30 .if \n == 8 || \hi == 1
31 vld1.8 {\rd[4]}, [\rs], \rt
32 vld1.8 {\rd[5]}, [\rs], \rt
33 vld1.8 {\rd[6]}, [\rs], \rt
34 vld1.8 {\rd[7]}, [\rs], \rt
/*
 * add16x8 dq, dl, dh, rl, rh
 *
 * Horizontal sum of the 16 unsigned bytes held in \rl and \rh.
 * The visible callers pass \dl and \dh as the low and high halves of \dq
 * (q0, d0, d1); under that assumption every 16-bit lane of \dl ends up
 * holding the total of all 16 bytes.
 * NOTE(review): no closing .endm is visible for this macro here.
 */
38 .macro add16x8 dq, dl, dh, rl, rh
/* Widen while adding: 8 x u16 sums of \rl[i] + \rh[i] in \dq. */
39 vaddl.u8 \dq, \rl, \rh
/* Fold 8 partial sums down to 4. */
40 vadd.u16 \dl, \dl, \dh
/* Two pairwise adds fold 4 -> 2 -> 1; the total is replicated in all lanes. */
41 vpadd.u16 \dl, \dl, \dl
42 vpadd.u16 \dl, \dl, \dl
/*
 * ff_pred16x16_128_dc_neon: exported entry point (export=1).
 * NOTE(review): the body is not visible here; the name suggests a 16x16
 * block filled with the constant 128 - confirm.
 */
45 function ff_pred16x16_128_dc_neon, export=1
/*
 * ff_pred16x16_top_dc_neon: exported entry point (export=1).
 * Sums 16 bytes read through r2; presumably r2 addresses the row above the
 * block - the instruction that sets r2 is not visible here, confirm.
 */
50 function ff_pred16x16_top_dc_neon, export=1
/* Load 16 bytes into q0; :128 asserts a 128-bit aligned address. */
52 vld1.8 {q0}, [r2,:128]
/* Sum the 16 bytes of d0/d1; the total lands in the 16-bit lanes of d0. */
53 add16x8 q0, d0, d1, d0, d1
/*
 * ff_pred16x16_left_dc_neon: exported entry point (export=1).
 * NOTE(review): the loads that fill d0/d1 are not visible here; presumably
 * they gather the 16 pixels of the left neighbour column - confirm.
 */
59 function ff_pred16x16_left_dc_neon, export=1
/* Sum the 16 bytes of d0/d1; the total lands in the 16-bit lanes of d0. */
63 add16x8 q0, d0, d1, d0, d1
/*
 * ff_pred16x16_dc_neon: exported entry point (export=1).
 * Visible parts: a 16-byte load through r2 and a store loop (label 6) that
 * writes q0 to two consecutive rows per pass.
 * Presumably r0 = destination pointer and r1 = row stride in bytes; verify
 * against the C prototype.
 */
69 function ff_pred16x16_dc_neon, export=1
/* Load 16 bytes into q0; :128 asserts a 128-bit aligned address. */
71 vld1.8 {q0}, [r2,:128]
/* Store q0 (16 bytes) to [r0], then r0 += r1; done twice per pass. */
85 6: vst1.8 {q0}, [r0,:128], r1
86 vst1.8 {q0}, [r0,:128], r1
/*
 * ff_pred16x16_hor_neon: exported entry point (export=1).
 * Per pass of the loop at label 1: one byte read through r2 is replicated
 * across a 16-byte row and written through r0; both pointers step by r1.
 * NOTE(review): pointer setup and loop control are not visible here.
 */
92 function ff_pred16x16_hor_neon, export=1
/* Load one byte and duplicate it into every lane of d0 and d1; r2 += r1. */
95 1: vld1.8 {d0[],d1[]},[r2], r1
/* Store the 16 replicated bytes (q0) to [r0], 128-bit aligned; r0 += r1. */
96 vst1.8 {q0}, [r0,:128], r1
/*
 * ff_pred16x16_vert_neon: exported entry point (export=1).
 * Reads one 16-byte row through r0 and then writes that same row
 * repeatedly to the following rows, stepping r0 by r1 each time.
 * NOTE(review): the initial r0 adjustment and loop control are not visible.
 */
102 function ff_pred16x16_vert_neon, export=1
/* Load the 16-byte source row into q0; r0 += r1 moves to the next row. */
104 vld1.8 {q0}, [r0,:128], r1
/* Loop body (label 1): two row stores per pass, r0 += r1 after each. */
106 1: vst1.8 {q0}, [r0,:128], r1
107 vst1.8 {q0}, [r0,:128], r1
/*
 * ff_pred16x16_plane_neon: exported entry point (export=1).
 * NOTE(review): only part of the body is visible here, so the comments
 * below describe individual instructions rather than the full algorithm.
 */
113 function ff_pred16x16_plane_neon, export=1
/* Load 8 bytes into d2 from a 64-bit aligned address; r2 += r1. */
118 vld1.8 {d2}, [r2,:64], r1
/* Load 16 bytes into q0 through r3 (128-bit aligned); presumably the weight table - confirm. */
127 vld1.8 {q0}, [r3,:128]
/* Rounding shift right by 6, narrowing each 32-bit lane of q2 to 16 bits. */
136 vrshrn.s32 d4, q2, #6
143 vadd.i16 d16, d16, d0
/* Rotate q0 by seven 16-bit elements: old element 7 becomes element 0. */
147 vext.16 q0, q0, q0, #7
/* Multiply every 16-bit lane of q0 by the scalar in lane 0 of d4. */
150 vmul.i16 q0, q0, d4[0]
/* Shift q1 right by 5 and narrow with saturation to unsigned bytes (low 8 pixels). */
159 vqshrun.s16 d0, q1, #5
/* Same conversion into d1; q1 is presumably updated in between (not visible). */
161 vqshrun.s16 d1, q1, #5
/* Store the finished 16-byte row to [r0] (128-bit aligned); r0 += r1. */
163 vst1.8 {q0}, [r0,:128], r1
/*
 * Eight 16-bit constants 1..8.
 * NOTE(review): the label and alignment directive for this table are not
 * visible here; presumably these are the plane-prediction weights loaded
 * through r3 - confirm.
 */
172 .short 1,2,3,4,5,6,7,8
/*
 * ff_pred8x8_hor_neon: exported entry point (export=1).
 * Per pass of the loop at label 1: one byte read through r2 is replicated
 * across an 8-byte row and written through r0; both pointers step by r1.
 * NOTE(review): pointer setup and loop control are not visible here.
 */
176 function ff_pred8x8_hor_neon, export=1
/* Load one byte and duplicate it into all 8 lanes of d0; r2 += r1. */
179 1: vld1.8 {d0[]}, [r2], r1
/* Store the 8 replicated bytes to [r0], 64-bit aligned; r0 += r1. */
180 vst1.8 {d0}, [r0,:64], r1
/*
 * ff_pred8x8_vert_neon: exported entry point (export=1).
 * Reads one 8-byte row through r0 and then writes that same row repeatedly
 * to the following rows, stepping r0 by r1 each time.
 * NOTE(review): the initial r0 adjustment and loop control are not visible.
 */
186 function ff_pred8x8_vert_neon, export=1
/* Load the 8-byte source row into d0; r0 += r1 moves to the next row. */
188 vld1.8 {d0}, [r0,:64], r1
/* Loop body (label 1): two row stores per pass, r0 += r1 after each. */
190 1: vst1.8 {d0}, [r0,:64], r1
191 vst1.8 {d0}, [r0,:64], r1
/*
 * ff_pred8x8_plane_neon: exported entry point (export=1).
 * NOTE(review): only part of the body is visible here, so the comments
 * below describe individual instructions rather than the full algorithm.
 */
197 function ff_pred8x8_plane_neon, export=1
/* Load one 32-bit word (4 bytes) into the low half of d0. */
201 vld1.32 {d0[0]}, [r3]
/* Load 4 bytes into the low half of d2 (32-bit aligned); r2 += r1. */
202 vld1.32 {d2[0]}, [r2,:32], r1
/* Gather 4 bytes, stepping r3 by r1, into byte lanes 4-7 of d0. */
203 ldcol.8 d0, r3, r1, 4, hi=1
/* Gather 4 more bytes, stepping r3 by r1, into byte lanes 0-3 of d3. */
205 ldcol.8 d3, r3, r1, 4
/* Load eight 16-bit values into q0 through r3 (128-bit aligned). */
211 vld1.16 {q0}, [r3,:128]
/* Rounding shift right by 5, narrowing each 32-bit lane of q2 to 16 bits. */
218 vrshrn.s32 d4, q2, #5
225 vadd.i16 d16, d16, d0
/* Rotate q0 by seven 16-bit elements: old element 7 becomes element 0. */
229 vext.16 q0, q0, q0, #7
/* Multiply every 16-bit lane of q0 by the scalar in lane 0 of d4. */
232 vmul.i16 q0, q0, d4[0]
/* Shift q1 right by 5 and narrow with saturation to 8 unsigned bytes. */
241 vqshrun.s16 d0, q1, #5
/* Store the finished 8-byte row to [r0] (64-bit aligned); r0 += r1. */
243 vst1.8 {d0}, [r0,:64], r1
/*
 * ff_pred8x8_128_dc_neon: exported entry point (export=1).
 * NOTE(review): the body is not visible here; the name suggests an 8x8
 * block filled with the constant 128 - confirm.
 */
249 function ff_pred8x8_128_dc_neon, export=1
/*
 * ff_pred8x8_top_dc_neon: exported entry point (export=1).
 * Reads 8 bytes through r2; presumably the row above the block - the
 * instruction that sets r2 is not visible here, confirm.
 */
254 function ff_pred8x8_top_dc_neon, export=1
/* Load 8 bytes into d0; :64 asserts a 64-bit aligned address. */
256 vld1.8 {d0}, [r2,:64]
/* Rounding divide by 4: (x + 2) >> 2 on each 16-bit lane of q0, narrowed to bytes in d0. */
259 vrshrn.u16 d0, q0, #2
/*
 * ff_pred8x8_left_dc_neon: exported entry point (export=1).
 * NOTE(review): the loads and sums that produce q0 are not visible here.
 */
266 function ff_pred8x8_left_dc_neon, export=1
/* Rounding divide by 4: (x + 2) >> 2 on each 16-bit lane of q0, narrowed to bytes in d0. */
271 vrshrn.u16 d0, q0, #2
/*
 * ff_pred8x8_dc_neon: exported entry point (export=1).
 * Visible parts: an 8-byte load through r2, two rounded averages of the
 * sums in q0 (divide by 8 and divide by 4), and a store loop (label 6)
 * that writes d0 to rows starting at r0 and d1 to rows starting four
 * strides below.
 * NOTE(review): the summing steps and loop control are not visible here.
 */
277 function ff_pred8x8_dc_neon, export=1
/* Load 8 bytes into d0; :64 asserts a 64-bit aligned address. */
279 vld1.8 {d0}, [r2,:64]
/* d2 = (q0 + 4) >> 3 per 16-bit lane, narrowed to bytes. */
286 vrshrn.u16 d2, q0, #3
/* d3 = (q0 + 2) >> 2 per 16-bit lane, narrowed to bytes. */
287 vrshrn.u16 d3, q0, #2
/* r2 = r0 + 4 * r1: a second write pointer four rows below r0. */
295 add r2, r0, r1, lsl #2
/* Store d0 to the row at r0 and d1 to the row at r2; both step by r1. */
296 6: vst1.8 {d0}, [r0,:64], r1
297 vst1.8 {d1}, [r2,:64], r1
/*
 * ff_pred8x8_l0t_dc_neon: exported entry point (export=1).
 * Visible parts: an 8-byte load through r2, a 4-byte column gather into
 * the low lanes of d1, and two rounded averages of the sums in q0.
 * NOTE(review): pointer setup and the summing steps are not visible here.
 */
303 function ff_pred8x8_l0t_dc_neon, export=1
/* Load 8 bytes into d0; :64 asserts a 64-bit aligned address. */
305 vld1.8 {d0}, [r2,:64]
/* Gather 4 bytes, stepping r2 by r1, into byte lanes 0-3 of d1. */
307 ldcol.8 d1, r2, r1, 4
/* d2 = (q0 + 4) >> 3 per 16-bit lane, narrowed to bytes. */
312 vrshrn.u16 d2, q0, #3
/* d3 = (q0 + 2) >> 2 per 16-bit lane, narrowed to bytes. */
313 vrshrn.u16 d3, q0, #2
/*
 * ff_pred8x8_l00_dc_neon: exported entry point (export=1).
 * Visible parts: a 4-byte column gather into the low lanes of d0 and a
 * rounded divide by 4.
 * NOTE(review): pointer setup and the summing steps are not visible here.
 */
321 function ff_pred8x8_l00_dc_neon, export=1
/* Gather 4 bytes, stepping r2 by r1, into byte lanes 0-3 of d0. */
323 ldcol.8 d0, r2, r1, 4
/* d0 = (q0 + 2) >> 2 per 16-bit lane, narrowed to bytes. */
326 vrshrn.u16 d0, q0, #2
/*
 * ff_pred8x8_0lt_dc_neon: exported entry point (export=1).
 * Visible parts: an 8-byte load through r2, a 4-byte column gather into
 * the high lanes of d1 starting four rows below r0, and two rounded
 * averages of the sums in q0.
 * NOTE(review): the summing steps between these lines are not visible.
 */
332 function ff_pred8x8_0lt_dc_neon, export=1
/* Load 8 bytes into d0; :64 asserts a 64-bit aligned address. */
334 vld1.8 {d0}, [r2,:64]
/* r2 = r0 + 4 * r1: address four rows below r0. */
335 add r2, r0, r1, lsl #2
/* Gather 4 bytes, stepping r2 by r1, into byte lanes 4-7 of d1. */
337 ldcol.8 d1, r2, r1, 4, hi=1
/* d3 = (q0 + 2) >> 2 per 16-bit lane, narrowed to bytes. */
342 vrshrn.u16 d3, q0, #2
/* d2 = (q0 + 4) >> 3 per 16-bit lane, narrowed to bytes. */
343 vrshrn.u16 d2, q0, #3
/*
 * ff_pred8x8_0l0_dc_neon: exported entry point (export=1).
 * Visible parts: a 4-byte column gather starting four rows below r0 and a
 * rounded divide by 4.
 * NOTE(review): the summing steps and the rest of the body are not visible.
 */
352 function ff_pred8x8_0l0_dc_neon, export=1
/* r2 = r0 + 4 * r1: address four rows below r0. */
353 add r2, r0, r1, lsl #2
/* Gather 4 bytes, stepping r2 by r1, into byte lanes 0-3 of d1. */
355 ldcol.8 d1, r2, r1, 4
/* d1 = (q1 + 2) >> 2 per 16-bit lane, narrowed to bytes. */
358 vrshrn.u16 d1, q1, #2