2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/aarch64/asm.S"
// lowpass_const: helper macro taking one register argument, r.
// NOTE(review): the body of this macro is not visible in this view. The
// filter macros in this file multiply by v6.H[0] and v6.H[1] without setting
// them, so this macro presumably loads those two lanes through r -- confirm
// against the full file.
27 .macro lowpass_const r
// lowpass_8: 6-tap filter across two rows of 8 pixels, rows interleaved.
//   r0, r1 - first row: two 8-byte vectors of consecutive source bytes
//   r2, r3 - second row, same layout
//   d0, d1 - result registers for the first / second row
//   narrow - defaults to 1. NOTE(review): no use of this argument is visible
//            in this view; it presumably gates the final sqrshrun pair.
// With p[] = the bytes of r0:r1, each 16-bit lane i of d0 becomes
//   (p[i] + p[i+5]) + (p[i+2] + p[i+3]) * v6.H[1] - (p[i+1] + p[i+4]) * v6.H[0]
// and d1 is formed the same way from r2:r3. v6 is not written here; its
// lanes are presumably 5 and 20 from lowpass_const -- TODO confirm.
// Scratch registers written: v0-v5.
34 .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
// row 0: v2 = p[i+2] + p[i+3], v4 = p[i+1] + p[i+4], both widened to 16 bit
35 ext v2.8B, \r0\().8B, \r1\().8B, #2
36 ext v3.8B, \r0\().8B, \r1\().8B, #3
37 uaddl v2.8H, v2.8B, v3.8B
38 ext v4.8B, \r0\().8B, \r1\().8B, #1
39 ext v5.8B, \r0\().8B, \r1\().8B, #4
40 uaddl v4.8H, v4.8B, v5.8B
// row 0 accumulator starts from the outer taps p[i] + p[i+5]; this is the
// last read of r0/r1, so d0 may name the same register as r0
41 ext v1.8B, \r0\().8B, \r1\().8B, #5
42 uaddl \d0\().8H, \r0\().8B, v1.8B
// row 1 tap sums (v0 = offsets 2/3, v1 = offsets 1/4) are built in between
// the row 0 multiply-accumulate / multiply-subtract steps
43 ext v0.8B, \r2\().8B, \r3\().8B, #2
44 mla \d0\().8H, v2.8H, v6.H[1]
45 ext v1.8B, \r2\().8B, \r3\().8B, #3
46 uaddl v0.8H, v0.8B, v1.8B
47 ext v1.8B, \r2\().8B, \r3\().8B, #1
48 mls \d0\().8H, v4.8H, v6.H[0]
49 ext v3.8B, \r2\().8B, \r3\().8B, #4
50 uaddl v1.8H, v1.8B, v3.8B
51 ext v2.8B, \r2\().8B, \r3\().8B, #5
52 uaddl \d1\().8H, \r2\().8B, v2.8B
53 mla \d1\().8H, v0.8H, v6.H[1]
54 mls \d1\().8H, v1.8H, v6.H[0]
// rounding shift right by 5, saturated to unsigned 8 bit
56 sqrshrun \d0\().8B, \d0\().8H, #5
57 sqrshrun \d1\().8B, \d1\().8H, #5
61 //trashes v0-v5, v7, v30-v31
// lowpass_8H: the same 6-tap sum as lowpass_8, but each row sits in a single
// 16-byte register and the result is left at 16-bit precision (there is no
// narrowing shift in this macro).
//   r0, r1 - in: 16 source bytes per row; out: 8 x 16-bit filter sums
// v6.H[0] / v6.H[1] are read as multipliers and must be set up beforehand.
62 .macro lowpass_8H r0, r1
// ext with the same register as both sources rotates it by #k bytes, so the
// low 8 bytes of each result are p[k..k+7]; only those low halves are widened
63 ext v0.16B, \r0\().16B, \r0\().16B, #2
64 ext v1.16B, \r0\().16B, \r0\().16B, #3
65 uaddl v0.8H, v0.8B, v1.8B
66 ext v2.16B, \r0\().16B, \r0\().16B, #1
67 ext v3.16B, \r0\().16B, \r0\().16B, #4
68 uaddl v2.8H, v2.8B, v3.8B
// outer taps p[i] + p[i+5] overwrite r0 with the 16-bit accumulator
69 ext v30.16B, \r0\().16B, \r0\().16B, #5
70 uaddl \r0\().8H, \r0\().8B, v30.8B
// second row (r1) is interleaved with the multiply steps of the first
71 ext v4.16B, \r1\().16B, \r1\().16B, #2
72 mla \r0\().8H, v0.8H, v6.H[1]
73 ext v5.16B, \r1\().16B, \r1\().16B, #3
74 uaddl v4.8H, v4.8B, v5.8B
75 ext v7.16B, \r1\().16B, \r1\().16B, #1
76 mls \r0\().8H, v2.8H, v6.H[0]
77 ext v0.16B, \r1\().16B, \r1\().16B, #4
78 uaddl v7.8H, v7.8B, v0.8B
79 ext v31.16B, \r1\().16B, \r1\().16B, #5
80 uaddl \r1\().8H, \r1\().8B, v31.8B
81 mla \r1\().8H, v4.8H, v6.H[1]
82 mls \r1\().8H, v7.8H, v6.H[0]
// lowpass_8_1: single-row form of lowpass_8.
//   r0, r1 - two 8-byte vectors of consecutive source bytes
//   d0     - result register
//   narrow - defaults to 1. NOTE(review): no use of this argument is visible
//            in this view; it presumably gates the final sqrshrun.
// Each lane i: (p[i] + p[i+5]) + (p[i+2] + p[i+3]) * v6.H[1]
//              - (p[i+1] + p[i+4]) * v6.H[0], then rounded >> 5 and saturated.
// Scratch registers written: v2-v5, v30. v6 must be set up beforehand.
86 .macro lowpass_8_1 r0, r1, d0, narrow=1
// v2 = p[i+2] + p[i+3]
87 ext v2.8B, \r0\().8B, \r1\().8B, #2
88 ext v3.8B, \r0\().8B, \r1\().8B, #3
89 uaddl v2.8H, v2.8B, v3.8B
// v4 = p[i+1] + p[i+4]
90 ext v4.8B, \r0\().8B, \r1\().8B, #1
91 ext v5.8B, \r0\().8B, \r1\().8B, #4
92 uaddl v4.8H, v4.8B, v5.8B
// d0 = p[i] + p[i+5], then add / subtract the weighted inner sums
93 ext v30.8B, \r0\().8B, \r1\().8B, #5
94 uaddl \d0\().8H, \r0\().8B, v30.8B
95 mla \d0\().8H, v2.8H, v6.H[1]
96 mls \d0\().8H, v4.8H, v6.H[0]
// rounding shift right by 5, saturated to unsigned 8 bit
98 sqrshrun \d0\().8B, \d0\().8H, #5
// lowpass_8.16: 6-tap filter over 16-bit samples, yielding 8 output bytes.
//   r0, r1 - 16 consecutive 16-bit samples (r0 = low 8, r1 = high 8);
//            r1 is overwritten by the macro
//   r2     - destination; receives 8 unsigned bytes (may be the same as r0)
// Sums are carried in 32-bit lanes, low four outputs and high four outputs
// in separate registers. Writes v0-v6, so multipliers kept in v6 are lost.
103 .macro lowpass_8.16 r0, r1, r2
// byte offsets 4 and 6 select s[i+2] and s[i+3]: v5 (low) / v1 (high) = sum
104 ext v1.16B, \r0\().16B, \r1\().16B, #4
105 ext v0.16B, \r0\().16B, \r1\().16B, #6
106 saddl v5.4S, v1.4H, v0.4H
107 ext v2.16B, \r0\().16B, \r1\().16B, #2
108 saddl2 v1.4S, v1.8H, v0.8H
// byte offsets 2 and 8 select s[i+1] and s[i+4]: v6 (low) / v2 (high) = sum
109 ext v3.16B, \r0\().16B, \r1\().16B, #8
110 saddl v6.4S, v2.4H, v3.4H
// byte offset 10 selects s[i+5]: v0 (low) / v4 (high) = s[i] + s[i+5]
111 ext \r1\().16B, \r0\().16B, \r1\().16B, #10
112 saddl2 v2.4S, v2.8H, v3.8H
113 saddl v0.4S, \r0\().4H, \r1\().4H
114 saddl2 v4.4S, \r0\().8H, \r1\().8H
// NOTE(review): the adds below read v3 and v7, but in the visible body v3
// still holds the ext result from above and v7 is never written. The
// instructions that prepare them (presumably shifted copies that turn the
// sums into their x20 / x5 weights) appear to be missing from this view --
// confirm against the full file.
119 add v5.4S, v5.4S, v3.4S
120 add v6.4S, v6.4S, v7.4S
125 add v1.4S, v1.4S, v3.4S
126 add v2.4S, v2.4S, v7.4S
// low half: v5 = v5 + v0 - v6
128 add v5.4S, v5.4S, v0.4S
129 sub v5.4S, v5.4S, v6.4S
// high half: v1 = v1 + v4 - v2
131 add v1.4S, v1.4S, v4.4S
132 sub v1.4S, v1.4S, v2.4S
// rounding shift right by 10 while narrowing 32 -> 16 bit
134 rshrn v5.4H, v5.4S, #10
135 rshrn2 v5.8H, v1.4S, #10
// saturate signed 16 bit to unsigned 8 bit
137 sqxtun \r2\().8B, v5.8H
// put_h264_qpel16_h_lowpass_neon_packed: runs the 8-wide put horizontal
// lowpass twice. The first run is a call; afterwards x1 (the source pointer
// the callee loads from) is moved back by 16 * x2, and the second run is
// entered with a plain branch so that it returns directly to our caller.
// NOTE(review): bl overwrites x30, yet no save/restore of x30 and no setup
// of x0, x3, x12 or of the second column offset is visible in this view --
// confirm against the full file.
140 function put_h264_qpel16_h_lowpass_neon_packed
144 bl put_h264_qpel8_h_lowpass_neon
145 sub x1, x1, x2, lsl #4
149 b put_h264_qpel8_h_lowpass_neon
// h264_qpel_h_lowpass type: emits <type>_h264_qpel16_h_lowpass_neon and
// <type>_h264_qpel8_h_lowpass_neon. Instantiated below for put and avg.
152 .macro h264_qpel_h_lowpass type
// 16-wide variant: run the 8-wide routine once, then move x0 back by
// 16 * x3 and x1 back by 16 * x2. No branch or return is visible after the
// rewinds, so execution presumably falls through into the 8-wide routine
// that follows -- confirm. NOTE(review): preserving x30 across the bl and
// stepping to the second column are not visible in this view.
153 function \type\()_h264_qpel16_h_lowpass_neon
156 bl \type\()_h264_qpel8_h_lowpass_neon
157 sub x0, x0, x3, lsl #4
158 sub x1, x1, x2, lsl #4
// 8-wide variant. x1 = source (stride x2), x0 = destination (stride x3).
// Each pass reads two rows of 16 bytes and writes two rows of 8 bytes.
// The loop counter and back-branch for label 1 are not visible in this view.
165 function \type\()_h264_qpel8_h_lowpass_neon
166 1: ld1 {v28.8B, v29.8B}, [x1], x2
167 ld1 {v16.8B, v17.8B}, [x1], x2
// filter both rows in place: row 0 result in v28, row 1 result in v16
169 lowpass_8 v28, v29, v16, v17, v28, v16
// Rounding average with bytes read from the destination.
// NOTE(review): as visible here this runs for both put and avg, and v3 is
// never loaded; the type check and the second load are presumably among the
// lines missing from this view -- confirm.
171 ld1 {v2.8B}, [x0], x3
172 urhadd v28.8B, v28.8B, v2.8B
174 urhadd v16.8B, v16.8B, v3.8B
177 st1 {v28.8B}, [x0], x3
178 st1 {v16.8B}, [x0], x3
// instantiate the put and avg flavours
184 h264_qpel_h_lowpass put
185 h264_qpel_h_lowpass avg
// h264_qpel_h_lowpass_l2 type: like h264_qpel_h_lowpass, but the filtered
// rows are additionally averaged with a second input read through x3.
// Emits <type>_h264_qpel16_h_lowpass_l2_neon and the 8-wide counterpart.
// All three pointers (x0, x1, x3) advance with the same stride, x2.
187 .macro h264_qpel_h_lowpass_l2 type
// 16-wide variant: one call to the 8-wide routine, then x0, x1 and x3 are
// each moved back by 16 * x2. No branch or return is visible afterwards, so
// execution presumably falls through into the 8-wide routine -- confirm.
// NOTE(review): x30 handling and the second column offset are not visible.
188 function \type\()_h264_qpel16_h_lowpass_l2_neon
191 bl \type\()_h264_qpel8_h_lowpass_l2_neon
192 sub x0, x0, x2, lsl #4
193 sub x1, x1, x2, lsl #4
194 sub x3, x3, x2, lsl #4
// 8-wide variant: two source rows from x1 and two rows of the second input
// from x3 per pass. The loop control for label 1 is not visible in this view.
202 function \type\()_h264_qpel8_h_lowpass_l2_neon
203 1: ld1 {v26.8B, v27.8B}, [x1], x2
204 ld1 {v16.8B, v17.8B}, [x1], x2
205 ld1 {v28.8B}, [x3], x2
206 ld1 {v29.8B}, [x3], x2
// filtered rows land in v26 / v27, then get a rounding average with x3 data
208 lowpass_8 v26, v27, v16, v17, v26, v27
209 urhadd v26.8B, v26.8B, v28.8B
210 urhadd v27.8B, v27.8B, v29.8B
// Rounding average with bytes read from the destination.
// NOTE(review): as visible here this is unconditional and v3 is never
// loaded; a type check and a second load are presumably missing from this
// view -- confirm.
212 ld1 {v2.8B}, [x0], x2
213 urhadd v26.8B, v26.8B, v2.8B
215 urhadd v27.8B, v27.8B, v3.8B
218 st1 {v26.8B}, [x0], x2
219 st1 {v27.8B}, [x0], x2
// instantiate the put and avg flavours
225 h264_qpel_h_lowpass_l2 put
226 h264_qpel_h_lowpass_l2 avg
// put_h264_qpel16_v_lowpass_neon_packed: four runs of the 8x8 put vertical
// lowpass; the last run is a tail branch.
// The 8x8 routine shows 12 post-indexed row loads through x1, so each run
// leaves x1 twelve rows further on:
//   x1 -= 4 * x3   nets +8 rows, i.e. the block directly below
//   x1 -= 20 * x3  (16 + 4) returns to the first row
// NOTE(review): x30 handling around the bl calls, the destination setup and
// the step to the second column are not visible in this view -- confirm.
228 function put_h264_qpel16_v_lowpass_neon_packed
231 bl put_h264_qpel8_v_lowpass_neon
232 sub x1, x1, x3, lsl #2
233 bl put_h264_qpel8_v_lowpass_neon
234 sub x1, x1, x3, lsl #4
235 sub x1, x1, x3, lsl #2
237 bl put_h264_qpel8_v_lowpass_neon
238 sub x1, x1, x3, lsl #2
240 b put_h264_qpel8_v_lowpass_neon
// h264_qpel_v_lowpass type: emits <type>_h264_qpel16_v_lowpass_neon and
// <type>_h264_qpel8_v_lowpass_neon. Instantiated below for put and avg.
// x1 = source (stride x3), x0 = destination (stride x2).
243 .macro h264_qpel_v_lowpass type
// 16x16 variant, built from 8x8 runs. After each run x1 is 12 rows further
// on (see the 12 post-indexed loads below), so x1 -= 4 * x3 nets +8 rows.
// After the first two runs x0 is moved back by 16 * x2 and x1 by 20 * x3.
// Only three calls are visible; no branch or return follows the last
// rewind, so the fourth run presumably happens by falling through into the
// 8x8 routine -- confirm. NOTE(review): x30 handling and the step to the
// second column are not visible in this view.
244 function \type\()_h264_qpel16_v_lowpass_neon
246 bl \type\()_h264_qpel8_v_lowpass_neon
247 sub x1, x1, x3, lsl #2
248 bl \type\()_h264_qpel8_v_lowpass_neon
249 sub x0, x0, x2, lsl #4
251 sub x1, x1, x3, lsl #4
252 sub x1, x1, x3, lsl #2
254 bl \type\()_h264_qpel8_v_lowpass_neon
255 sub x1, x1, x3, lsl #2
// 8x8 variant: load source rows, transpose so the horizontal lowpass_8
// macro can be reused, then transpose back.
259 function \type\()_h264_qpel8_v_lowpass_neon
// rows 0-7 go to the even registers v16..v30, later rows to v17, v19, ...
260 ld1 {v16.8B}, [x1], x3
261 ld1 {v18.8B}, [x1], x3
262 ld1 {v20.8B}, [x1], x3
263 ld1 {v22.8B}, [x1], x3
264 ld1 {v24.8B}, [x1], x3
265 ld1 {v26.8B}, [x1], x3
266 ld1 {v28.8B}, [x1], x3
267 ld1 {v30.8B}, [x1], x3
268 ld1 {v17.8B}, [x1], x3
269 ld1 {v19.8B}, [x1], x3
270 ld1 {v21.8B}, [x1], x3
271 ld1 {v23.8B}, [x1], x3
// transpose_8x8B is defined outside this view; by name and use it transposes
// eight 8-byte rows, with v0 / v1 as temporaries -- presumably.
// NOTE(review): the second transpose takes v25/v27/v29/v31 as inputs, none
// of which is loaded by a visible instruction -- confirm against full file.
274 transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
275 transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
// each register pair now feeds lowpass_8 as one line; results in v16-v23
276 lowpass_8 v16, v17, v18, v19, v16, v17
277 lowpass_8 v20, v21, v22, v23, v18, v19
278 lowpass_8 v24, v25, v26, v27, v20, v21
279 lowpass_8 v28, v29, v30, v31, v22, v23
280 transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
// Rounding average with eight rows read from the destination, after which
// x0 is moved back by 8 * x2. NOTE(review): as visible here this runs for
// both put and avg; a type check is presumably missing from this view.
283 ld1 {v24.8B}, [x0], x2
284 urhadd v16.8B, v16.8B, v24.8B
285 ld1 {v25.8B}, [x0], x2
286 urhadd v17.8B, v17.8B, v25.8B
287 ld1 {v26.8B}, [x0], x2
288 urhadd v18.8B, v18.8B, v26.8B
289 ld1 {v27.8B}, [x0], x2
290 urhadd v19.8B, v19.8B, v27.8B
291 ld1 {v28.8B}, [x0], x2
292 urhadd v20.8B, v20.8B, v28.8B
293 ld1 {v29.8B}, [x0], x2
294 urhadd v21.8B, v21.8B, v29.8B
295 ld1 {v30.8B}, [x0], x2
296 urhadd v22.8B, v22.8B, v30.8B
297 ld1 {v31.8B}, [x0], x2
298 urhadd v23.8B, v23.8B, v31.8B
299 sub x0, x0, x2, lsl #3
// write the eight result rows; x0 ends 8 rows below where it started
302 st1 {v16.8B}, [x0], x2
303 st1 {v17.8B}, [x0], x2
304 st1 {v18.8B}, [x0], x2
305 st1 {v19.8B}, [x0], x2
306 st1 {v20.8B}, [x0], x2
307 st1 {v21.8B}, [x0], x2
308 st1 {v22.8B}, [x0], x2
309 st1 {v23.8B}, [x0], x2
// instantiate the put and avg flavours
315 h264_qpel_v_lowpass put
316 h264_qpel_v_lowpass avg
// h264_qpel_v_lowpass_l2 type: vertical lowpass whose result is averaged
// with a second input read through x12. Emits the 16x16 and 8x8 functions.
// x1 = source (stride x3), x12 = second input (stride x2),
// x0 = destination (stride x3).
318 .macro h264_qpel_v_lowpass_l2 type
// 16x16 variant, built from 8x8 runs. x1 -= 4 * x3 after a run nets +8 rows
// (the 8x8 routine shows 12 post-indexed loads). After two runs x0 goes back
// by 16 * x3, x12 by 16 * x2 and x1 by 20 * x3. Only three calls are
// visible; the fourth run presumably happens by falling through into the
// 8x8 routine -- confirm. NOTE(review): x30 handling and the step to the
// second column are not visible in this view.
319 function \type\()_h264_qpel16_v_lowpass_l2_neon
321 bl \type\()_h264_qpel8_v_lowpass_l2_neon
322 sub x1, x1, x3, lsl #2
323 bl \type\()_h264_qpel8_v_lowpass_l2_neon
324 sub x0, x0, x3, lsl #4
325 sub x12, x12, x2, lsl #4
328 sub x1, x1, x3, lsl #4
329 sub x1, x1, x3, lsl #2
331 bl \type\()_h264_qpel8_v_lowpass_l2_neon
332 sub x1, x1, x3, lsl #2
// 8x8 variant
336 function \type\()_h264_qpel8_v_lowpass_l2_neon
// rows 0-7 go to the even registers v16..v30, later rows to v17, v19, ...
337 ld1 {v16.8B}, [x1], x3
338 ld1 {v18.8B}, [x1], x3
339 ld1 {v20.8B}, [x1], x3
340 ld1 {v22.8B}, [x1], x3
341 ld1 {v24.8B}, [x1], x3
342 ld1 {v26.8B}, [x1], x3
343 ld1 {v28.8B}, [x1], x3
344 ld1 {v30.8B}, [x1], x3
345 ld1 {v17.8B}, [x1], x3
346 ld1 {v19.8B}, [x1], x3
347 ld1 {v21.8B}, [x1], x3
348 ld1 {v23.8B}, [x1], x3
// transpose_8x8B is defined outside this view; by name and use it transposes
// eight 8-byte rows, with v0 / v1 as temporaries -- presumably.
// NOTE(review): v25/v27/v29/v31 are inputs to the second transpose, but no
// visible instruction loads them -- confirm against the full file.
351 transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
352 transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
// filter along the transposed lines; results in v16-v23, then transpose back
353 lowpass_8 v16, v17, v18, v19, v16, v17
354 lowpass_8 v20, v21, v22, v23, v18, v19
355 lowpass_8 v24, v25, v26, v27, v20, v21
356 lowpass_8 v28, v29, v30, v31, v22, v23
357 transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
// rounding average with eight rows of the second input (x12, stride x2)
359 ld1 {v24.8B}, [x12], x2
360 ld1 {v25.8B}, [x12], x2
361 ld1 {v26.8B}, [x12], x2
362 ld1 {v27.8B}, [x12], x2
363 ld1 {v28.8B}, [x12], x2
364 urhadd v16.8B, v24.8B, v16.8B
365 urhadd v17.8B, v25.8B, v17.8B
366 ld1 {v29.8B}, [x12], x2
367 urhadd v18.8B, v26.8B, v18.8B
368 urhadd v19.8B, v27.8B, v19.8B
369 ld1 {v30.8B}, [x12], x2
370 urhadd v20.8B, v28.8B, v20.8B
371 urhadd v21.8B, v29.8B, v21.8B
372 ld1 {v31.8B}, [x12], x2
373 urhadd v22.8B, v30.8B, v22.8B
374 urhadd v23.8B, v31.8B, v23.8B
// Rounding average with eight rows read from the destination, after which
// x0 is moved back by 8 * x3. NOTE(review): as visible here this runs for
// both put and avg; a type check is presumably missing from this view.
377 ld1 {v24.8B}, [x0], x3
378 urhadd v16.8B, v16.8B, v24.8B
379 ld1 {v25.8B}, [x0], x3
380 urhadd v17.8B, v17.8B, v25.8B
381 ld1 {v26.8B}, [x0], x3
382 urhadd v18.8B, v18.8B, v26.8B
383 ld1 {v27.8B}, [x0], x3
384 urhadd v19.8B, v19.8B, v27.8B
385 ld1 {v28.8B}, [x0], x3
386 urhadd v20.8B, v20.8B, v28.8B
387 ld1 {v29.8B}, [x0], x3
388 urhadd v21.8B, v21.8B, v29.8B
389 ld1 {v30.8B}, [x0], x3
390 urhadd v22.8B, v22.8B, v30.8B
391 ld1 {v31.8B}, [x0], x3
392 urhadd v23.8B, v23.8B, v31.8B
393 sub x0, x0, x3, lsl #3
// write the eight result rows; x0 ends 8 rows below where it started
396 st1 {v16.8B}, [x0], x3
397 st1 {v17.8B}, [x0], x3
398 st1 {v18.8B}, [x0], x3
399 st1 {v19.8B}, [x0], x3
400 st1 {v20.8B}, [x0], x3
401 st1 {v21.8B}, [x0], x3
402 st1 {v22.8B}, [x0], x3
403 st1 {v23.8B}, [x0], x3
// instantiate the put and avg flavours
409 h264_qpel_v_lowpass_l2 put
410 h264_qpel_v_lowpass_l2 avg
// put_h264_qpel8_hv_lowpass_neon_top: shared front end of the 8x8
// horizontal+vertical lowpass. Reads source rows through x1 (stride x3) and
// leaves eight result rows of 8 bytes in v16-v23; nothing is stored here.
// Callers in this file average and/or store those registers themselves.
412 function put_h264_qpel8_hv_lowpass_neon_top
// 12 post-indexed loads of 16 bytes each (the .8H arrangement only sets the
// element size for the load), so x1 ends 12 rows further on
414 ld1 {v16.8H}, [x1], x3
415 ld1 {v17.8H}, [x1], x3
416 ld1 {v18.8H}, [x1], x3
417 ld1 {v19.8H}, [x1], x3
418 ld1 {v20.8H}, [x1], x3
419 ld1 {v21.8H}, [x1], x3
420 ld1 {v22.8H}, [x1], x3
421 ld1 {v23.8H}, [x1], x3
422 ld1 {v24.8H}, [x1], x3
423 ld1 {v25.8H}, [x1], x3
424 ld1 {v26.8H}, [x1], x3
425 ld1 {v27.8H}, [x1], x3
// NOTE(review): the rows are loaded as raw bytes yet transposed below as
// 16-bit lanes, and v28-v31 are transposed without a visible load. A
// horizontal pass that widens each row to 16 bit (lowpass_8H is defined in
// this file but has no visible user) and the setup of v6 are presumably
// among the lines missing from this view -- confirm against the full file.
// transpose_8x8H / transpose_8x8B are defined outside this view.
435 transpose_8x8H v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
436 transpose_8x8H v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
// second filter pass over 16-bit data; each call pairs register vN with
// vN+8 and writes 8 bytes back into vN (v24-v31 are overwritten as inputs)
438 lowpass_8.16 v16, v24, v16
439 lowpass_8.16 v17, v25, v17
441 lowpass_8.16 v18, v26, v18
442 lowpass_8.16 v19, v27, v19
444 lowpass_8.16 v20, v28, v20
445 lowpass_8.16 v21, v29, v21
447 lowpass_8.16 v22, v30, v22
448 lowpass_8.16 v23, v31, v23
// back to row order: v16-v23 = eight output rows of 8 bytes
450 transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
// h264_qpel8_hv_lowpass type: emits <type>_h264_qpel8_hv_lowpass_neon, which
// runs the shared hv front end and writes its 8x8 result to x0 (stride x2).
// Instantiated below for put and avg.
455 .macro h264_qpel8_hv_lowpass type
456 function \type\()_h264_qpel8_hv_lowpass_neon
// result rows arrive in v16-v23. NOTE(review): bl overwrites x30 and no
// save/restore of it is visible in this view -- confirm.
458 bl put_h264_qpel8_hv_lowpass_neon_top
// Rounding average with eight rows read from the destination into v0-v7,
// after which x0 is moved back by 8 * x2. NOTE(review): as visible here
// this runs for both put and avg; a type check is presumably missing.
460 ld1 {v0.8B}, [x0], x2
461 urhadd v16.8B, v16.8B, v0.8B
462 ld1 {v1.8B}, [x0], x2
463 urhadd v17.8B, v17.8B, v1.8B
464 ld1 {v2.8B}, [x0], x2
465 urhadd v18.8B, v18.8B, v2.8B
466 ld1 {v3.8B}, [x0], x2
467 urhadd v19.8B, v19.8B, v3.8B
468 ld1 {v4.8B}, [x0], x2
469 urhadd v20.8B, v20.8B, v4.8B
470 ld1 {v5.8B}, [x0], x2
471 urhadd v21.8B, v21.8B, v5.8B
472 ld1 {v6.8B}, [x0], x2
473 urhadd v22.8B, v22.8B, v6.8B
474 ld1 {v7.8B}, [x0], x2
475 urhadd v23.8B, v23.8B, v7.8B
476 sub x0, x0, x2, lsl #3
// write the eight result rows; x0 ends 8 rows below where it started
479 st1 {v16.8B}, [x0], x2
480 st1 {v17.8B}, [x0], x2
481 st1 {v18.8B}, [x0], x2
482 st1 {v19.8B}, [x0], x2
483 st1 {v20.8B}, [x0], x2
484 st1 {v21.8B}, [x0], x2
485 st1 {v22.8B}, [x0], x2
486 st1 {v23.8B}, [x0], x2
// instantiate the put and avg flavours
492 h264_qpel8_hv_lowpass put
493 h264_qpel8_hv_lowpass avg
// h264_qpel8_hv_lowpass_l2 type: emits <type>_h264_qpel8_hv_lowpass_l2_neon,
// which averages the hv result with a second 8x8 input read through x2 and
// writes to x0 (stride x3). Instantiated below for put and avg.
495 .macro h264_qpel8_hv_lowpass_l2 type
496 function \type\()_h264_qpel8_hv_lowpass_l2_neon
// hv result rows arrive in v16-v23. NOTE(review): bl overwrites x30 and no
// save/restore of it is visible in this view -- confirm.
498 bl put_h264_qpel8_hv_lowpass_neon_top
// the second input is read as tightly packed 8-byte rows: two rows per
// load, x2 advancing 16 bytes each time (64 bytes in total)
500 ld1 {v0.8B, v1.8B}, [x2], #16
501 ld1 {v2.8B, v3.8B}, [x2], #16
502 urhadd v0.8B, v0.8B, v16.8B
503 urhadd v1.8B, v1.8B, v17.8B
504 ld1 {v4.8B, v5.8B}, [x2], #16
505 urhadd v2.8B, v2.8B, v18.8B
506 urhadd v3.8B, v3.8B, v19.8B
507 ld1 {v6.8B, v7.8B}, [x2], #16
508 urhadd v4.8B, v4.8B, v20.8B
509 urhadd v5.8B, v5.8B, v21.8B
510 urhadd v6.8B, v6.8B, v22.8B
511 urhadd v7.8B, v7.8B, v23.8B
// Rounding average with eight rows read from the destination (v16-v23 are
// reused as load targets), after which x0 is moved back by 8 * x3.
// NOTE(review): as visible here this runs for both put and avg; a type
// check is presumably missing from this view.
513 ld1 {v16.8B}, [x0], x3
514 urhadd v0.8B, v0.8B, v16.8B
515 ld1 {v17.8B}, [x0], x3
516 urhadd v1.8B, v1.8B, v17.8B
517 ld1 {v18.8B}, [x0], x3
518 urhadd v2.8B, v2.8B, v18.8B
519 ld1 {v19.8B}, [x0], x3
520 urhadd v3.8B, v3.8B, v19.8B
521 ld1 {v20.8B}, [x0], x3
522 urhadd v4.8B, v4.8B, v20.8B
523 ld1 {v21.8B}, [x0], x3
524 urhadd v5.8B, v5.8B, v21.8B
525 ld1 {v22.8B}, [x0], x3
526 urhadd v6.8B, v6.8B, v22.8B
527 ld1 {v23.8B}, [x0], x3
528 urhadd v7.8B, v7.8B, v23.8B
529 sub x0, x0, x3, lsl #3
// write the eight result rows from v0-v7
531 st1 {v0.8B}, [x0], x3
532 st1 {v1.8B}, [x0], x3
533 st1 {v2.8B}, [x0], x3
534 st1 {v3.8B}, [x0], x3
535 st1 {v4.8B}, [x0], x3
536 st1 {v5.8B}, [x0], x3
537 st1 {v6.8B}, [x0], x3
538 st1 {v7.8B}, [x0], x3
// instantiate the put and avg flavours
544 h264_qpel8_hv_lowpass_l2 put
545 h264_qpel8_hv_lowpass_l2 avg
// h264_qpel16_hv type: emits the two 16x16 hv wrappers, each made of four
// runs of the matching 8x8 routine with the last run as a tail branch.
// The 8x8 front end shows 12 post-indexed loads through x1, so after a run
// x1 -= 4 * x3 nets +8 rows and x1 -= 20 * x3 (16 + 4) returns to row 0.
// NOTE(review): x30 handling around the bl calls and the step to the second
// column are not visible in this view; no instantiation of this macro is
// visible either -- confirm against the full file.
547 .macro h264_qpel16_hv type
// plain variant: destination x0 uses stride x2 and is rewound by 16 * x2
// after the first two runs
548 function \type\()_h264_qpel16_hv_lowpass_neon
550 bl \type\()_h264_qpel8_hv_lowpass_neon
551 sub x1, x1, x3, lsl #2
552 bl \type\()_h264_qpel8_hv_lowpass_neon
553 sub x1, x1, x3, lsl #4
554 sub x1, x1, x3, lsl #2
556 sub x0, x0, x2, lsl #4
558 bl \type\()_h264_qpel8_hv_lowpass_neon
559 sub x1, x1, x3, lsl #2
561 b \type\()_h264_qpel8_hv_lowpass_neon
// l2 variant: destination x0 uses stride x3 and is rewound by 16 * x3.
// x2 is not adjusted here; the 8x8 l2 routine advances it by 64 bytes per
// run, so successive runs read successive packed 8x8 blocks.
564 function \type\()_h264_qpel16_hv_lowpass_l2_neon
567 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
568 sub x1, x1, x3, lsl #2
569 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
570 sub x1, x1, x3, lsl #4
571 sub x1, x1, x3, lsl #2
573 sub x0, x0, x3, lsl #4
575 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
576 sub x1, x1, x3, lsl #2
578 b \type\()_h264_qpel8_hv_lowpass_l2_neon
// h264_qpel8 type: emits the exported 8x8 entry points
// ff_<type>_h264_qpel8_mcXY_neon. The XY suffix presumably encodes the
// quarter-sample x/y offset (FFmpeg naming convention) -- TODO confirm.
// NOTE(review): only the calls, branches and a few pointer adjustments are
// visible in this view; argument shuffling, x30 handling, stack restore and
// returns are not shown and are not described here.
585 .macro h264_qpel8 type
// mc10: horizontal lowpass averaged with a second input (l2 routine)
586 function ff_\type\()_h264_qpel8_mc10_neon, export=1
591 b \type\()_h264_qpel8_h_lowpass_l2_neon
// mc20: plain horizontal lowpass
594 function ff_\type\()_h264_qpel8_mc20_neon, export=1
599 b \type\()_h264_qpel8_h_lowpass_neon
// mc30: same routine as mc10; what differs in its setup is not visible here
602 function ff_\type\()_h264_qpel8_mc30_neon, export=1
607 b \type\()_h264_qpel8_h_lowpass_l2_neon
// mc01: vertical lowpass averaged with a second input. The local label is
// the shared entry that mc03 branches to. x1 is moved up by 2 * x2 first.
610 function ff_\type\()_h264_qpel8_mc01_neon, export=1
613 \type\()_h264_qpel8_mc01:
616 sub x1, x1, x2, lsl #1
617 bl \type\()_h264_qpel8_v_lowpass_l2_neon
// mc11: put horizontal lowpass first, then the vertical l2 routine with
// x1 = x9 - 2 * x2 (x9 presumably holds the saved source pointer -- confirm).
// The local label is the shared entry used by mc31, mc13 and mc33.
621 function ff_\type\()_h264_qpel8_mc11_neon, export=1
625 \type\()_h264_qpel8_mc11:
633 bl put_h264_qpel8_h_lowpass_neon
637 sub x1, x9, x2, lsl #1
639 bl \type\()_h264_qpel8_v_lowpass_l2_neon
// mc21: put horizontal lowpass, then the hv l2 routine. Reserves
// 8*8 + 16*12 = 256 bytes of stack as scratch. Shared entry for mc23.
644 function ff_\type\()_h264_qpel8_mc21_neon, export=1
648 \type\()_h264_qpel8_mc21:
651 sub sp, sp, #(8*8+16*12)
656 bl put_h264_qpel8_h_lowpass_neon
659 sub x1, x9, x2, lsl #1
663 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
// mc31: reuses the mc11 body
668 function ff_\type\()_h264_qpel8_mc31_neon, export=1
674 b \type\()_h264_qpel8_mc11
// mc02: plain vertical lowpass, source pointer moved up by 2 * x2 first
677 function ff_\type\()_h264_qpel8_mc02_neon, export=1
680 sub x1, x1, x2, lsl #1
682 bl \type\()_h264_qpel8_v_lowpass_neon
// mc12: put vertical lowpass, then the hv l2 routine. Reserves 256 bytes of
// stack as scratch. Shared entry for mc32.
686 function ff_\type\()_h264_qpel8_mc12_neon, export=1
690 \type\()_h264_qpel8_mc12:
693 sub sp, sp, #(8*8+16*12)
694 sub x1, x1, x2, lsl #1
698 bl put_h264_qpel8_v_lowpass_neon
701 sub x1, x9, x3, lsl #1
704 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
// mc22: plain hv lowpass, source pointer moved up by 2 * x2 first
709 function ff_\type\()_h264_qpel8_mc22_neon, export=1
712 sub x1, x1, x2, lsl #1
715 bl \type\()_h264_qpel8_hv_lowpass_neon
// the remaining entry points reuse the shared bodies above; whatever they
// change before branching is not visible in this view
720 function ff_\type\()_h264_qpel8_mc32_neon, export=1
725 b \type\()_h264_qpel8_mc12
728 function ff_\type\()_h264_qpel8_mc03_neon, export=1
731 b \type\()_h264_qpel8_mc01
734 function ff_\type\()_h264_qpel8_mc13_neon, export=1
739 b \type\()_h264_qpel8_mc11
742 function ff_\type\()_h264_qpel8_mc23_neon, export=1
747 b \type\()_h264_qpel8_mc21
750 function ff_\type\()_h264_qpel8_mc33_neon, export=1
757 b \type\()_h264_qpel8_mc11
// h264_qpel16 type: emits the exported 16x16 entry points
// ff_<type>_h264_qpel16_mcXY_neon, mirroring the 8x8 set but dispatching to
// the qpel16 routines. The XY suffix presumably encodes the quarter-sample
// x/y offset (FFmpeg naming convention) -- TODO confirm.
// NOTE(review): only the calls, branches and a few pointer adjustments are
// visible in this view; argument shuffling, x30 handling and returns are
// not shown and are not described here.
764 .macro h264_qpel16 type
// mc10: horizontal lowpass averaged with a second input (l2 routine)
765 function ff_\type\()_h264_qpel16_mc10_neon, export=1
769 b \type\()_h264_qpel16_h_lowpass_l2_neon
// mc20: plain horizontal lowpass
772 function ff_\type\()_h264_qpel16_mc20_neon, export=1
776 b \type\()_h264_qpel16_h_lowpass_neon
// mc30: same routine as mc10; what differs in its setup is not visible here
779 function ff_\type\()_h264_qpel16_mc30_neon, export=1
783 b \type\()_h264_qpel16_h_lowpass_l2_neon
// mc01: vertical lowpass averaged with a second input. The local label is
// the shared entry that mc03 branches to. x1 is moved up by 2 * x2 first.
786 function ff_\type\()_h264_qpel16_mc01_neon, export=1
789 \type\()_h264_qpel16_mc01:
792 sub x1, x1, x2, lsl #1
793 bl \type\()_h264_qpel16_v_lowpass_l2_neon
// mc11: put horizontal lowpass first, then the vertical l2 routine with
// x1 = x9 - 2 * x2 (x9 presumably holds the saved source pointer -- confirm).
// The local label is the shared entry used by mc31, mc13 and mc33.
797 function ff_\type\()_h264_qpel16_mc11_neon, export=1
801 \type\()_h264_qpel16_mc11:
808 bl put_h264_qpel16_h_lowpass_neon
812 sub x1, x9, x2, lsl #1
814 bl \type\()_h264_qpel16_v_lowpass_l2_neon
// mc21: packed put horizontal lowpass, then the hv l2 routine. Reserves
// 16*16 + 16*12 = 448 bytes of stack as scratch. Shared entry for mc23.
819 function ff_\type\()_h264_qpel16_mc21_neon, export=1
823 \type\()_h264_qpel16_mc21:
826 sub sp, sp, #(16*16+16*12)
829 bl put_h264_qpel16_h_lowpass_neon_packed
832 sub x1, x9, x2, lsl #1
835 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
// mc31: reuses the mc11 body
840 function ff_\type\()_h264_qpel16_mc31_neon, export=1
846 b \type\()_h264_qpel16_mc11
// mc02: plain vertical lowpass, source pointer moved up by 2 * x2 first
849 function ff_\type\()_h264_qpel16_mc02_neon, export=1
852 sub x1, x1, x2, lsl #1
854 bl \type\()_h264_qpel16_v_lowpass_neon
// mc12: packed put vertical lowpass, then the hv l2 routine. Reserves 448
// bytes of stack as scratch. Shared entry for mc32.
858 function ff_\type\()_h264_qpel16_mc12_neon, export=1
862 \type\()_h264_qpel16_mc12:
865 sub sp, sp, #(16*16+16*12)
866 sub x1, x1, x2, lsl #1
869 bl put_h264_qpel16_v_lowpass_neon_packed
872 sub x1, x9, x3, lsl #1
875 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
// mc22: plain hv lowpass, source pointer moved up by 2 * x2 first; sp is
// reloaded from x11 afterwards (the matching save is not visible here)
880 function ff_\type\()_h264_qpel16_mc22_neon, export=1
884 sub x1, x1, x2, lsl #1
887 bl \type\()_h264_qpel16_hv_lowpass_neon
888 mov sp, x11 // restore stack
// the remaining entry points reuse the shared bodies above; whatever they
// change before branching is not visible in this view
892 function ff_\type\()_h264_qpel16_mc32_neon, export=1
897 b \type\()_h264_qpel16_mc12
900 function ff_\type\()_h264_qpel16_mc03_neon, export=1
903 b \type\()_h264_qpel16_mc01
906 function ff_\type\()_h264_qpel16_mc13_neon, export=1
911 b \type\()_h264_qpel16_mc11
914 function ff_\type\()_h264_qpel16_mc23_neon, export=1
919 b \type\()_h264_qpel16_mc21
922 function ff_\type\()_h264_qpel16_mc33_neon, export=1
929 b \type\()_h264_qpel16_mc11