2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
/* lowpass_const: macro taking one register argument r. */
/* Its body is outside the visible lines. NOTE(review): presumably it loads the */
/* multipliers that the lowpass macros read from d6[0] and d6[1] - confirm. */
26 .macro lowpass_const r
/* lowpass_8: runs the same filter steps on two inputs, the pairs r0:r1 and r2:r3. */
/* Results are written to d0 and d1 by the narrowing step at the end. */
/* t0/t1 are accumulator names defined outside the visible lines. */
/* NOTE(review): presumably the narrow argument (default 1) selects them - confirm. */
/* Registers written here besides t0/t1: d2-d5, d18-d21 (q10 becomes a sum), d30, d31. */
32 .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
/* First input: views of r0:r1 starting at byte offsets 2, 3, 1, 4 and 5. */
40 vext.8 d2, \r0, \r1, #2
41 vext.8 d3, \r0, \r1, #3
43 vext.8 d4, \r0, \r1, #1
44 vext.8 d5, \r0, \r1, #4
46 vext.8 d30, \r0, \r1, #5
/* Second input: same offsets, interleaved with the multiply steps on t0. */
48 vext.8 d18, \r2, \r3, #2
/* t0 += q1 * d6[1] on 16-bit lanes. */
49 vmla.i16 t0, q1, d6[1]
50 vext.8 d19, \r2, \r3, #3
52 vext.8 d20, \r2, \r3, #1
/* t0 -= q2 * d6[0] on 16-bit lanes. */
53 vmls.i16 t0, q2, d6[0]
54 vext.8 d21, \r2, \r3, #4
/* q10 = widened sum of the offset-1 and offset-4 views of the second input. */
55 vaddl.u8 q10, d20, d21
56 vext.8 d31, \r2, \r3, #5
/* t1 gets the same two multiply steps, using q9 and q10. */
58 vmla.i16 t1, q9, d6[1]
59 vmls.i16 t1, q10, d6[0]
/* Rounding shift right by 5, saturating and narrowing to unsigned 8-bit. */
61 vqrshrun.s16 \d0, t0, #5
62 vqrshrun.s16 \d1, t1, #5
/* lowpass_8_1: single-input form of the filter steps; input pair r0:r1, result in d0. */
/* t0 is an accumulator name defined outside the visible lines. */
/* NOTE(review): presumably the narrow argument (default 1) selects it - confirm. */
68 .macro lowpass_8_1 r0, r1, d0, narrow=1
/* Views of r0:r1 starting at byte offsets 2, 3, 1, 4 and 5. */
74 vext.8 d2, \r0, \r1, #2
75 vext.8 d3, \r0, \r1, #3
77 vext.8 d4, \r0, \r1, #1
78 vext.8 d5, \r0, \r1, #4
80 vext.8 d30, \r0, \r1, #5
/* t0 += q1 * d6[1], then t0 -= q2 * d6[0], both on 16-bit lanes. */
82 vmla.i16 t0, q1, d6[1]
83 vmls.i16 t0, q2, d6[0]
/* Rounding shift right by 5, saturating and narrowing to unsigned 8-bit. */
85 vqrshrun.s16 \d0, t0, #5
/* lowpass_8.16: filter steps on 16-bit elements, accumulating in 32-bit lanes. */
/* r0:r1 is the input pair; l0/h0 and l1/h1 are D registers added pairwise below. */
/* The visible call sites pass the two halves of r0 as l0/h0 and of r1 as l1/h1. */
/* The d argument is not referenced in the visible lines. */
/* Registers written here: q0-q3, q8, q10, q15, d18, d19, and r1 itself. */
90 .macro lowpass_8.16 r0, r1, l0, h0, l1, h1, d
/* Views of r0:r1 starting at 16-bit element offsets 2, 3, 1 and 4. */
91 vext.16 q1, \r0, \r1, #2
92 vext.16 q0, \r0, \r1, #3
94 vext.16 q2, \r0, \r1, #1
96 vext.16 q3, \r0, \r1, #4
/* r1 is replaced by the offset-5 view, so the caller's r1 value does not survive. */
98 vext.16 \r1, \r0, \r1, #5
/* Widening sums; with l1/h1 being halves of r1 these read the offset-5 view. */
100 vaddl.s16 q0, \h0, \h1
101 vaddl.s16 q8, \l0, \l1
/* q10 = q10 + (q10 << 2), i.e. q10 * 5. */
105 vshl.i32 q15, q10, #2
107 vadd.i32 q10, q10, q15
/* Rounding shift right by 10, narrowing the 32-bit lanes of q9 and q1 to 16-bit. */
121 vrshrn.s32 d18, q9, #10
122 vrshrn.s32 d19, q1, #10
/* put_h264_qpel16_h_lowpass_neon_packed: runs put_h264_qpel8_h_lowpass_neon twice. */
/* The first run is a call; the second is a tail branch. */
/* Other instructions of this function are outside the visible lines. */
127 function put_h264_qpel16_h_lowpass_neon_packed
131 bl put_h264_qpel8_h_lowpass_neon
/* r1 -= r2 * 16 */
132 sub r1, r1, r2, lsl #4
136 b put_h264_qpel8_h_lowpass_neon
/* h264_qpel_h_lowpass: emits <type>_h264_qpel16_h_lowpass_neon and */
/* <type>_h264_qpel8_h_lowpass_neon for the given type prefix. */
139 .macro h264_qpel_h_lowpass type
/* qpel16 variant: calls the qpel8 function, then moves r0 and r1 back. */
/* Only one call is visible; the rest of the function is outside this view. */
140 function \type\()_h264_qpel16_h_lowpass_neon
143 bl \type\()_h264_qpel8_h_lowpass_neon
/* r0 -= r3 * 16 and r1 -= r2 * 16 */
144 sub r0, r0, r3, lsl #4
145 sub r1, r1, r2, lsl #4
/* qpel8 variant: r1 is read with stride r2, r0 is accessed with stride r3. */
/* Label 1 marks the loop head; the loop counter and branch are not visible. */
152 function \type\()_h264_qpel8_h_lowpass_neon
/* Two 16-byte loads from r1, each advancing r1 by r2. */
153 1: vld1.8 {d0, d1}, [r1], r2
154 vld1.8 {d16,d17}, [r1], r2
/* Filter both loads; results land in d0 and d16. */
156 lowpass_8 d0, d1, d16, d17, d0, d16
/* Loads 8 bytes twice from r0 and applies a rounding halving add to d16. */
/* NOTE(review): presumably this part is guarded for the avg type only - confirm. */
158 vld1.8 {d2}, [r0,:64], r3
160 vld1.8 {d3}, [r0,:64]
161 vrhadd.u8 d16, d16, d3
/* Store the two 8-byte results to r0, advancing by r3 each time. */
164 vst1.8 {d0}, [r0,:64], r3
165 vst1.8 {d16}, [r0,:64], r3
/* Expand h264_qpel_h_lowpass for the put and avg type prefixes. */
171 h264_qpel_h_lowpass put
172 h264_qpel_h_lowpass avg
/* h264_qpel_h_lowpass_l2: emits <type>_h264_qpel16_h_lowpass_l2_neon and */
/* <type>_h264_qpel8_h_lowpass_l2_neon for the given type prefix. */
/* These variants read a second input through r3 and average it with the filter output. */
174 .macro h264_qpel_h_lowpass_l2 type
/* qpel16 variant: calls the qpel8 function, then moves r0, r1 and r3 back. */
/* Only one call is visible; the rest of the function is outside this view. */
175 function \type\()_h264_qpel16_h_lowpass_l2_neon
178 bl \type\()_h264_qpel8_h_lowpass_l2_neon
/* r0, r1 and r3 are each reduced by r2 * 16. */
179 sub r0, r0, r2, lsl #4
180 sub r1, r1, r2, lsl #4
181 sub r3, r3, r2, lsl #4
/* qpel8 variant: r0, r1 and r3 all advance by r2. */
/* Label 1 marks the loop head; the loop counter and branch are not visible. */
189 function \type\()_h264_qpel8_h_lowpass_l2_neon
/* Two 16-byte loads from r1 and two 8-byte loads from r3 (into d28/d29 = q14). */
190 1: vld1.8 {d0, d1}, [r1], r2
191 vld1.8 {d16,d17}, [r1], r2
192 vld1.8 {d28}, [r3], r2
193 vld1.8 {d29}, [r3], r2
/* Filter both loads into d0/d1, then rounding-average q0 with the r3 data in q14. */
195 lowpass_8 d0, d1, d16, d17, d0, d1
196 vrhadd.u8 q0, q0, q14
/* Two 8-byte loads from r0 into d2/d3; their use is outside the visible lines. */
/* NOTE(review): presumably guarded for the avg type only - confirm. */
198 vld1.8 {d2}, [r0,:64], r2
200 vld1.8 {d3}, [r0,:64]
/* Store the two 8-byte results to r0, advancing by r2 each time. */
204 vst1.8 {d0}, [r0,:64], r2
205 vst1.8 {d1}, [r0,:64], r2
/* Expand h264_qpel_h_lowpass_l2 for the put and avg type prefixes. */
211 h264_qpel_h_lowpass_l2 put
212 h264_qpel_h_lowpass_l2 avg
/* put_h264_qpel16_v_lowpass_neon_packed: runs put_h264_qpel8_v_lowpass_neon four times. */
/* Three runs are calls and the last is a tail branch; r1 is adjusted between runs. */
/* Other instructions of this function are outside the visible lines. */
214 function put_h264_qpel16_v_lowpass_neon_packed
217 bl put_h264_qpel8_v_lowpass_neon
/* r1 -= r3 * 4 */
218 sub r1, r1, r3, lsl #2
219 bl put_h264_qpel8_v_lowpass_neon
/* r1 -= r3 * 16, then r1 -= r3 * 4 (20 strides in total). */
220 sub r1, r1, r3, lsl #4
221 sub r1, r1, r3, lsl #2
223 bl put_h264_qpel8_v_lowpass_neon
/* r1 -= r3 * 4 */
224 sub r1, r1, r3, lsl #2
226 b put_h264_qpel8_v_lowpass_neon
/* h264_qpel_v_lowpass: emits <type>_h264_qpel16_v_lowpass_neon and */
/* <type>_h264_qpel8_v_lowpass_neon for the given type prefix. */
229 .macro h264_qpel_v_lowpass type
/* qpel16 variant: three visible calls to the qpel8 function with pointer */
/* adjustments in between; the end of the function is outside this view. */
230 function \type\()_h264_qpel16_v_lowpass_neon
232 bl \type\()_h264_qpel8_v_lowpass_neon
/* r1 -= r3 * 4 */
233 sub r1, r1, r3, lsl #2
234 bl \type\()_h264_qpel8_v_lowpass_neon
/* r0 -= r2 * 16; r1 -= r3 * 16, then r1 -= r3 * 4. */
235 sub r0, r0, r2, lsl #4
237 sub r1, r1, r3, lsl #4
238 sub r1, r1, r3, lsl #2
240 bl \type\()_h264_qpel8_v_lowpass_neon
/* r1 -= r3 * 4 */
241 sub r1, r1, r3, lsl #2
/* qpel8 variant: r1 is read with stride r3, r0 is accessed with stride r2. */
245 function \type\()_h264_qpel8_v_lowpass_neon
/* Twelve visible 8-byte loads from r1, each advancing r1 by r3. */
/* The first eight fill the low halves of q4-q7 and q11-q14; the next four */
/* fill the high halves of q4-q7. */
246 vld1.8 {d8}, [r1], r3
247 vld1.8 {d10}, [r1], r3
248 vld1.8 {d12}, [r1], r3
249 vld1.8 {d14}, [r1], r3
250 vld1.8 {d22}, [r1], r3
251 vld1.8 {d24}, [r1], r3
252 vld1.8 {d26}, [r1], r3
253 vld1.8 {d28}, [r1], r3
254 vld1.8 {d9}, [r1], r3
255 vld1.8 {d11}, [r1], r3
256 vld1.8 {d13}, [r1], r3
257 vld1.8 {d15}, [r1], r3
/* Transpose (macro defined elsewhere), filter four register-pair groups with */
/* lowpass_8, then transpose the eight 8-byte results again. */
260 transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
261 lowpass_8 d8, d9, d10, d11, d8, d10
262 lowpass_8 d12, d13, d14, d15, d12, d14
263 lowpass_8 d22, d23, d24, d25, d22, d24
264 lowpass_8 d26, d27, d28, d29, d26, d28
265 transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28
/* Eight 8-byte loads from r0, each rounding-averaged into a result register. */
/* NOTE(review): presumably guarded for the avg type only - confirm. */
268 vld1.8 {d9}, [r0,:64], r2
270 vld1.8 {d11}, [r0,:64], r2
271 vrhadd.u8 d10, d10, d11
272 vld1.8 {d13}, [r0,:64], r2
273 vrhadd.u8 d12, d12, d13
274 vld1.8 {d15}, [r0,:64], r2
275 vrhadd.u8 d14, d14, d15
276 vld1.8 {d23}, [r0,:64], r2
277 vrhadd.u8 d22, d22, d23
278 vld1.8 {d25}, [r0,:64], r2
279 vrhadd.u8 d24, d24, d25
280 vld1.8 {d27}, [r0,:64], r2
281 vrhadd.u8 d26, d26, d27
282 vld1.8 {d29}, [r0,:64], r2
283 vrhadd.u8 d28, d28, d29
/* r0 -= r2 * 8, undoing the eight post-increments above. */
284 sub r0, r0, r2, lsl #3
/* Store the eight 8-byte results to r0, advancing by r2 each time. */
287 vst1.8 {d8}, [r0,:64], r2
288 vst1.8 {d10}, [r0,:64], r2
289 vst1.8 {d12}, [r0,:64], r2
290 vst1.8 {d14}, [r0,:64], r2
291 vst1.8 {d22}, [r0,:64], r2
292 vst1.8 {d24}, [r0,:64], r2
293 vst1.8 {d26}, [r0,:64], r2
294 vst1.8 {d28}, [r0,:64], r2
/* Expand h264_qpel_v_lowpass for the put and avg type prefixes. */
300 h264_qpel_v_lowpass put
301 h264_qpel_v_lowpass avg
/* h264_qpel_v_lowpass_l2: emits <type>_h264_qpel16_v_lowpass_l2_neon and */
/* <type>_h264_qpel8_v_lowpass_l2_neon for the given type prefix. */
/* These variants read a second input through r12 (stride r2) and average it */
/* with the filter output. */
303 .macro h264_qpel_v_lowpass_l2 type
/* qpel16 variant: three visible calls to the qpel8 function with pointer */
/* adjustments in between; the end of the function is outside this view. */
304 function \type\()_h264_qpel16_v_lowpass_l2_neon
306 bl \type\()_h264_qpel8_v_lowpass_l2_neon
/* r1 -= r3 * 4 */
307 sub r1, r1, r3, lsl #2
308 bl \type\()_h264_qpel8_v_lowpass_l2_neon
/* r0 -= r3 * 16; r12 -= r2 * 16; r1 -= r3 * 16, then r1 -= r3 * 4. */
309 sub r0, r0, r3, lsl #4
310 sub r12, r12, r2, lsl #4
313 sub r1, r1, r3, lsl #4
314 sub r1, r1, r3, lsl #2
316 bl \type\()_h264_qpel8_v_lowpass_l2_neon
/* r1 -= r3 * 4 */
317 sub r1, r1, r3, lsl #2
/* qpel8 variant: r1 and r0 use stride r3, r12 uses stride r2. */
321 function \type\()_h264_qpel8_v_lowpass_l2_neon
/* Twelve visible 8-byte loads from r1, each advancing r1 by r3. */
322 vld1.8 {d8}, [r1], r3
323 vld1.8 {d10}, [r1], r3
324 vld1.8 {d12}, [r1], r3
325 vld1.8 {d14}, [r1], r3
326 vld1.8 {d22}, [r1], r3
327 vld1.8 {d24}, [r1], r3
328 vld1.8 {d26}, [r1], r3
329 vld1.8 {d28}, [r1], r3
330 vld1.8 {d9}, [r1], r3
331 vld1.8 {d11}, [r1], r3
332 vld1.8 {d13}, [r1], r3
333 vld1.8 {d15}, [r1], r3
/* Transpose (macro defined elsewhere), filter with lowpass_8, transpose again. */
/* Results end up packed in d8/d9, d12/d13, d22/d23, d26/d27 (q4, q6, q11, q13). */
336 transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
337 lowpass_8 d8, d9, d10, d11, d8, d9
338 lowpass_8 d12, d13, d14, d15, d12, d13
339 lowpass_8 d22, d23, d24, d25, d22, d23
340 lowpass_8 d26, d27, d28, d29, d26, d27
341 transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27
/* Eight 8-byte loads from r12 into d0-d5, d10, d11, interleaved with rounding */
/* averages against the filter results (two of the averages are visible). */
343 vld1.8 {d0}, [r12], r2
344 vld1.8 {d1}, [r12], r2
345 vld1.8 {d2}, [r12], r2
346 vld1.8 {d3}, [r12], r2
347 vld1.8 {d4}, [r12], r2
349 vld1.8 {d5}, [r12], r2
351 vld1.8 {d10}, [r12], r2
352 vrhadd.u8 q2, q2, q11
353 vld1.8 {d11}, [r12], r2
354 vrhadd.u8 q5, q5, q13
/* Eight 8-byte loads from r0, each rounding-averaged into a result register. */
/* NOTE(review): presumably guarded for the avg type only - confirm. */
357 vld1.8 {d16}, [r0,:64], r3
358 vrhadd.u8 d0, d0, d16
359 vld1.8 {d17}, [r0,:64], r3
360 vrhadd.u8 d1, d1, d17
361 vld1.8 {d16}, [r0,:64], r3
362 vrhadd.u8 d2, d2, d16
363 vld1.8 {d17}, [r0,:64], r3
364 vrhadd.u8 d3, d3, d17
365 vld1.8 {d16}, [r0,:64], r3
366 vrhadd.u8 d4, d4, d16
367 vld1.8 {d17}, [r0,:64], r3
368 vrhadd.u8 d5, d5, d17
369 vld1.8 {d16}, [r0,:64], r3
370 vrhadd.u8 d10, d10, d16
371 vld1.8 {d17}, [r0,:64], r3
372 vrhadd.u8 d11, d11, d17
/* r0 -= r3 * 8, undoing the eight post-increments above. */
373 sub r0, r0, r3, lsl #3
/* Store the eight 8-byte results to r0, advancing by r3 each time. */
376 vst1.8 {d0}, [r0,:64], r3
377 vst1.8 {d1}, [r0,:64], r3
378 vst1.8 {d2}, [r0,:64], r3
379 vst1.8 {d3}, [r0,:64], r3
380 vst1.8 {d4}, [r0,:64], r3
381 vst1.8 {d5}, [r0,:64], r3
382 vst1.8 {d10}, [r0,:64], r3
383 vst1.8 {d11}, [r0,:64], r3
/* Expand h264_qpel_v_lowpass_l2 for the put and avg type prefixes. */
389 h264_qpel_v_lowpass_l2 put
390 h264_qpel_v_lowpass_l2 avg
/* put_h264_qpel8_hv_lowpass_neon_top: shared first part of the hv functions. */
/* Reads source data from r1 with stride r3 and uses r4 as a 16-byte-aligned */
/* scratch buffer pointer. Leaves eight 8-byte results in d12-d15 and d8-d11. */
/* The values of r12 used as loop counter and address step are set outside */
/* the visible lines, as are the loop branch and the return. */
392 function put_h264_qpel8_hv_lowpass_neon_top
/* Loop head: filter two 16-byte loads without narrowing (narrow=0), keeping */
/* 16-bit results in q11/q12, and append those 32 bytes to the buffer at r4. */
395 1: vld1.8 {d0, d1}, [r1], r3
396 vld1.8 {d16,d17}, [r1], r3
398 lowpass_8 d0, d1, d16, d17, q11, q12, narrow=0
399 vst1.8 {d22-d25}, [r4,:128]!
/* One more 16-byte load (r1 not advanced), filtered into q12 only. */
402 vld1.8 {d0, d1}, [r1]
403 lowpass_8_1 d0, d1, q12, narrow=0
/* Read twelve 16-byte entries back from the buffer, stepping r4 by r12, */
/* into q15, q10, q9, q8, q7, q6, q5, q4, q3, q2, q1, q0 in that order. */
407 vld1.8 {d30,d31}, [r4,:128], r12
408 vld1.8 {d20,d21}, [r4,:128], r12
409 vld1.8 {d18,d19}, [r4,:128], r12
410 vld1.8 {d16,d17}, [r4,:128], r12
411 vld1.8 {d14,d15}, [r4,:128], r12
412 vld1.8 {d12,d13}, [r4,:128], r12
413 vld1.8 {d10,d11}, [r4,:128], r12
414 vld1.8 {d8, d9}, [r4,:128], r12
415 vld1.8 {d6, d7}, [r4,:128], r12
416 vld1.8 {d4, d5}, [r4,:128], r12
417 vld1.8 {d2, d3}, [r4,:128], r12
418 vld1.8 {d0, d1}, [r4,:128]
/* Rearrange the 16-bit data with swap4 and transpose16_4x4 (macros defined elsewhere). */
420 swap4 d1, d3, d5, d7, d8, d10, d12, d14
421 transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7
423 swap4 d17, d19, d21, d31, d24, d26, d28, d22
424 transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11
/* Spill eight 16-byte registers to the buffer; they are reloaded in pairs below. */
426 vst1.8 {d30,d31}, [r4,:128]!
427 vst1.8 {d6, d7}, [r4,:128]!
428 vst1.8 {d20,d21}, [r4,:128]!
429 vst1.8 {d4, d5}, [r4,:128]!
430 vst1.8 {d18,d19}, [r4,:128]!
431 vst1.8 {d2, d3}, [r4,:128]!
432 vst1.8 {d16,d17}, [r4,:128]!
433 vst1.8 {d0, d1}, [r4,:128]
/* Second filter pass on the register-resident groups; d argument names d8-d11. */
435 lowpass_8.16 q4, q12, d8, d9, d24, d25, d8
436 lowpass_8.16 q5, q13, d10, d11, d26, d27, d9
437 lowpass_8.16 q6, q14, d12, d13, d28, d29, d10
438 lowpass_8.16 q7, q11, d14, d15, d22, d23, d11
/* Second filter pass on the spilled groups, reloaded two registers at a time; */
/* d argument names d12-d15. */
440 vld1.8 {d16,d17}, [r4,:128], r12
441 vld1.8 {d30,d31}, [r4,:128], r12
442 lowpass_8.16 q8, q15, d16, d17, d30, d31, d12
443 vld1.8 {d16,d17}, [r4,:128], r12
444 vld1.8 {d30,d31}, [r4,:128], r12
445 lowpass_8.16 q8, q15, d16, d17, d30, d31, d13
446 vld1.8 {d16,d17}, [r4,:128], r12
447 vld1.8 {d30,d31}, [r4,:128], r12
448 lowpass_8.16 q8, q15, d16, d17, d30, d31, d14
449 vld1.8 {d16,d17}, [r4,:128], r12
450 vld1.8 {d30,d31}, [r4,:128]
451 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15
/* Final transpose of the eight 8-byte results. */
453 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
/* h264_qpel8_hv_lowpass: emits <type>_h264_qpel8_hv_lowpass_neon. */
/* The function calls put_h264_qpel8_hv_lowpass_neon_top and writes its eight */
/* 8-byte results (d12-d15, d8-d11) to r0 with stride r2. */
458 .macro h264_qpel8_hv_lowpass type
459 function \type\()_h264_qpel8_hv_lowpass_neon
461 bl put_h264_qpel8_hv_lowpass_neon_top
/* Eight 8-byte loads from r0 into d0-d7 with rounding averages into the */
/* results; the averages for d8/d9 are not among the visible lines. */
/* NOTE(review): presumably guarded for the avg type only - confirm. */
463 vld1.8 {d0}, [r0,:64], r2
464 vrhadd.u8 d12, d12, d0
465 vld1.8 {d1}, [r0,:64], r2
466 vrhadd.u8 d13, d13, d1
467 vld1.8 {d2}, [r0,:64], r2
468 vrhadd.u8 d14, d14, d2
469 vld1.8 {d3}, [r0,:64], r2
470 vrhadd.u8 d15, d15, d3
471 vld1.8 {d4}, [r0,:64], r2
473 vld1.8 {d5}, [r0,:64], r2
475 vld1.8 {d6}, [r0,:64], r2
476 vrhadd.u8 d10, d10, d6
477 vld1.8 {d7}, [r0,:64], r2
478 vrhadd.u8 d11, d11, d7
/* r0 -= r2 * 8, undoing the eight post-increments above. */
479 sub r0, r0, r2, lsl #3
/* Store the eight results in the order d12-d15, d8-d11. */
482 vst1.8 {d12}, [r0,:64], r2
483 vst1.8 {d13}, [r0,:64], r2
484 vst1.8 {d14}, [r0,:64], r2
485 vst1.8 {d15}, [r0,:64], r2
486 vst1.8 {d8}, [r0,:64], r2
487 vst1.8 {d9}, [r0,:64], r2
488 vst1.8 {d10}, [r0,:64], r2
489 vst1.8 {d11}, [r0,:64], r2
/* Expand h264_qpel8_hv_lowpass for the put and avg type prefixes. */
496 h264_qpel8_hv_lowpass put
497 h264_qpel8_hv_lowpass avg
/* h264_qpel8_hv_lowpass_l2: emits <type>_h264_qpel8_hv_lowpass_l2_neon. */
/* The function calls put_h264_qpel8_hv_lowpass_neon_top, reads 64 bytes of a */
/* second input through r2, and writes eight 8-byte rows to r0 with stride r3. */
499 .macro h264_qpel8_hv_lowpass_l2 type
500 function \type\()_h264_qpel8_hv_lowpass_l2_neon
502 bl put_h264_qpel8_hv_lowpass_neon_top
/* Four 16-byte aligned loads from r2 with writeback, filling q0-q3. */
/* The instructions combining them with the hv results are not visible here. */
504 vld1.8 {d0, d1}, [r2,:128]!
505 vld1.8 {d2, d3}, [r2,:128]!
507 vld1.8 {d4, d5}, [r2,:128]!
509 vld1.8 {d6, d7}, [r2,:128]!
/* Eight 8-byte loads from r0 into d16-d23, each rounding-averaged into d0-d7. */
/* NOTE(review): presumably guarded for the avg type only - confirm. */
513 vld1.8 {d16}, [r0,:64], r3
514 vrhadd.u8 d0, d0, d16
515 vld1.8 {d17}, [r0,:64], r3
516 vrhadd.u8 d1, d1, d17
517 vld1.8 {d18}, [r0,:64], r3
518 vrhadd.u8 d2, d2, d18
519 vld1.8 {d19}, [r0,:64], r3
520 vrhadd.u8 d3, d3, d19
521 vld1.8 {d20}, [r0,:64], r3
522 vrhadd.u8 d4, d4, d20
523 vld1.8 {d21}, [r0,:64], r3
524 vrhadd.u8 d5, d5, d21
525 vld1.8 {d22}, [r0,:64], r3
526 vrhadd.u8 d6, d6, d22
527 vld1.8 {d23}, [r0,:64], r3
528 vrhadd.u8 d7, d7, d23
/* r0 -= r3 * 8, undoing the eight post-increments above. */
529 sub r0, r0, r3, lsl #3
/* Store d0-d7 to r0, advancing by r3 each time. */
531 vst1.8 {d0}, [r0,:64], r3
532 vst1.8 {d1}, [r0,:64], r3
533 vst1.8 {d2}, [r0,:64], r3
534 vst1.8 {d3}, [r0,:64], r3
535 vst1.8 {d4}, [r0,:64], r3
536 vst1.8 {d5}, [r0,:64], r3
537 vst1.8 {d6}, [r0,:64], r3
538 vst1.8 {d7}, [r0,:64], r3
/* Expand h264_qpel8_hv_lowpass_l2 for the put and avg type prefixes. */
545 h264_qpel8_hv_lowpass_l2 put
546 h264_qpel8_hv_lowpass_l2 avg
/* h264_qpel16_hv: emits <type>_h264_qpel16_hv_lowpass_neon and */
/* <type>_h264_qpel16_hv_lowpass_l2_neon for the given type prefix. */
/* Each runs the matching qpel8 function four times: three calls and a tail branch. */
/* Link-register handling and other setup are outside the visible lines. */
548 .macro h264_qpel16_hv type
549 function \type\()_h264_qpel16_hv_lowpass_neon
551 bl \type\()_h264_qpel8_hv_lowpass_neon
/* r1 -= r3 * 4 */
552 sub r1, r1, r3, lsl #2
553 bl \type\()_h264_qpel8_hv_lowpass_neon
/* r1 -= r3 * 16, then r1 -= r3 * 4; r0 -= r2 * 16. */
554 sub r1, r1, r3, lsl #4
555 sub r1, r1, r3, lsl #2
557 sub r0, r0, r2, lsl #4
559 bl \type\()_h264_qpel8_hv_lowpass_neon
/* r1 -= r3 * 4 */
560 sub r1, r1, r3, lsl #2
562 b \type\()_h264_qpel8_hv_lowpass_neon
/* l2 variant: same call pattern; here r0 is moved back by r3 * 16 instead. */
565 function \type\()_h264_qpel16_hv_lowpass_l2_neon
568 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
/* r1 -= r3 * 4 */
569 sub r1, r1, r3, lsl #2
570 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
/* r1 -= r3 * 16, then r1 -= r3 * 4; r0 -= r3 * 16. */
571 sub r1, r1, r3, lsl #4
572 sub r1, r1, r3, lsl #2
574 sub r0, r0, r3, lsl #4
576 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
/* r1 -= r3 * 4 */
577 sub r1, r1, r3, lsl #2
579 b \type\()_h264_qpel8_hv_lowpass_l2_neon
/* h264_qpel8: emits the exported ff_<type>_h264_qpel8_mcXY_neon entry points. */
/* Several entry points share a body through a local label: each one pushes the */
/* same register list as the function owning the label, then branches to it. */
/* Register setup between the visible instructions is outside this view. */
/* NOTE(review): presumably X and Y are the horizontal and vertical quarter-sample */
/* offsets - confirm against the C caller. */
586 .macro h264_qpel8 type
/* mc10: tail branch to the h_lowpass_l2 qpel8 function. */
587 function ff_\type\()_h264_qpel8_mc10_neon, export=1
592 b \type\()_h264_qpel8_h_lowpass_l2_neon
/* mc20: tail branch to the h_lowpass qpel8 function. */
595 function ff_\type\()_h264_qpel8_mc20_neon, export=1
600 b \type\()_h264_qpel8_h_lowpass_neon
/* mc30: tail branch to the h_lowpass_l2 qpel8 function. */
603 function ff_\type\()_h264_qpel8_mc30_neon, export=1
608 b \type\()_h264_qpel8_h_lowpass_l2_neon
/* mc01: owns the shared label <type>_h264_qpel8_mc01, also entered from mc03. */
611 function ff_\type\()_h264_qpel8_mc01_neon, export=1
614 \type\()_h264_qpel8_mc01:
/* r1 -= r2 * 2 before calling the v_lowpass_l2 qpel8 function. */
617 sub r1, r1, r2, lsl #1
619 bl \type\()_h264_qpel8_v_lowpass_l2_neon
/* mc11: owns the shared label <type>_h264_qpel8_mc11, also entered from */
/* mc31, mc13 and mc33. */
624 function ff_\type\()_h264_qpel8_mc11_neon, export=1
625 push {r0, r1, r11, lr}
626 \type\()_h264_qpel8_mc11:
/* First pass always uses the put h_lowpass function. */
638 bl put_h264_qpel8_h_lowpass_neon
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
/* NOTE(review): presumably r11 holds the sp value after the push - confirm. */
639 ldrd r0, r1, [r11], #8
/* r1 -= r2 * 2 before calling the v_lowpass_l2 qpel8 function. */
642 sub r1, r1, r2, lsl #1
644 bl \type\()_h264_qpel8_v_lowpass_l2_neon
/* mc21: owns the shared label <type>_h264_qpel8_mc21, also entered from mc23. */
650 function ff_\type\()_h264_qpel8_mc21_neon, export=1
651 push {r0, r1, r4, r10, r11, lr}
652 \type\()_h264_qpel8_mc21:
/* Reserve 8*8 + 16*12 = 256 bytes of stack. */
658 sub sp, sp, #(8*8+16*12)
664 bl put_h264_qpel8_h_lowpass_neon
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
666 ldrd r0, r1, [r11], #8
/* r1 -= r2 * 2 */
667 sub r1, r1, r2, lsl #1
671 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
/* Return by popping into pc; r0/r1 from the push are not popped here. */
/* NOTE(review): presumably sp is restored from r11 just before - confirm. */
674 pop {r4, r10, r11, pc}
/* mc31: enters the mc11 body after pushing the same register list. */
677 function ff_\type\()_h264_qpel8_mc31_neon, export=1
679 push {r0, r1, r11, lr}
681 b \type\()_h264_qpel8_mc11
/* mc02: r1 -= r2 * 2, then calls the v_lowpass qpel8 function. */
684 function ff_\type\()_h264_qpel8_mc02_neon, export=1
687 sub r1, r1, r2, lsl #1
690 bl \type\()_h264_qpel8_v_lowpass_neon
/* mc12: owns the shared label <type>_h264_qpel8_mc12, also entered from mc32. */
695 function ff_\type\()_h264_qpel8_mc12_neon, export=1
696 push {r0, r1, r4, r10, r11, lr}
697 \type\()_h264_qpel8_mc12:
/* Reserve 8*8 + 16*12 = 256 bytes of stack; r1 -= r2 * 2. */
703 sub sp, sp, #(8*8+16*12)
704 sub r1, r1, r2, lsl #1
/* First pass always uses the put v_lowpass function. */
709 bl put_h264_qpel8_v_lowpass_neon
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
711 ldrd r0, r1, [r11], #8
/* r1 -= r3 * 2 (this path uses r3 rather than r2). */
712 sub r1, r1, r3, lsl #1
715 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
/* Return by popping into pc; r0/r1 from the push are not popped here. */
718 pop {r4, r10, r11, pc}
/* mc22: r1 -= r2 * 2, calls the hv_lowpass qpel8 function, returns via pop. */
721 function ff_\type\()_h264_qpel8_mc22_neon, export=1
722 push {r4, r10, r11, lr}
727 sub r1, r1, r2, lsl #1
733 bl \type\()_h264_qpel8_hv_lowpass_neon
736 pop {r4, r10, r11, pc}
/* mc32: enters the mc12 body after pushing the same register list. */
739 function ff_\type\()_h264_qpel8_mc32_neon, export=1
740 push {r0, r1, r4, r10, r11, lr}
742 b \type\()_h264_qpel8_mc12
/* mc03: enters the mc01 body. */
745 function ff_\type\()_h264_qpel8_mc03_neon, export=1
748 b \type\()_h264_qpel8_mc01
/* mc13: enters the mc11 body after pushing the same register list. */
751 function ff_\type\()_h264_qpel8_mc13_neon, export=1
752 push {r0, r1, r11, lr}
754 b \type\()_h264_qpel8_mc11
/* mc23: enters the mc21 body after pushing the same register list. */
757 function ff_\type\()_h264_qpel8_mc23_neon, export=1
758 push {r0, r1, r4, r10, r11, lr}
760 b \type\()_h264_qpel8_mc21
/* mc33: enters the mc11 body after pushing the same register list. */
763 function ff_\type\()_h264_qpel8_mc33_neon, export=1
765 push {r0, r1, r11, lr}
768 b \type\()_h264_qpel8_mc11
/* h264_qpel16: emits the exported ff_<type>_h264_qpel16_mcXY_neon entry points. */
/* Several entry points share a body through a local label: each one pushes the */
/* same register list as the function owning the label, then branches to it. */
/* Register setup between the visible instructions is outside this view. */
/* NOTE(review): presumably X and Y are the horizontal and vertical quarter-sample */
/* offsets - confirm against the C caller. */
775 .macro h264_qpel16 type
/* mc10: tail branch to the h_lowpass_l2 qpel16 function. */
776 function ff_\type\()_h264_qpel16_mc10_neon, export=1
780 b \type\()_h264_qpel16_h_lowpass_l2_neon
/* mc20: tail branch to the h_lowpass qpel16 function. */
783 function ff_\type\()_h264_qpel16_mc20_neon, export=1
787 b \type\()_h264_qpel16_h_lowpass_neon
/* mc30: tail branch to the h_lowpass_l2 qpel16 function. */
790 function ff_\type\()_h264_qpel16_mc30_neon, export=1
794 b \type\()_h264_qpel16_h_lowpass_l2_neon
/* mc01: owns the shared label <type>_h264_qpel16_mc01, also entered from mc03. */
797 function ff_\type\()_h264_qpel16_mc01_neon, export=1
800 \type\()_h264_qpel16_mc01:
/* r1 -= r2 * 2 before calling the v_lowpass_l2 qpel16 function. */
803 sub r1, r1, r2, lsl #1
805 bl \type\()_h264_qpel16_v_lowpass_l2_neon
/* mc11: owns the shared label <type>_h264_qpel16_mc11, also entered from */
/* mc31, mc13 and mc33. */
810 function ff_\type\()_h264_qpel16_mc11_neon, export=1
811 push {r0, r1, r4, r11, lr}
812 \type\()_h264_qpel16_mc11:
/* First pass always uses the put h_lowpass function. */
823 bl put_h264_qpel16_h_lowpass_neon
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
/* NOTE(review): presumably r11 holds the sp value after the push - confirm. */
824 ldrd r0, r1, [r11], #8
/* r1 -= r2 * 2 before calling the v_lowpass_l2 qpel16 function. */
827 sub r1, r1, r2, lsl #1
829 bl \type\()_h264_qpel16_v_lowpass_l2_neon
/* mc21: owns the shared label <type>_h264_qpel16_mc21, also entered from mc23. */
835 function ff_\type\()_h264_qpel16_mc21_neon, export=1
836 push {r0, r1, r4-r5, r9-r11, lr}
837 \type\()_h264_qpel16_mc21:
/* Reserve 16*16 + 16*12 = 448 bytes of stack. */
843 sub sp, sp, #(16*16+16*12)
847 bl put_h264_qpel16_h_lowpass_neon_packed
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
849 ldrd r0, r1, [r11], #8
/* r1 -= r2 * 2 */
850 sub r1, r1, r2, lsl #1
853 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
/* Return by popping into pc; r0/r1 from the push are not popped here. */
/* NOTE(review): presumably sp is restored from r11 just before - confirm. */
856 pop {r4-r5, r9-r11, pc}
/* mc31: enters the mc11 body after pushing the same register list. */
859 function ff_\type\()_h264_qpel16_mc31_neon, export=1
861 push {r0, r1, r4, r11, lr}
863 b \type\()_h264_qpel16_mc11
/* mc02: r1 -= r2 * 2, then calls the v_lowpass qpel16 function. */
866 function ff_\type\()_h264_qpel16_mc02_neon, export=1
869 sub r1, r1, r2, lsl #1
872 bl \type\()_h264_qpel16_v_lowpass_neon
/* mc12: owns the shared label <type>_h264_qpel16_mc12, also entered from mc32. */
877 function ff_\type\()_h264_qpel16_mc12_neon, export=1
878 push {r0, r1, r4-r5, r9-r11, lr}
879 \type\()_h264_qpel16_mc12:
/* Reserve 16*16 + 16*12 = 448 bytes of stack; r1 -= r2 * 2. */
885 sub sp, sp, #(16*16+16*12)
886 sub r1, r1, r2, lsl #1
/* First pass always uses the put packed v_lowpass function. */
890 bl put_h264_qpel16_v_lowpass_neon_packed
/* Load r0/r1 from the address in r11 and advance r11 by 8. */
892 ldrd r0, r1, [r11], #8
/* r1 -= r3 * 2 (this path uses r3 rather than r2). */
893 sub r1, r1, r3, lsl #1
896 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
/* Return by popping into pc; r0/r1 from the push are not popped here. */
899 pop {r4-r5, r9-r11, pc}
/* mc22: r1 -= r2 * 2, then calls the hv_lowpass qpel16 function. */
/* The epilogue is outside the visible lines. */
902 function ff_\type\()_h264_qpel16_mc22_neon, export=1
903 push {r4, r9-r11, lr}
909 sub r1, r1, r2, lsl #1
915 bl \type\()_h264_qpel16_hv_lowpass_neon
/* mc32: enters the mc12 body after pushing the same register list. */
921 function ff_\type\()_h264_qpel16_mc32_neon, export=1
922 push {r0, r1, r4-r5, r9-r11, lr}
924 b \type\()_h264_qpel16_mc12
/* mc03: enters the mc01 body. */
927 function ff_\type\()_h264_qpel16_mc03_neon, export=1
930 b \type\()_h264_qpel16_mc01
/* mc13: enters the mc11 body after pushing the same register list. */
933 function ff_\type\()_h264_qpel16_mc13_neon, export=1
934 push {r0, r1, r4, r11, lr}
936 b \type\()_h264_qpel16_mc11
/* mc23: enters the mc21 body after pushing the same register list. */
939 function ff_\type\()_h264_qpel16_mc23_neon, export=1
940 push {r0, r1, r4-r5, r9-r11, lr}
942 b \type\()_h264_qpel16_mc21
/* mc33: enters the mc11 body after pushing the same register list. */
945 function ff_\type\()_h264_qpel16_mc33_neon, export=1
947 push {r0, r1, r4, r11, lr}
950 b \type\()_h264_qpel16_mc11