X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Farm%2Fvp8dsp_neon.S;h=7cedfc25316016facdde52cb58df658cd3e8f58b;hb=d6b62ce1aced9e2456582870382f384581cc7cbb;hp=fcb424881b0a0182b2f42818af56c8258b907c4c;hpb=c7488f746154b5dcd70f8a3bef9a9fa5c42ac595;p=ffmpeg diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S index fcb424881b0..7cedfc25316 100644 --- a/libavcodec/arm/vp8dsp_neon.S +++ b/libavcodec/arm/vp8dsp_neon.S @@ -773,23 +773,6 @@ endfunc vqrshrun.s16 \d1, q14, #7 .endm -.macro vp8_epel8_v6 d0, s0, s1, s2, s3, s4, s5 - vmovl.u8 q10, \s2 - vmovl.u8 q11, \s3 - vmovl.u8 q9, \s1 - vmovl.u8 q12, \s4 - vmovl.u8 q8, \s0 - vmovl.u8 q13, \s5 - vmul.u16 q10, q10, d0[2] - vmul.u16 q11, q11, d0[3] - vmls.u16 q10, q9, d0[1] - vmls.u16 q11, q12, d1[0] - vmla.u16 q10, q8, d0[0] - vmla.u16 q11, q13, d1[1] - vqadd.s16 q11, q10, q11 - vqrshrun.s16 \d0, q11, #7 -.endm - .macro vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6 vmovl.u8 q10, \s0 vmovl.u8 q11, \s3 @@ -909,12 +892,12 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 sub r2, r2, r3, lsl #1 sub r2, r2, #2 push {r4,lr} - vpush {d8-d9} + vpush {d8-d15} @ first pass (horizontal): - ldr r4, [sp, #28] @ mx + ldr r4, [sp, #64+8+4] @ mx movrel lr, subpel_filters-16 - ldr r12, [sp, #24] @ h + ldr r12, [sp, #64+8+0] @ h add r4, lr, r4, lsl #4 sub sp, sp, #336+16 vld1.16 {q0}, [r4,:128] @@ -931,9 +914,9 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 bne 1b @ second pass (vertical): - ldr r4, [sp, #336+16+32] @ my + ldr r4, [sp, #336+16+64+8+8] @ my movrel lr, subpel_filters-16 - ldr r12, [sp, #336+16+24] @ h + ldr r12, [sp, #336+16+64+8+0] @ h add r4, lr, r4, lsl #4 add lr, sp, #15 vld1.16 {q0}, [r4,:128] @@ -941,18 +924,20 @@ function ff_put_vp8_epel16_h6v6_neon, export=1 2: vld1.8 {d2-d5}, [lr,:128]! vld1.8 {d6-d9}, [lr,:128]! - vld1.8 {d28-d31},[lr,:128] - sub lr, lr, #48 + vld1.8 {d10-d13},[lr,:128]! + vld1.8 {d14-d15},[lr,:128] + sub lr, lr, #64 - vp8_epel8_v6 d2, d2, d4, d6, d8, d28, d30 - vp8_epel8_v6 d3, d3, d5, d7, d9, d29, d31 + vp8_epel8_v6_y2 d2, d4, d2, d4, d6, d8, d10, d12, d14 + vp8_epel8_v6_y2 d3, d5, d3, d5, d7, d9, d11, d13, d15 vst1.8 {d2-d3}, [r0,:128], r1 - subs r12, r12, #1 + vst1.8 {d4-d5}, [r0,:128], r1 + subs r12, r12, #2 bne 2b add sp, sp, #336+16 - vpop {d8-d9} + vpop {d8-d15} pop {r4,pc} endfunc