X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Farm%2Fvp8dsp_neon.S;h=544332c3c27e2aca2fa798f03d16da9016ec15e2;hb=6a13505c069890cb0e2a07e29fd819a0cf2e73c1;hp=c1a91e14616432480c0315b1aaeed7d5468c8c0c;hpb=b692d246ead6ed56045904c1f839eb3cbf2cf2a7;p=ffmpeg diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S index c1a91e14616..544332c3c27 100644 --- a/libavcodec/arm/vp8dsp_neon.S +++ b/libavcodec/arm/vp8dsp_neon.S @@ -21,7 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "asm.S" +#include "libavutil/arm/asm.S" #include "neon.S" function ff_vp8_luma_dc_wht_neon, export=1 @@ -1576,18 +1576,18 @@ endconst /* Bilinear MC */ function ff_put_vp8_bilin16_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h 1: subs r12, r12, #2 - vld1.8 {d2-d4}, [r2], r1 + vld1.8 {d2-d4}, [r2], r3 vext.8 q2, q1, q2, #1 vmull.u8 q8, d2, d1 vmlal.u8 q8, d4, d0 - vld1.8 {d18-d20},[r2], r1 + vld1.8 {d18-d20},[r2], r3 vmull.u8 q3, d3, d1 vmlal.u8 q3, d5, d0 vext.8 q10, q9, q10, #1 @@ -1607,20 +1607,20 @@ function ff_put_vp8_bilin16_h_neon, export=1 endfunc function ff_put_vp8_bilin16_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #8] @ my + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h - vld1.8 {q1}, [r2], r1 + vld1.8 {q1}, [r2], r3 1: subs r12, r12, #2 - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vmull.u8 q3, d2, d1 vmlal.u8 q3, d4, d0 vmull.u8 q8, d3, d1 vmlal.u8 q8, d5, d0 - vld1.8 {q1}, [r2], r1 + vld1.8 {q1}, [r2], r3 vmull.u8 q9, d4, d1 vmlal.u8 q9, d2, d0 vmull.u8 q10, d5, d1 @@ -1637,17 +1637,17 @@ function ff_put_vp8_bilin16_v_neon, export=1 endfunc function ff_put_vp8_bilin16_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr r12, [sp, #8] @ my + vdup.8 d2, r12 + rsb r12, r12, #8 vdup.8 d3, r12 ldr r12, [sp] @ h - vld1.8 {d4-d6}, [r2], r1 + vld1.8 {d4-d6}, [r2], r3 vext.8 q3, q2, q3, #1 vmull.u8 q8, d4, d1 vmlal.u8 q8, d6, d0 @@ -1657,11 +1657,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1 vrshrn.u16 d5, q9, #3 1: subs r12, r12, #2 - vld1.8 {d18-d20},[r2], r1 + vld1.8 {d18-d20},[r2], r3 vext.8 q10, q9, q10, #1 vmull.u8 q11, d18, d1 vmlal.u8 q11, d20, d0 - vld1.8 {d26-d28},[r2], r1 + vld1.8 {d26-d28},[r2], r3 vmull.u8 q12, d19, d1 vmlal.u8 q12, d21, d0 vext.8 q14, q13, q14, #1 @@ -1693,18 +1693,18 @@ function ff_put_vp8_bilin16_hv_neon, export=1 endfunc function ff_put_vp8_bilin8_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h 1: subs r12, r12, #2 - vld1.8 {q1}, [r2], r1 + vld1.8 {q1}, [r2], r3 vext.8 d3, d2, d3, #1 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 - vld1.8 {q3}, [r2], r1 + vld1.8 {q3}, [r2], r3 vext.8 d7, d6, d7, #1 vmull.u8 q8, d6, d1 vmlal.u8 q8, d7, d0 @@ -1718,18 +1718,18 @@ function ff_put_vp8_bilin8_h_neon, export=1 endfunc function ff_put_vp8_bilin8_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #8] @ my + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h - vld1.8 {d2}, [r2], r1 + vld1.8 {d2}, [r2], r3 1: subs r12, r12, #2 - vld1.8 {d3}, [r2], r1 + vld1.8 {d3}, [r2], r3 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 - vld1.8 {d2}, [r2], r1 + vld1.8 {d2}, [r2], r3 vmull.u8 q3, d3, d1 vmlal.u8 q3, d2, d0 vrshrn.u16 d4, q2, #3 @@ -1742,28 +1742,28 @@ function ff_put_vp8_bilin8_v_neon, export=1 endfunc function ff_put_vp8_bilin8_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr r12, [sp, #8] @ my + vdup.8 d2, r12 + rsb r12, r12, #8 vdup.8 d3, r12 ldr r12, [sp] @ h - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vext.8 d5, d4, d5, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 vrshrn.u16 d22, q9, #3 1: subs r12, r12, #2 - vld1.8 {q3}, [r2], r1 + vld1.8 {q3}, [r2], r3 vext.8 d7, d6, d7, #1 vmull.u8 q8, d6, d1 vmlal.u8 q8, d7, d0 - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vext.8 d5, d4, d5, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 @@ -1783,16 +1783,16 @@ function ff_put_vp8_bilin8_hv_neon, export=1 endfunc function ff_put_vp8_bilin4_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h 1: subs r12, r12, #2 - vld1.8 {d2}, [r2], r1 + vld1.8 {d2}, [r2], r3 vext.8 d3, d2, d3, #1 - vld1.8 {d6}, [r2], r1 + vld1.8 {d6}, [r2], r3 vext.8 d7, d6, d7, #1 vtrn.32 q1, q3 vmull.u8 q2, d2, d1 @@ -1806,16 +1806,16 @@ function ff_put_vp8_bilin4_h_neon, export=1 endfunc function ff_put_vp8_bilin4_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #8] @ my + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 ldr r12, [sp] @ h - vld1.32 {d2[]}, [r2], r1 + vld1.32 {d2[]}, [r2], r3 1: vld1.32 {d3[]}, [r2] - vld1.32 {d2[1]}, [r2], r1 - vld1.32 {d3[1]}, [r2], r1 + vld1.32 {d2[1]}, [r2], r3 + vld1.32 {d3[1]}, [r2], r3 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 vtrn.32 d3, d2 @@ -1829,26 +1829,26 @@ function ff_put_vp8_bilin4_v_neon, export=1 endfunc function ff_put_vp8_bilin4_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + ldr r12, [sp, #4] @ mx + vdup.8 d0, r12 + rsb r12, r12, #8 vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr r12, [sp, #8] @ my + vdup.8 d2, r12 + rsb r12, r12, #8 vdup.8 d3, r12 ldr r12, [sp] @ h - vld1.8 {d4}, [r2], r1 + vld1.8 {d4}, [r2], r3 vext.8 d5, d4, d4, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 vrshrn.u16 d22, q9, #3 1: subs r12, r12, #2 - vld1.8 {d6}, [r2], r1 + vld1.8 {d6}, [r2], r3 vext.8 d7, d6, d6, #1 - vld1.8 {d4}, [r2], r1 + vld1.8 {d4}, [r2], r3 vext.8 d5, d4, d4, #1 vtrn.32 q3, q2 vmull.u8 q8, d6, d1