git.sesse.net Git - x264/commitdiff
arm: x264_plane_copy_deinterleave_neon
author: Janne Grunau <janne-x264@jannau.net>
Sat, 15 Mar 2014 16:22:08 +0000 (17:22 +0100)
committer: Fiona Glaser <fiona@x264.com>
Tue, 22 Apr 2014 22:37:48 +0000 (15:37 -0700)
plane_copy_deinterleave_c: 42988
plane_copy_deinterleave_neon: 10184

common/arm/mc-a.S
common/arm/mc-c.c

index 6267e35010c24d745775df8e9679f248ff36c4bc..179315cf9de672ab3c23d61b5a4bcb04716607f5 100644 (file)
@@ -1465,3 +1465,29 @@ lowres_xloop_end:
     vpop            {d8-d15}
     pop             {r4-r10,pc}
 .endfunc
+
+function x264_plane_copy_deinterleave_neon    // split byte-interleaved src rows: even bytes go to dstu (r0), odd bytes to dstv (r2)
+    push            {r4-r7, lr}                // 5 words = 20 bytes pushed, so the stack arguments now start at sp+20
+    ldrd            r6, r7, [sp, #28]          // r6 = w, r7 = h
+    ldrd            r4, r5, [sp, #20]          // r4 = src, r5 = i_src
+    add             lr,  r6,  #15
+    bic             lr,  lr,  #15              // lr = w rounded up to a multiple of 16; each row is processed to that width, so up to 15 bytes past w are written per plane
+    sub             r1,  r1,  lr               // r1 = i_dstu minus the bytes stored per row (the post-incremented pointer only needs the remainder)
+    sub             r3,  r3,  lr               // r3 = i_dstv minus the bytes stored per row
+    sub             r5,  r5,  lr, lsl #1       // r5 = i_src minus the bytes loaded per row (two source bytes per output byte)
+block:                                         // one pass = 32 source bytes in, 16 bytes out to each plane; also re-entered for each new row
+    vld2.8          {d0-d3}, [r4,:128]!        // de-interleave: q0 = even-indexed bytes, q1 = odd-indexed bytes; :128 asserts a 16-byte aligned address
+    subs            lr,  lr,  #16              // lr = bytes still to produce in this row; sets the flags tested by the bgt below
+    vst1.8          {q0},    [r0]!             // 16 bytes to dstu
+    vst1.8          {q1},    [r2]!             // 16 bytes to dstv
+    bgt             block                      // flags are still those of the subs above (the NEON stores do not change them)
+
+    add             r4,  r4,  r5               // src   -> start of next row
+    subs            r7,  r7,  #1               // one row finished; sets the flags tested by the final bgt
+    add             r0,  r0,  r1               // dstu  -> start of next row
+    add             r2,  r2,  r3               // dstv  -> start of next row
+    mov             lr,  r6                    // reload the column counter with the unrounded w; counting down by 16 until <= 0 still gives ceil(w/16) passes
+    bgt             block                      // add/mov without the s suffix leave the flags from subs r7 intact
+
+    pop             {r4-r7, pc}                // restore r4-r7 and return by popping the saved lr into pc
+.endfunc
index d68369f75511c85ed1565dc2b51b1f2f52ab0579..bf5e2eb1e34c17c481bf7d435da68c1e983b703a 100644 (file)
@@ -47,6 +47,10 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 
+void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,  /* plane receiving the even-indexed src elements, and its row stride */
+                                         pixel *dstv, intptr_t i_dstv,  /* plane receiving the odd-indexed src elements, and its row stride */
+                                         pixel *src,  intptr_t i_src, int w, int h );  /* interleaved input and its row stride; w elements per output row, h rows */
+
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -225,6 +229,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
     pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
 
+    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
     pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;