git.sesse.net Git - x264/commitdiff
arm: x264_plane_copy_interleave_neon
author Janne Grunau <janne-x264@jannau.net>
Sat, 15 Mar 2014 18:55:50 +0000 (19:55 +0100)
committer Fiona Glaser <fiona@x264.com>
Tue, 22 Apr 2014 22:37:49 +0000 (15:37 -0700)
plane_copy_interleave_c: 40285
plane_copy_interleave_neon: 10137

common/arm/mc-a.S
common/arm/mc-c.c

index e9a5f863c846dff91b5f74e46f1204a793972fad..6274c5946f6b740245538dff36796c0c0b3e8787 100644 (file)
@@ -1569,3 +1569,30 @@ block4:
 
     pop             {r4-r8, r10, r11, pc}
 .endfunc
+
+function x264_plane_copy_interleave_neon // void f(pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h): dst row = u0 v0 u1 v1 ...
+    push            {r4-r7, lr}         // 5 registers = 20 bytes; stack arguments now start at [sp, #20]
+    ldrd            r6, r7, [sp, #28]   // r6 = w, r7 = h (7th and 8th arguments)
+    ldrd            r4, r5, [sp, #20]   // r4 = srcv, r5 = i_srcv (5th and 6th arguments)
+    add             lr,  r6,  #15
+    bic             lr,  lr,  #15       // lr = w rounded up to a multiple of 16 = bytes consumed per source row
+    sub             r1,  r1,  lr, lsl #1 // r1 = i_dst - 2*lr: the post-incrementing stores advance dst by 2*lr per row
+    sub             r3,  r3,  lr        // r3 = i_srcu - lr: remaining step to the next srcu row
+    sub             r5,  r5,  lr        // r5 = i_srcv - lr: remaining step to the next srcv row
+blocki:                                 // inner loop: 16 bytes from each source -> 32 interleaved bytes to dst
+    vld1.8          {q0}, [r2]!         // q0 (d0,d1) = next 16 bytes of srcu
+    vld1.8          {q1}, [r4]!         // q1 (d2,d3) = next 16 bytes of srcv
+    subs            lr,  lr,  #16       // flags are consumed by the bgt below; the NEON stores leave them untouched
+    vst2.8          {d0,d2}, [r0]!      // store low halves interleaved: u0 v0 ... u7 v7
+    vst2.8          {d1,d3}, [r0]!      // store high halves interleaved: u8 v8 ... u15 v15
+    bgt             blocki              // NOTE(review): when w % 16 != 0 the last iteration touches bytes past w; presumably callers pad rows - confirm
+
+    subs            r7,  r7,  #1        // one row done; flags are consumed by the final bgt (add/mov below do not set flags)
+    add             r0,  r0,  r1        // advance dst to the start of the next row
+    add             r2,  r2,  r3        // advance srcu to the start of the next row
+    add             r4,  r4,  r5        // advance srcv to the start of the next row
+    mov             lr,  r6             // reload with unrounded w: counting down by 16 until <= 0 gives the same iteration count
+    bgt             blocki              // loop is bottom-tested, so one row is processed even if h <= 0
+
+    pop             {r4-r7, pc}         // restore r4-r7 and return by loading the saved lr into pc
+.endfunc
index 48b868e4a24cb97ffb20499fb11ee8959ea52a76..e134e96756e9d5d46f3d8953f697efafc02201fa 100644 (file)
@@ -54,6 +54,9 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                             pixel *dstb, intptr_t i_dstb,
                                             pixel *dstc, intptr_t i_dstc,
                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_interleave_neon( pixel *dst,  intptr_t i_dst,
+                                      pixel *srcu, intptr_t i_srcu,
+                                      pixel *srcv, intptr_t i_srcv, int w, int h );
 
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -238,6 +241,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
+    pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
 
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;