git.sesse.net Git - x264/commitdiff
arm: load_deinterleave_chroma_f{dec,enc}_neon
author: Janne Grunau <janne-x264@jannau.net>
Sat, 15 Mar 2014 17:22:49 +0000 (18:22 +0100)
committer: Fiona Glaser <fiona@x264.com>
Tue, 22 Apr 2014 22:37:48 +0000 (15:37 -0700)
load_deinterleave_chroma_fdec_c: 4055
load_deinterleave_chroma_fdec_neon: 995
load_deinterleave_chroma_fenc_c: 4071
load_deinterleave_chroma_fenc_neon: 992

common/arm/mc-a.S
common/arm/mc-c.c

index 179315cf9de672ab3c23d61b5a4bcb04716607f5..df9e2fb6c885e11388a70cd0638c9f10cca7a426 100644 (file)
@@ -1466,6 +1466,32 @@ lowres_xloop_end:
     pop             {r4-r10,pc}
 .endfunc
 
+function x264_load_deinterleave_chroma_fdec_neon    // split 16 interleaved bytes per source row into two 8-byte halves of one dst row
+    mov             ip,  #FDEC_STRIDE/2             // ip = half the FDEC row stride; the two stores below together advance dst by one full stride
+1:
+    vld2.8          {d0-d1}, [r1,:128], r2          // de-interleaving load: even bytes -> d0, odd bytes -> d1 (presumably U and V - confirm); :128 asserts 16-byte alignment; r1 += r2
+    subs            r3,  r3,  #1                    // one row consumed; sets the flags tested by bgt (the pld/vst1 in between leave them untouched)
+    pld             [r1]                            // prefetch the source row that the next iteration will load
+    vst1.8          {d0},    [r0,:64], ip           // write the even-byte half at r0 (:64 asserts 8-byte alignment); r0 += FDEC_STRIDE/2
+    vst1.8          {d1},    [r0,:64], ip           // write the odd-byte half FDEC_STRIDE/2 bytes later; r0 now points at the next dst row
+    bgt             1b                              // repeat while r3 > 0; the count is tested after the body, so one row is always processed
+
+    bx              lr                              // return; NOTE(review): r0=dst, r1=src, r2=i_src, r3=height assumed from the C prototype's argument order
+.endfunc
+
+function x264_load_deinterleave_chroma_fenc_neon    // split 16 interleaved bytes per source row into two 8-byte halves of one dst row
+    mov             ip,  #FENC_STRIDE/2             // ip = half the FENC row stride; the two stores below together advance dst by one full stride
+1:
+    vld2.8          {d0-d1}, [r1,:128], r2          // de-interleaving load: even bytes -> d0, odd bytes -> d1 (presumably U and V - confirm); :128 asserts 16-byte alignment; r1 += r2
+    subs            r3,  r3,  #1                    // one row consumed; sets the flags tested by bgt (the pld/vst1 in between leave them untouched)
+    pld             [r1]                            // prefetch the source row that the next iteration will load
+    vst1.8          {d0},    [r0,:64], ip           // write the even-byte half at r0 (:64 asserts 8-byte alignment); r0 += FENC_STRIDE/2
+    vst1.8          {d1},    [r0,:64], ip           // write the odd-byte half FENC_STRIDE/2 bytes later; r0 now points at the next dst row
+    bgt             1b                              // repeat while r3 > 0; the count is tested after the body, so one row is always processed
+
+    bx              lr                              // return; NOTE(review): r0=dst, r1=src, r2=i_src, r3=height assumed from the C prototype's argument order
+.endfunc
+
 function x264_plane_copy_deinterleave_neon
     push            {r4-r7, lr}
     ldrd            r6, r7, [sp, #28]
index bf5e2eb1e34c17c481bf7d435da68c1e983b703a..e50d7364188e86a4fdb679cdf0a8a66d3fc0ebaa 100644 (file)
@@ -51,6 +51,9 @@ void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                          pixel *dstv, intptr_t i_dstv,
                                          pixel *src,  intptr_t i_src, int w, int h );
 
+void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -231,6 +234,9 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
 
+    pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
+    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
+
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
     pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;