]> git.sesse.net Git - x264/commitdiff
arm: x264_coeff_last8_arm
author Janne Grunau <janne-x264@jannau.net>
Sun, 16 Mar 2014 16:21:58 +0000 (17:21 +0100)
committer Fiona Glaser <fiona@x264.com>
Tue, 22 Apr 2014 22:37:49 +0000 (15:37 -0700)
checkasm --bench on a Cortex-A9:
coeff_last8_c: 173
coeff_last8_armv6: 151

60 instead of 73 cycles in ~130k runs on the same cpu while encoding.

common/arm/predict-c.c
common/arm/predict.h
common/arm/quant-a.S
common/arm/quant.h
common/quant.c

index b9ad2623e6e143d2872b9354a9884be9e6d1543f..08da8e5c131c7bf01c0df2f57841781ac011f180 100644 (file)
 #include "predict.h"
 #include "pixel.h"
 
-void x264_predict_4x4_dc_armv6( uint8_t *src );
-void x264_predict_4x4_dc_top_neon( uint8_t *src );
-void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_4x4_ddr_armv6( uint8_t *src );
-void x264_predict_4x4_ddl_neon( uint8_t *src );
-
-void x264_predict_8x8c_dc_neon( uint8_t *src );
-void x264_predict_8x8c_dc_top_neon( uint8_t *src );
-void x264_predict_8x8c_dc_left_neon( uint8_t *src );
-void x264_predict_8x8c_h_neon( uint8_t *src );
-void x264_predict_8x8c_v_neon( uint8_t *src );
-void x264_predict_8x8c_p_neon( uint8_t *src );
-
-void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
-
-void x264_predict_16x16_dc_neon( uint8_t *src );
-void x264_predict_16x16_dc_top_neon( uint8_t *src );
-void x264_predict_16x16_dc_left_neon( uint8_t *src );
-void x264_predict_16x16_h_neon( uint8_t *src );
-void x264_predict_16x16_v_neon( uint8_t *src );
-void x264_predict_16x16_p_neon( uint8_t *src );
-
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
 {
     if (!(cpu&X264_CPU_ARMV6))
index 26e1e93e73690660430436eb8790ee30e0af6706..7c7acfc1973a940d0b74d2979e09d00790750776 100644 (file)
 #define X264_ARM_PREDICT_H
 
 void x264_predict_4x4_dc_armv6( uint8_t *src );
+void x264_predict_4x4_dc_top_neon( uint8_t *src );
 void x264_predict_4x4_v_armv6( uint8_t *src );
 void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8c_dc_neon( pixel *src );
-void x264_predict_8x8c_h_neon( pixel *src );
-void x264_predict_8x8c_v_neon( pixel *src );
-void x264_predict_16x16_v_neon( pixel *src );
-void x264_predict_16x16_h_neon( pixel *src );
-void x264_predict_16x16_dc_neon( pixel *src );
+void x264_predict_4x4_ddr_armv6( uint8_t *src );
+void x264_predict_4x4_ddl_neon( uint8_t *src );
+
+void x264_predict_8x8c_dc_neon( uint8_t *src );
+void x264_predict_8x8c_dc_top_neon( uint8_t *src );
+void x264_predict_8x8c_dc_left_neon( uint8_t *src );
+void x264_predict_8x8c_h_neon( uint8_t *src );
+void x264_predict_8x8c_v_neon( uint8_t *src );
+void x264_predict_8x8c_p_neon( uint8_t *src );
+
+void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
+
+void x264_predict_16x16_dc_neon( uint8_t *src );
+void x264_predict_16x16_dc_top_neon( uint8_t *src );
+void x264_predict_16x16_dc_left_neon( uint8_t *src );
+void x264_predict_16x16_h_neon( uint8_t *src );
+void x264_predict_16x16_v_neon( uint8_t *src );
+void x264_predict_16x16_p_neon( uint8_t *src );
 
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
index 2aeedc4ac9b768f19736f38f688b676e9984619e..b8c6ba33488543b90bed39f0430d42163cd2fbf6 100644 (file)
@@ -321,6 +321,20 @@ function x264_coeff_last4_arm
     bx          lr
 .endfunc
 
+function x264_coeff_last8_arm   // in: r0 = pointer to 8 int16_t coeffs; out: r0 = index of the last nonzero coeff (0 if all are zero)
+    ldrd        r2,  r3,  [r0, #8]   // r2:r3 = coeffs 4..7, two 16-bit coeffs per register
+    orrs        ip,  r2,  r3   // Z is set iff coeffs 4..7 are all zero; ip is only a scratch destination
+    movne       r0,  #4   // upper half holds a nonzero coeff: base index 4 (the pointer is no longer needed)
+    ldrdeq      r2,  r3,  [r0]   // upper half is all zero: inspect coeffs 0..3 instead
+    moveq       r0,  #0   // ...with base index 0
+    tst         r3,  r3   // does the upper pair of the selected half contain a nonzero coeff?
+    addne       r0,  #2   // yes: the answer lies in that pair, so advance the base index by 2
+    movne       r2,  r3   // ...and make that pair the one examined below
+    lsrs        r2,  r2,  #16   // drop the low halfword; assumes little-endian loads, so the remainder is the odd-index coeff (TODO confirm)
+    addne       r0,  r0,  #1   // odd-index coeff is nonzero: it is the last one, so add 1
+    bx          lr
+.endfunc
+
 .macro COEFF_LAST_1x size
 function x264_coeff_last\size\()_neon
 .if \size == 15
index 0695ab1e327920a22feb3b39b0540e45879b5090..75d9fb286731bd3508b7021e5ab91435d8270626 100644 (file)
@@ -39,6 +39,7 @@ void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
 void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
 
 int x264_coeff_last4_arm( int16_t * );
+int x264_coeff_last8_arm( int16_t * );
 int x264_coeff_last15_neon( int16_t * );
 int x264_coeff_last16_neon( int16_t * );
 int x264_coeff_last64_neon( int16_t * );
index 339df1c1277dd8db41b23a5e9cd79678f8ec978d..1a9e4dcaa5e62b383ff41fda82c3fc784aeb74a6 100644 (file)
@@ -725,7 +725,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
 
 #if HAVE_ARMV6
     if( cpu&X264_CPU_ARMV6 )
+    {
         pf->coeff_last4 = x264_coeff_last4_arm;
+        pf->coeff_last8 = x264_coeff_last8_arm;
+    }
 
     if( cpu&X264_CPU_NEON )
     {