git.sesse.net Git - x264/commitdiff
aarch64: x264_denoise_dct_neon
author: Janne Grunau <janne-x264@jannau.net>
Tue, 21 Oct 2014 13:18:49 +0000 (15:18 +0200)
committer: Anton Mitrofanov <BugMaster@narod.ru>
Tue, 16 Dec 2014 17:40:07 +0000 (20:40 +0300)
3.5 times faster.

common/aarch64/quant-a.S
common/aarch64/quant.h
common/quant.c

index d3b2933bdcc0caac19cb7e2f209f492e6081a7d7..f4be81b011abb13dc6bffad2e2763ed0ae30d05c 100644 (file)
@@ -574,3 +574,28 @@ endfunc
 X264_COEFF_LEVEL_RUN 8
 X264_COEFF_LEVEL_RUN 15
 X264_COEFF_LEVEL_RUN 16
+
+function x264_denoise_dct_neon, export=1    // x0 = 16-bit signed coeffs (rewritten in place), x1 = 32-bit sums (accumulated in place), x2 = 16-bit offsets (read only), w3 = coefficient count
+1:  subs        w3,  w3,  #16               // 16 coefficients per pass; the flags set here feed the b.gt at the bottom (presumably count is a multiple of 16 - confirm with callers)
+    ld1         {v0.8h,v1.8h}, [x0]         // v0-v1 = 16 coeffs; x0 is advanced later by the matching store
+    ld1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1]  // v4-v7 = 16 running sums; x1 is advanced later by the matching store
+    abs         v16.8h,  v0.8h              // v16 = |coeff| for lanes 0-7
+    abs         v17.8h,  v1.8h              // v17 = |coeff| for lanes 8-15
+    ld1         {v2.8h,v3.8h}, [x2], #32    // v2-v3 = 16 offsets; x2 += 32 bytes
+    cmlt        v18.8h,  v0.8h,   #0        // v18 = all-ones in lanes whose original coeff was negative
+    cmlt        v19.8h,  v1.8h,   #0        // v19 = same sign mask for lanes 8-15
+    uaddw       v4.4s,   v4.4s,   v16.4h    // sums[0..3]   += |coeff|, zero-extended 16 -> 32 bit
+    uaddw2      v5.4s,   v5.4s,   v16.8h    // sums[4..7]   += |coeff| (upper half of v16)
+    uqsub       v20.8h,  v16.8h,  v2.8h     // v20 = |coeff| - offset, unsigned saturating, so it floors at 0
+    uqsub       v21.8h,  v17.8h,  v3.8h     // v21 = same for lanes 8-15
+    uaddw       v6.4s,   v6.4s,   v17.4h    // sums[8..11]  += |coeff|
+    uaddw2      v7.4s,   v7.4s,   v17.8h    // sums[12..15] += |coeff|
+    neg         v22.8h,  v20.8h             // v22 = negated reduced magnitude, for originally negative lanes
+    neg         v23.8h,  v21.8h             // v23 = same for lanes 8-15
+    bsl         v18.16b, v22.16b, v20.16b   // per bit: mask set -> v22 (negative result), mask clear -> v20; result replaces the mask in v18
+    bsl         v19.16b, v23.16b, v21.16b   // same select for lanes 8-15, result in v19
+    st1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1], #64  // write updated sums back; x1 += 64 bytes
+    st1         {v18.8h,v19.8h}, [x0], #32  // write reduced coeffs back over the input; x0 += 32 bytes
+    b.gt        1b                          // repeat while the count left after the subs is still > 0 (signed)
+    ret
+endfunc
index 360af26fcb7ad8bcf054cab580f031561f086fac..a06e78eed8848c0d3c3229bbe8850748905f1c19 100644 (file)
@@ -53,4 +53,7 @@ int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * );
 int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * );
 int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * );
 int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * );
+
+void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
+
 #endif
index 514e658e45b643489947773680e2df50c62b1343..c3392bcfd94fda7dace422205a7e517f24557eb0 100644 (file)
@@ -764,6 +764,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->decimate_score15 = x264_decimate_score15_neon;
         pf->decimate_score16 = x264_decimate_score16_neon;
         pf->decimate_score64 = x264_decimate_score64_neon;
+        pf->denoise_dct = x264_denoise_dct_neon;
     }
 #endif
 #endif // HIGH_BIT_DEPTH