git.sesse.net Git - x264/blobdiff - common/aarch64/quant-a.S
aarch64: Fix coeff_level_run* macros with LLVM's assembler
[x264] / common / aarch64 / quant-a.S
index d3b2933bdcc0caac19cb7e2f209f492e6081a7d7..3e7e35e4f05974f57e394f75f79a7ad30de94230 100644 (file)
@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2014 x264 project
+ * Copyright (C) 2009-2015 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -328,17 +328,13 @@ function x264_decimate_score\size\()_neon, export=1
     lsr         x6,  x3,  #2
     lsl         x1,  x1,  x3
     ldrb        w7,  [x5, x6]
-    cbz         x1,  2f
     lsl         x1,  x1,  #4
     add         w0,  w0,  w7
     cbnz        x1,  1b
     ret
-2:
-    add         w0,  w0,  w7
-0:
-    ret
 9:
     mov         w0,  #9
+0:
     ret
 endfunc
 .endm
@@ -399,17 +395,13 @@ function x264_decimate_score64_neon, export=1
     clz         x3,  x1
     lsl         x1,  x1,  x3
     ldrb        w7,  [x5, x3]
-    cbz         x1,  2f
     lsl         x1,  x1,  #1
     add         w0,  w0,  w7
     cbnz        x1,  1b
     ret
-2:
-    add         w0,  w0,  w7
-0:
-    ret
 9:
     mov         w0,  #9
+0:
     ret
 endfunc
 
@@ -546,12 +538,10 @@ function x264_coeff_level_run\size\()_neon, export=1
     sub         x0,  x0,  #2
 .endif
 .if         \size < 15
-    .equ        shiftw, 3
     ld1         {v0.8h}, [x0]
     uqxtn       v0.8b,  v0.8h
     cmtst       v0.8b,  v0.8b,  v0.8b
 .else
-    .equ        shiftw, 2
     ld1         {v0.8h,v1.8h}, [x0]
     uqxtn       v0.8b,  v0.8h
     uqxtn2      v0.16b, v1.8h
@@ -565,7 +555,7 @@ function x264_coeff_level_run\size\()_neon, export=1
 
     coeff_level_run_start \size
 
-    coeff_level_run shiftw
+    coeff_level_run (4 - (\size + 1) / 8)
 
     ret
 endfunc
@@ -574,3 +564,28 @@ endfunc
 X264_COEFF_LEVEL_RUN 8
 X264_COEFF_LEVEL_RUN 15
 X264_COEFF_LEVEL_RUN 16
+
+function x264_denoise_dct_neon, export=1           // NOTE(review): args presumably x0=dct, x1=sum, x2=offset, w3=size — confirm against the C prototype
+1:  subs        w3,  w3,  #16                      // 16 coefficients per iteration; flags set here are consumed by b.gt below
+    ld1         {v0.8h,v1.8h}, [x0]                // v0,v1 = 16 x 16-bit coefficients (x0 advances only at the store)
+    ld1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1]    // v4-v7 = 16 x 32-bit running sums (x1 advances only at the store)
+    abs         v16.8h,  v0.8h                     // v16,v17 = |coefficient|
+    abs         v17.8h,  v1.8h
+    ld1         {v2.8h,v3.8h}, [x2], #32           // v2,v3 = 16 x 16-bit values to subtract; x2 += 32 bytes
+    cmlt        v18.8h,  v0.8h,   #0               // v18,v19 = all-ones in lanes whose coefficient is negative
+    cmlt        v19.8h,  v1.8h,   #0
+    uaddw       v4.4s,   v4.4s,   v16.4h           // sums[0..3]   += widened |coefficient|
+    uaddw2      v5.4s,   v5.4s,   v16.8h           // sums[4..7]   += widened |coefficient|
+    uqsub       v20.8h,  v16.8h,  v2.8h            // v20,v21 = |coefficient| - v2/v3, unsigned-saturating (clamps at 0)
+    uqsub       v21.8h,  v17.8h,  v3.8h
+    uaddw       v6.4s,   v6.4s,   v17.4h           // sums[8..11]  += widened |coefficient|
+    uaddw2      v7.4s,   v7.4s,   v17.8h           // sums[12..15] += widened |coefficient|
+    neg         v22.8h,  v20.8h                    // v22,v23 = negated reduced magnitudes
+    neg         v23.8h,  v21.8h
+    bsl         v18.16b, v22.16b, v20.16b          // restore sign: negative lanes take v22, others take v20
+    bsl         v19.16b, v23.16b, v21.16b          // same for the upper 8 coefficients (v23 / v21)
+    st1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1], #64   // write updated sums back; x1 += 64 bytes
+    st1         {v18.8h,v19.8h}, [x0], #32         // overwrite the coefficients in place; x0 += 32 bytes
+    b.gt        1b                                 // loop while the remaining count (from subs above) is > 0
+    ret
+endfunc