git.sesse.net Git - x264/commitdiff
arm: Add x264_nal_escape_neon
author: Martin Storsjö <martin@martin.st>
Tue, 25 Aug 2015 11:38:12 +0000 (14:38 +0300)
committer: Henrik Gramner <henrik@gramner.com>
Sun, 11 Oct 2015 16:44:54 +0000 (18:44 +0200)
checkasm timing      Cortex-A7      A8      A9
nal_escape_c                852758  879566  655497
nal_escape_neon             376831  450678  371673

Makefile
common/arm/bitstream-a.S [new file with mode: 0644]
common/bitstream.c

diff --git a/Makefile b/Makefile
index 6193c5936939fda36f9a2e00c734223318ccda14..4403a11be273c9b72dd95343b714eb2c9551a07a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM)
 ifneq ($(AS),)
 ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
           common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
-          common/arm/predict-a.S
+          common/arm/predict-a.S common/arm/bitstream-a.S
 SRCS   += common/arm/mc-c.c common/arm/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
 endif
diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
new file mode 100644 (file)
index 0000000..5b0a171
--- /dev/null
+++ b/common/arm/bitstream-a.S
@@ -0,0 +1,84 @@
+/*****************************************************************************
+ * bitstream-a.S: arm bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014-2015 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264@jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon           // copy bytes [r1, r2) to r0, inserting a 0x03 byte before any byte <= 3 that follows two zero bytes
+    push        {r4-r5,lr}              // r4, r5 and lr are used as scratch below
+    vmov.u8     q0,  #0xff              // q0 = preceding 16 bytes (only the last two are used); 0xff = no zeros before the start
+    vmov.u8     q8,  #4                 // q8 = threshold: bytes below 4 are escape candidates
+    mov         r3,  #3                 // r3 = the 0x03 byte that gets inserted
+    subs        lr,  r1,  r2            // lr = src - end (negative while input remains)
+    beq         99f                     // src == end: nothing to copy
+0:                                      // main loop: pick the vector or the scalar path
+    cmn         lr,  #15                // flags from lr + 15
+    blt         16f                     // at least 16 bytes left: vector path
+    mov         r1,  r2                 // tail: address the remaining bytes as [end + lr]
+    b           100f
+16:                                     // vector path: examine 16 bytes at once
+    vld1.8      {q1}, [r1]!             // q1 = next 16 input bytes, r1 += 16
+    vext.8      q2,  q0,  q1, #14       // q2 = for each byte of q1, the byte two positions earlier
+    vext.8      q3,  q0,  q1, #15       // q3 = for each byte of q1, the byte one position earlier
+    vcgt.u8     q11, q8,  q1            // q11 = mask of bytes < 4
+    vceq.u8     q9,  q2,  #0            // q9  = mask: byte two back is zero
+    vceq.u8     q10, q3,  #0            // q10 = mask: byte one back is zero
+    vand        q9,  q9,  q11
+    vand        q9,  q9,  q10           // q9 = mask of positions that need an escape byte
+    vshrn.u16   d22, q9,  #4            // narrow the 128-bit mask to 64 bits (4 bits per input byte)
+    vmov        ip,  lr,  d22           // move the narrowed mask into two core registers
+    orrs        ip,  ip,  lr            // Z set when no position needs escaping
+    beq         16f                     // nothing to escape: plain 16-byte copy (forward label 16)
+    mov         lr,  #-16               // redo these 16 bytes one at a time; r1 was already advanced past them
+100:                                    // scalar path: lr = negative offset of the next byte relative to r1
+    vmov.u8     r5,  d1[6]              // r5 = second-to-last byte of q0
+    vmov.u8     r4,  d1[7]              // r4 = last byte of q0
+    orr         r5,  r4,  r5, lsl #8    // r5 = the two preceding bytes, most recent in bits 7:0
+101:                                    // per-byte loop
+    ldrb        r4,  [r1, lr]           // r4 = current input byte
+    orr         ip,  r4,  r5, lsl #16   // ip = two preceding bytes in bits 31:16 | current byte
+    cmp         ip,  #3                 // <= 3 only if both preceding bytes are 0 and the current byte is <= 3
+    bhi         102f                    // no escape needed for this byte
+    strb        r3,  [r0], #1           // emit the 0x03 escape byte
+    orr         r5,  r3,  r5, lsl #8    // the escape byte becomes part of the history
+102:
+    adds        lr,  lr,  #1            // advance; these flags feed the blt below (strb/orr do not change them)
+    strb        r4,  [r0], #1           // copy the input byte
+    orr         r5,  r4,  r5, lsl #8    // shift the copied byte into the history
+    blt         101b                    // loop while lr < 0
+    subs        lr,  r1,  r2            // lr = src - end again; flags feed the blt below
+    lsr         ip,  r5,  #8            // ip bits 7:0 = second-to-last byte written
+    vmov.u8     d1[6],  ip              // write the two-byte history back into the top of q0
+    vmov.u8     d1[7],  r5
+    blt         0b                      // more input remains
+
+    pop         {r4-r5,pc}              // return; r0 points past the last byte written
+16:                                     // no escape needed in this 16-byte block
+    subs        lr,  r1,  r2            // lr = src - end; flags feed the blt below
+    vst1.8      {q1}, [r0]!             // store the 16 bytes unchanged, r0 += 16
+    vmov        q0, q1                  // this block becomes the history for the next one
+    blt         0b                      // more input remains
+99:
+    pop         {r4-r5,pc}              // return; r0 points past the last byte written
+endfunc
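diff --git a/common/bitstream.c b/common/bitstream.c
index 6ca1f4488ac242217eae6ca598e457c334275f7c..ec9836ac019930198f53242c69ddcbd7eb66e83b 100644 (file)
--- a/common/bitstream.c
+++ b/common/bitstream.c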
@@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
     }
 #endif
 #endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
 #if ARCH_AARCH64
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;