aarch64: nal_escape_neon

author Janne Grunau <janne-x264@jannau.net>

Wed, 5 Nov 2014 10:35:13 +0000 (11:35 +0100)

committer Anton Mitrofanov <BugMaster@narod.ru>

Tue, 16 Dec 2014 17:40:10 +0000 (20:40 +0300)
author Janne Grunau <janne-x264@jannau.net>
Wed, 5 Nov 2014 10:35:13 +0000 (11:35 +0100)
committer Anton Mitrofanov <BugMaster@narod.ru>
Tue, 16 Dec 2014 17:40:10 +0000 (20:40 +0300)
diff --git a/Makefile b/Makefile

index fd72fcdde051cbcbc714c6b2ae9bac8bf138105b..f29354217aede58ca020a628d81dad5d276060d3 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -128,7 +128,8 @@ endif
  # AArch64 NEON optims
  ifeq ($(ARCH),AARCH64)
  ifneq ($(AS),)
-ASMSRC += common/aarch64/dct-a.S     \
+ASMSRC += common/aarch64/bitstream-a.S \
+          common/aarch64/dct-a.S     \
            common/aarch64/deblock-a.S \
            common/aarch64/mc-a.S      \
            common/aarch64/pixel-a.S   \
diff --git a/common/aarch64/bitstream-a.S b/common/aarch64/bitstream-a.S

new file mode 100644 (file)

index 0000000..81f9ad8
--- /dev/null
+++ b/common/aarch64/bitstream-a.S
@@ -0,0 +1,82 @@
+/*****************************************************************************
+ * bitstream-a.S: aarch64 bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264@jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon, export=1  // x0 = dst, x1 = src, x2 = end; copies [src,end) to dst, inserting a 0x03 byte before any byte <= 3 that follows two zero bytes; returns advanced dst in x0
+    movi        v0.16b,  #0xff  // v0 = previous 16 bytes (history); 0xff means "no zero bytes before the start"
+    movi        v4.16b,  #4  // per-lane threshold: a byte qualifies when 4 > byte, i.e. byte <= 3
+    mov         w3,  #3  // w3 = the escape byte value written by the scalar loop
+    subs        x6,  x1,  x2  // x6 = src - end (negative while input remains)
+    cbz         x6,  99f  // empty input: return dst unchanged
+0:  // main loop head; x6 = src - end on entry
+    cmn         x6,  #15  // set flags from x6 + 15
+    b.lt        16f  // x6 + 15 < 0  <=>  at least 16 bytes remain: take the vector path
+    mov         x1,  x2  // tail (< 16 bytes): x1 = end, so [x1, x6] addresses the remaining bytes
+    b           100f  // handle the tail byte by byte
+16:  // vector path: examine 16 source bytes at once
+    ld1         {v1.16b}, [x1], #16  // v1 = next 16 source bytes; src += 16
+    ext         v2.16b, v0.16b, v1.16b, #14  // v2 lane i = byte two positions before v1 lane i
+    ext         v3.16b, v0.16b, v1.16b, #15  // v3 lane i = byte one position before v1 lane i
+    cmhi        v7.16b, v4.16b, v1.16b  // v7 = 0xff where current byte <= 3 (unsigned 4 > byte)
+    cmeq        v5.16b, v2.16b, #0  // v5 = 0xff where the byte two back is zero
+    cmeq        v6.16b, v3.16b, #0  // v6 = 0xff where the byte one back is zero
+    and         v5.16b, v5.16b, v7.16b
+    and         v5.16b, v5.16b, v6.16b  // v5 = lanes where all three conditions hold
+    shrn        v7.8b,  v5.8h,  #4  // narrow the 128-bit mask to 64 bits (4 bits kept per source byte)
+    mov         x7,  v7.d[0]  // move the packed mask to a general register for testing
+    cbz         x7,  16f  // no candidate in this block: bulk-copy it (second label 16, below)
+    mov         x6,  #-16  // candidate found: redo these 16 bytes in the scalar loop (x1 is already past them)
+100:  // scalar path setup: rebuild the two-byte history from v0
+    umov        w5,  v0.b[14]
+    umov        w4,  v0.b[15]
+    orr         w5,  w4,  w5, lsl #8  // w5 low 16 bits = the two bytes preceding the current position
+101:  // scalar loop: one source byte per iteration, x6 counts up towards 0
+    ldrb        w4,  [x1, x6]  // w4 = current source byte (x6 is a negative offset from x1)
+    orr         w9,  w4,  w5, lsl #16  // w9 = previous two bytes in the upper half, current byte in the low byte
+    cmp         w9,  #3
+    b.hi        102f  // w9 > 3: previous two bytes not both zero, or current byte > 3 -> no escape
+    strb        w3,  [x0], #1  // emit the 0x03 escape byte
+    orr         w5,  w3,  w5, lsl #8  // shift the inserted byte into the history
+102:
+    adds        x6,  x6,  #1  // advance the index; these flags are consumed by b.lt below (strb/orr leave flags alone)
+    strb        w4,  [x0], #1  // copy the source byte to dst
+    orr         w5,  w4,  w5, lsl #8  // shift the copied byte into the history
+    b.lt        101b  // loop while x6 < 0
+    subs        x6,  x1,  x2  // x6 = src - end; flags consumed by the b.lt three instructions below
+    lsr         w9,  w5,  #8  // w9 low byte = second-to-last byte written
+    mov         v0.b[14],  w9  // store the last two written bytes back into the history lanes
+    mov         v0.b[15],  w5  // (only lanes 14 and 15 of v0 are read by the ext instructions above)
+    b.lt        0b  // more input remains: back to the loop head
+
+    ret  // x0 = dst advanced past the last byte written
+16:  // no escape needed anywhere in the block just loaded
+    subs        x6,  x1,  x2  // x6 = src - end; flags consumed by b.lt below (st1/mov leave flags alone)
+    st1         {v1.16b}, [x0], #16  // copy the 16 bytes unchanged; dst += 16
+    mov         v0.16b, v1.16b  // this block becomes the history for the next one
+    b.lt        0b  // more input remains: back to the loop head
+99:
+    ret  // x0 = dst advanced past the last byte written (unchanged for empty input)
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c

index ed3ad5e1a0912023a1743880ae1b9f418b349561..85dddb65e9e9afb12eb5f78b5e044e6c53dabc32 100644 (file)
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -54,6 +54,8 @@ void x264_cabac_block_residual_internal_sse2       ( dctcoef *l, int b_interlace
  void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
  void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
  
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
+
  /****************************************************************************
   * x264_nal_encode:
   ****************************************************************************/
@@ -142,4 +144,8 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
      }
  #endif
  #endif
+#if ARCH_AARCH64
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
  }
author	Janne Grunau <janne-x264@jannau.net>
	Wed, 5 Nov 2014 10:35:13 +0000 (11:35 +0100)
committer	Anton Mitrofanov <BugMaster@narod.ru>
	Tue, 16 Dec 2014 17:40:10 +0000 (20:40 +0300)
Makefile		patch \| blob \| history
common/aarch64/bitstream-a.S	[new file with mode: 0644]	patch \| blob
common/bitstream.c		patch \| blob \| history