From ceee976bde76a5f4126bfd9d8454f0e601e67204 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Storsj=C3=B6?= Date: Tue, 25 Aug 2015 14:38:12 +0300 Subject: [PATCH] arm: Add x264_nal_escape_neon checkasm timing Cortex-A7 A8 A9 nal_escape_c 852758 879566 655497 nal_escape_neon 376831 450678 371673 --- Makefile | 2 +- common/arm/bitstream-a.S | 84 ++++++++++++++++++++++++++++++++++++++++ common/bitstream.c | 4 ++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 common/arm/bitstream-a.S diff --git a/Makefile b/Makefile index 6193c593..4403a11b 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM) ifneq ($(AS),) ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \ common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \ - common/arm/predict-a.S + common/arm/predict-a.S common/arm/bitstream-a.S SRCS += common/arm/mc-c.c common/arm/predict-c.c OBJASM = $(ASMSRC:%.S=%.o) endif diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S new file mode 100644 index 00000000..5b0a171c --- /dev/null +++ b/common/arm/bitstream-a.S @@ -0,0 +1,84 @@ +/***************************************************************************** + * bitstream-a.S: arm bitstream functions + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+/* x264_nal_escape_neon: copy the bytes [r1, r2) to r0, writing an extra 0x03 before every byte <= 3 that follows two 0x00 output bytes. */
+function x264_nal_escape_neon
+    push        {r4-r5,lr}                /* r0 = output pointer, r1 = input pointer, r2 = input end */
+    vmov.u8     q0,  #0xff                /* q0 = previous 16 bytes; 0xff so nothing before the start compares equal to zero */
+    vmov.u8     q8,  #4                   /* per-byte threshold for the "byte < 4" test */
+    mov         r3,  #3                   /* r3 = the byte that gets inserted */
+    subs        lr,  r1,  r2              /* lr = r1 - r2, i.e. minus the number of input bytes left */
+    beq         99f                       /* r1 == r2: nothing to copy */
+0:                                        /* main loop head; reached with lr = r1 - r2 < 0 */
+    cmn         lr,  #15
+    blt         16f                       /* lr < -15: at least 16 bytes left, take the vector path */
+    mov         r1,  r2                   /* tail: the remaining -lr bytes are addressed as [r2 + lr] */
+    b           100f
+16:                                       /* vector path: test 16 input bytes at once */
+    vld1.8      {q1}, [r1]!
+    vext.8      q2,  q0,  q1, #14         /* q2[i] = byte two positions before q1[i] */
+    vext.8      q3,  q0,  q1, #15         /* q3[i] = byte one position before q1[i] */
+    vcgt.u8     q11, q8,  q1              /* q11[i] = (q1[i] < 4) */
+    vceq.u8     q9,  q2,  #0
+    vceq.u8     q10, q3,  #0
+    vand        q9,  q9,  q11
+    vand        q9,  q9,  q10             /* q9[i] = 0xff where two zero bytes precede a byte < 4 */
+    vshrn.u16   d22, q9,  #4              /* narrow the 128-bit mask to 64 bits so it fits two core registers */
+    vmov        ip,  lr,  d22
+    orrs        ip,  ip,  lr              /* Z set iff no byte in this block matched */
+    beq         16f                       /* no match: forward to the second 16: label (bulk store) */
+    mov         lr,  #-16                 /* match: redo these 16 bytes one at a time, starting at [r1 - 16] */
+100:                                      /* scalar path entry */
+    vmov.u8     r5,  d1[6]
+    vmov.u8     r4,  d1[7]
+    orr         r5,  r4,  r5, lsl #8      /* r5 = (q0 byte 14) << 8 | (q0 byte 15): the two preceding bytes */
+101:                                      /* per-byte loop; lr counts up towards 0 */
+    ldrb        r4,  [r1, lr]
+    orr         ip,  r4,  r5, lsl #16     /* ip = prev2 << 24 | prev1 << 16 | current */
+    cmp         ip,  #3
+    bhi         102f                      /* ip > 3: previous two bytes not both zero, or current byte > 3 */
+    strb        r3,  [r0], #1             /* write 0x03 ahead of the current byte */
+    orr         r5,  r3,  r5, lsl #8      /* the inserted byte becomes part of the history */
+102:
+    adds        lr,  lr,  #1              /* sets the flags tested by blt below; strb/orr leave them alone */
+    strb        r4,  [r0], #1
+    orr         r5,  r4,  r5, lsl #8
+    blt         101b
+    subs        lr,  r1,  r2              /* flags for the blt 0b below */
+    lsr         ip,  r5,  #8
+    vmov.u8     d1[6],  ip                /* q0 bytes 14/15 = last two bytes written, for the next vector test */
+    vmov.u8     d1[7],  r5
+    blt         0b                        /* r1 < r2: more input left */
+
+    pop         {r4-r5,pc}                /* r0 = one past the last byte written */
+16:                                       /* bulk path: this block needs no inserted bytes */
+    subs        lr,  r1,  r2
+    vst1.8      {q1}, [r0]!
+    vmov        q0,  q1                   /* this block becomes the "previous 16 bytes" */
+    blt         0b
+99:
+    pop         {r4-r5,pc}                /* r0 = one past the last byte written (unchanged if the input was empty) */
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c
index 6ca1f448..ec9836ac 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
     }
 #endif
 #endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
 #if ARCH_AARCH64
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;
-- 
2.39.2