From 7f9ebf8e86e90ea1eb68388f1be12503a3f31e56 Mon Sep 17 00:00:00 2001 From: Jean-Francois Romang Date: Thu, 11 Oct 2012 22:55:25 +0800 Subject: [PATCH] ARM lsb/msb assembly Implement lsb/msb using armv7 assembly instructions. msb is the easiest one, using a gcc intrinsic that generates code using the ARM's clz instruction. lsb is also using this clz instruction, but with the help of ARM's 'rbit' (bit reversing) instruction. This leads to a >2% speed gain. I also renamed 'arm-32' to the more meaningful 'armv7' in the Makefile. No functional change. --- src/Makefile | 14 +++++++------- src/bitboard.h | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/Makefile b/src/Makefile index c73b94f3..17e61773 100644 --- a/src/Makefile +++ b/src/Makefile @@ -126,12 +126,12 @@ ifeq ($(ARCH),x86-32-old) endif #arm section -ifeq ($(ARCH),arm-32) - arch = arm-32 +ifeq ($(ARCH),armv7) + arch = armv7 os = any bits = 32 prefetch = yes - bsfq = no + bsfq = yes popcnt = no endif @@ -276,7 +276,7 @@ ifeq ($(optimize),yes) endif endif - ifeq ($(arch),arm-32) + ifeq ($(arch),armv7) CXXFLAGS += -fno-gcse endif endif @@ -315,7 +315,7 @@ endif ### 3.7 prefetch ifeq ($(prefetch),yes) - ifneq ($(arch),arm-32) + ifneq ($(arch),armv7) CXXFLAGS += -msse DEPENDFLAGS += -msse endif @@ -376,7 +376,7 @@ help: @echo "osx-ppc-32 > PPC-Mac OS X 32 bit" @echo "osx-x86-64 > x86-Mac OS X 64 bit" @echo "osx-x86-32 > x86-Mac OS X 32 bit" - @echo "arm-32 > ARM 32 bit" + @echo "armv7 > ARMv7 32 bit" @echo "general-64 > unspecified 64-bit" @echo "general-32 > unspecified 32-bit" @echo "" @@ -466,7 +466,7 @@ config-sanity: @test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ - test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "arm-32" + test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "armv7" 
@test "$(os)" = "any" || test "$(os)" = "osx" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" diff --git a/src/bitboard.h b/src/bitboard.h index 4c7c948a..6aa35874 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -247,6 +247,21 @@ FORCE_INLINE Square msb(Bitboard b) { return (Square) index; } +# elif defined(__arm__) + +FORCE_INLINE int lsb32(uint32_t v) { + __asm__("rbit %0, %1" : "=r"(v) : "r"(v)); + return __builtin_clz(v); +} + +FORCE_INLINE Square msb(Bitboard b) { + return (Square) (63 - __builtin_clzll(b)); +} + +FORCE_INLINE Square lsb(Bitboard b) { + return (Square) (uint32_t(b) ? lsb32(uint32_t(b)) : 32 + lsb32(uint32_t(b >> 32))); +} + # else FORCE_INLINE Square lsb(Bitboard b) { // Assembly code by Heinz van Saanen -- 2.39.2