Implement lsb/msb using armv7 assembly instructions.
msb is the easiest one, using a gcc intrinsic that generates
code using the ARM's clz instruction. lsb is also using this
clz instruction, but with the help of ARM's 'rbit' (bit
reversing) instruction. This leads to a >2% speed gain.
I also renamed 'arm-32' to the more meaningful 'armv7' in the Makefile.
No functional change.
-ifeq ($(ARCH),arm-32)
- arch = arm-32
+ifeq ($(ARCH),armv7)
+ arch = armv7
os = any
bits = 32
prefetch = yes
os = any
bits = 32
prefetch = yes
CXXFLAGS += -fno-gcse
endif
endif
CXXFLAGS += -fno-gcse
endif
endif
### 3.7 prefetch
ifeq ($(prefetch),yes)
### 3.7 prefetch
ifeq ($(prefetch),yes)
CXXFLAGS += -msse
DEPENDFLAGS += -msse
endif
CXXFLAGS += -msse
DEPENDFLAGS += -msse
endif
@echo "osx-ppc-32 > PPC-Mac OS X 32 bit"
@echo "osx-x86-64 > x86-Mac OS X 64 bit"
@echo "osx-x86-32 > x86-Mac OS X 32 bit"
@echo "osx-ppc-32 > PPC-Mac OS X 32 bit"
@echo "osx-x86-64 > x86-Mac OS X 64 bit"
@echo "osx-x86-32 > x86-Mac OS X 32 bit"
- @echo "arm-32 > ARM 32 bit"
+ @echo "armv7 > ARMv7 32 bit"
@echo "general-64 > unspecified 64-bit"
@echo "general-32 > unspecified 32-bit"
@echo ""
@echo "general-64 > unspecified 64-bit"
@echo "general-32 > unspecified 32-bit"
@echo ""
@test "$(debug)" = "yes" || test "$(debug)" = "no"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
@test "$(debug)" = "yes" || test "$(debug)" = "no"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
- test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "arm-32"
+ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "armv7"
@test "$(os)" = "any" || test "$(os)" = "osx"
@test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
@test "$(os)" = "any" || test "$(os)" = "osx"
@test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
+# elif defined(__arm__)
+
+// Returns the index of the least significant set bit of a 32-bit word.
+// The word is bit-reversed with ARM's 'rbit' instruction, so the lowest set
+// bit becomes the highest one and a leading-zero count yields its index.
+// v must be non-zero: __builtin_clz is undefined for a zero argument.
+// NOTE(review): 'rbit' is not available on older ARM cores; confirm that the
+// enclosing preprocessor guard only selects targets that support it.
+FORCE_INLINE int lsb32(uint32_t v) {
+  uint32_t reversed;
+  __asm__("rbit %0, %1" : "=r"(reversed) : "r"(v));
+  return __builtin_clz(reversed);
+}
+
+// Returns the square of the most significant set bit of b, computed as
+// 63 minus the number of leading zero bits (assumes Bitboard is a 64-bit
+// unsigned type, as the 63 offset implies).
+// b must be non-zero: __builtin_clzll is undefined for a zero argument.
+FORCE_INLINE Square msb(Bitboard b) {
+  const int leadingZeros = __builtin_clzll(b);
+  return (Square) (63 - leadingZeros);
+}
+
+// Returns the square of the least significant set bit of b. The low 32-bit
+// half is examined first; only when it is empty is the high half scanned,
+// with its result offset by 32.
+// b must be non-zero, otherwise lsb32 is called with a zero word.
+FORCE_INLINE Square lsb(Bitboard b) {
+  const uint32_t low = uint32_t(b);
+  if (low)
+      return (Square) lsb32(low);
+  return (Square) (32 + lsb32(uint32_t(b >> 32)));
+}
+
# else
FORCE_INLINE Square lsb(Bitboard b) { // Assembly code by Heinz van Saanen
# else
FORCE_INLINE Square lsb(Bitboard b) { // Assembly code by Heinz van Saanen