From 3901affb1d0cb1889f96e7cdade3943f441e9f5a Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sun, 12 Oct 2008 14:34:54 +0200 Subject: [PATCH] Yet another pop_1st_bit() optimization Always for 32 bit but without relying on MSVC intrinsics. It is very similar to previous ones, but this one does not segfault, thanks to the -fno-strict-aliasing compiler option. Signed-off-by: Marco Costalba --- src/Makefile | 2 +- src/bitboard.cpp | 49 ++++++++++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Makefile b/src/Makefile index 2b89bc12..b55ef7ea 100644 --- a/src/Makefile +++ b/src/Makefile @@ -78,7 +78,7 @@ CXXFLAGS += -Wall -g # General optimization flags. Note that -O2 might be faster than -O3 on some # systems; this requires testing. -CXXFLAGS += -O3 -fno-exceptions -fomit-frame-pointer -fno-rtti -fstrict-aliasing +CXXFLAGS += -O3 -fno-exceptions -fomit-frame-pointer -fno-rtti -fno-strict-aliasing # Disable most annoying warnings for the Intel C++ compiler diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 8250e829..89065c49 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -349,30 +349,39 @@ Square first_1(Bitboard b) { /// pop_1st_bit() finds and clears the least significant nonzero bit in a /// nonzero bitboard. -#if defined(USE_32BIT_ATTACKS) && defined(_MSC_VER) +#if defined(USE_32BIT_ATTACKS) -// On 32bit system compiled with MSVC this verion seems -// slightly faster then the standard one. 
+// Use type-punning +union b_union { -Square pop_1st_bit(Bitboard *b) { + Bitboard b; + struct { + uint32_t l; + uint32_t h; + }; +}; - unsigned long index; - uint32_t *l, *h; +// WARNING: Needs -fno-strict-aliasing compiler option +Square pop_1st_bit(Bitboard *bb) { - if (*(l = (uint32_t*)b) != 0) - { - _BitScanForward(&index, *l); - *l &= ~(1 << index); - } - else if (*(h = (uint32_t*)b + 1) != 0) - { - _BitScanForward(&index, *h); - *h &= ~(1 << index); - index += 32; - } else - return SQ_NONE; - - return Square(index); + b_union u; + uint32_t b; + + u.b = *bb; + + if (u.l) + { + b = u.l; + *((uint32_t*)bb) = b & (b - 1); + b ^= (b - 1); + } + else + { + b = u.h; + *((uint32_t*)bb+1) = b & (b - 1); // Little endian only? + b = ~(b ^ (b - 1)); + } + return Square(BitTable[(b * 0x783a9b23) >> 26]); } #else -- 2.39.2