From 2a3ebc884e9d8faae1a1cd4a2230cf8cfaead95c Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sun, 21 Sep 2008 22:13:03 +0100 Subject: [PATCH] Optimize pop_1st_bit() take 2 This time we use MSVC intrinsics that are C wrappers for the Intel assembler 'bsf' instruction. The speed up in node count is around 3%, so it is probably not worth the effort. Anyway, this patch can be useful at least for documentation purposes. This optimization covers 32 bit systems only. Signed-off-by: Marco Costalba --- src/bitboard.cpp | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 458f9318..298db7a7 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -21,6 +21,16 @@ //// Includes //// +#ifdef _MSC_VER + #include <intrin.h> + #ifdef _WIN64 + #pragma intrinsic(_BitScanForward64) + #else + #pragma intrinsic(_BitScanForward) + #endif + #define USING_INTRINSICS +#endif + #include #include "bitboard.h" @@ -339,20 +349,30 @@ Square first_1(Bitboard b) { /// pop_1st_bit() finds and clears the least significant nonzero bit in a /// nonzero bitboard. -#if defined(USE_32BIT_ATTACKS) && defined(_WIN32) +#if defined(USE_32BIT_ATTACKS) && defined(_MSC_VER) -Square pop_1st_bit(Bitboard *bb) { +// On 32bit system compiled with MSVC this version seems +// slightly faster than the standard one. - uint32_t a = uint32_t(*bb); - uint32_t* ptr = a ? (uint32_t*)bb : (uint32_t*)bb + 1; // Little endian only? - uint32_t b = a ?
a : *ptr; - uint32_t c = ~(b ^ (b - 1)); - - *ptr = b & c; // clear the bit - if (a) - c = ~c; +Square pop_1st_bit(Bitboard *b) { - return Square(BitTable[(c * 0x783a9b23) >> 26]); + unsigned long index; + uint32_t *l, *h; + + if (*(l = (uint32_t*)b) != 0) + { + _BitScanForward(&index, *l); + *l &= ~(1 << index); + } + else if (*(h = (uint32_t*)b + 1) != 0) + { + _BitScanForward(&index, *h); + *h &= ~(1 << index); + index += 32; + } else + return SQ_NONE; + + return Square(index); } #else -- 2.39.2