From: Marco Costalba Date: Thu, 18 Sep 2008 14:09:19 +0000 (+0200) Subject: Optimize pop_1st_bit() on 32 bits x86 X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=9ae2b6923504064da184d08b888fffb328818543;hp=95ce27f9262b63ce8eb611965e5cbc16bae815ad Optimize pop_1st_bit() on 32 bits x86 Operations on 64 bits Bitboard types are slow on x86 compiled with gcc, so optimize this case. BTW profiling shows that pop_1st_bit() is a veeery performance critical path! Signed-off-by: Marco Costalba --- diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 0bbd155b..5dd0137d 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -6,12 +6,12 @@ it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + Glaurung is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ @@ -38,7 +38,7 @@ const Bitboard FileBB[8] = { }; const Bitboard NeighboringFilesBB[8] = { - FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB, + FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB, FileDBB|FileFBB, FileEBB|FileGBB, FileFBB|FileHBB, FileGBB }; @@ -47,7 +47,7 @@ const Bitboard ThisAndNeighboringFilesBB[8] = { FileBBB|FileCBB|FileDBB, FileCBB|FileDBB|FileEBB, FileDBB|FileEBB|FileFBB, FileEBB|FileFBB|FileGBB, FileFBB|FileGBB|FileHBB, FileGBB|FileHBB -}; +}; const Bitboard RankBB[8] = { Rank1BB, Rank2BB, Rank3BB, Rank4BB, Rank5BB, Rank6BB, Rank7BB, Rank8BB @@ -126,27 +126,27 @@ const int RShift[64] = { #else // if defined(USE_32BIT_ATTACKS) const uint64_t RMult[64] = { - 0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL, - 0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL, - 0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL, - 0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL, - 0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL, - 0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL, - 0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL, - 0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL, - 0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL, - 0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL, - 0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL, - 0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL, - 0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL, - 0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL, - 0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL, - 0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL, - 0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL, - 0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL, - 0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL, - 
0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL, - 0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL, + 0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL, + 0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL, + 0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL, + 0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL, + 0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL, + 0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL, + 0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL, + 0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL, + 0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL, + 0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL, + 0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL, + 0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL, + 0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL, + 0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL, + 0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL, + 0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL, + 0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL, + 0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL, + 0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL, + 0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL, + 0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL, 0x410201ce5c030092ULL }; @@ -190,7 +190,7 @@ const uint64_t BMult[64] = { 0x881c7c67fcbfc4f6ULL, 0x47ca41e7e440d423ULL, 0xeb0c88112048d004ULL, 0x51c60e04359aef1aULL, 0x1aa1fe0e957a5554ULL, 0xdd9448db4f5e3104ULL, 0xdc01f6dca4bebbdcULL, -}; +}; const int BShift[64] = { 26, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, @@ -202,27 +202,27 @@ const int BShift[64] = { #else // if defined(USE_32BIT_ATTACKS) const uint64_t BMult[64] = { - 
0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL, - 0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL, - 0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL, - 0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL, - 0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL, - 0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL, - 0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL, - 0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL, - 0x86100020200601ULL, 0xa802080a20112c02ULL, 0x80411218080900ULL, - 0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL, - 0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL, - 0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL, - 0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL, - 0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL, - 0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL, - 0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL, - 0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL, - 0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL, - 0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL, - 0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL, - 0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL, + 0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL, + 0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL, + 0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL, + 0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL, + 0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL, + 0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL, + 0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL, + 0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL, + 0x86100020200601ULL, 0xa802080a20112c02ULL, 
0x80411218080900ULL, + 0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL, + 0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL, + 0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL, + 0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL, + 0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL, + 0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL, + 0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL, + 0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL, + 0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL, + 0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL, + 0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL, + 0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL, 0xa08520292120600ULL }; @@ -320,9 +320,9 @@ void init_bitboards() { #if defined(USE_FOLDED_BITSCAN) static const int BitTable[64] = { - 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2, - 51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52, - 26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, + 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2, + 51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52, + 26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17, 36, 8 }; @@ -339,6 +339,26 @@ Square first_1(Bitboard b) { /// pop_1st_bit() finds and clears the least significant nonzero bit in a /// nonzero bitboard. +#if defined(USE_32BIT_ATTACKS) + +Square pop_1st_bit(Bitboard *bb) { + + uint32_t t = uint32_t(*bb); + uint32_t* p = t ? (uint32_t*)bb : (uint32_t*)bb + 1; // Little endian only? + uint32_t b = t ? 
t : *p; + + *p = b & (b -1); + + if (t) + b ^= (b - 1); + else + b = ~(b ^ (b - 1)); + + return Square(BitTable[(b * 0x783a9b23) >> 26]); +} + +#else + Square pop_1st_bit(Bitboard *b) { Bitboard bb = *b ^ (*b - 1); uint32_t fold = int(bb) ^ int(bb >> 32); @@ -346,6 +366,8 @@ Square pop_1st_bit(Bitboard *b) { return Square(BitTable[(fold * 0x783a9b23) >> 26]); } +#endif + #else static const int BitTable[64] = { @@ -369,7 +391,7 @@ Square first_1(Bitboard b) { Square pop_1st_bit(Bitboard *b) { Bitboard bb = *b; *b &= (*b - 1); - return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]); + return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]); } #endif // defined(USE_FOLDED_BITSCAN) @@ -417,7 +439,7 @@ namespace { {-7,-9,0}, {17,15,10,6,-6,-10,-15,-17}, {9,7,-7,-9,0}, {8,1,-1,-8,0}, {9,7,-7,-9,8,1,-1,-8}, {9,7,-7,-9,8,1,-1,-8} }; - + for(i = 0; i < 64; i++) { for(j = 0; j <= int(BK); j++) { StepAttackBB[j][i] = EmptyBoardBB; @@ -483,14 +505,14 @@ namespace { Bitboard b; for(i = 0; i < 64; i++) { attackIndex[i] = index; - mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6); + mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6); j = (1 << (64 - shift[i])); for(k = 0; k < j; k++) { #if defined(USE_32BIT_ATTACKS) b = index_to_bitboard(k, mask[i]); - attacks[index + - (unsigned(int(b) * int(mult[i]) ^ - int(b >> 32) * int(mult[i] >> 32)) + attacks[index + + (unsigned(int(b) * int(mult[i]) ^ + int(b >> 32) * int(mult[i] >> 32)) >> shift[i])] = sliding_attacks(i, b, 4, deltas); #else @@ -502,7 +524,7 @@ namespace { index += j; } } - + void init_pseudo_attacks() { Square s; @@ -537,5 +559,5 @@ namespace { } } #endif // defined(USE_COMPACT_ROOK_ATTACKS) - + }