X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fbitcount.h;h=a69ad8e8418535fbe01645368800815d19491b9f;hp=0992c00625e084ecc1fa78110ccef0c04a500166;hb=9204a60dbbebf7e319bb588acc91bf691a3ede9a;hpb=d4876dc96395f5592bfbc25b2eca2360db0655e6 diff --git a/src/bitcount.h b/src/bitcount.h index 0992c006..a69ad8e8 100644 --- a/src/bitcount.h +++ b/src/bitcount.h @@ -1,7 +1,7 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2010 Marco Costalba, Joona Kiiski, Tord Romstad + Copyright (C) 2008-2012 Marco Costalba, Joona Kiiski, Tord Romstad Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,108 +18,89 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ - #if !defined(BITCOUNT_H_INCLUDED) #define BITCOUNT_H_INCLUDED +#include <cassert> #include "types.h" enum BitCountType { - CNT64, - CNT64_MAX15, - CNT32, - CNT32_MAX15, - CNT_POPCNT + CNT_64, + CNT_64_MAX15, + CNT_32, + CNT_32_MAX15, + CNT_HW_POPCNT }; -/// count_1s() counts the number of nonzero bits in a bitboard. -/// We have different optimized versions according if platform -/// is 32 or 64 bits, and to the maximum number of nonzero bits. -/// We also support hardware popcnt instruction. See Readme.txt -/// on how to pgo compile with popcnt support. -template<BitCountType> inline int count_1s(Bitboard); +/// Determine at compile time the best popcount<> specialization according if +/// platform is 32 or 64 bits, to the maximum number of nonzero bits to count +/// and if hardware popcnt instruction is available. +const BitCountType Full = HasPopCnt ? CNT_HW_POPCNT : Is64Bit ? CNT_64 : CNT_32; +const BitCountType Max15 = HasPopCnt ? CNT_HW_POPCNT : Is64Bit ? 
CNT_64_MAX15 : CNT_32_MAX15; + + +/// popcount() counts the number of nonzero bits in a bitboard +template<BitCountType> inline int popcount(Bitboard); template<> -inline int count_1s<CNT64>(Bitboard b) { - b -= ((b>>1) & 0x5555555555555555ULL); - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; - b *= 0x0101010101010101ULL; - return int(b >> 56); +inline int popcount<CNT_64>(Bitboard b) { + b -= (b >> 1) & 0x5555555555555555ULL; + b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + b = ((b >> 4) + b) & 0x0F0F0F0F0F0F0F0FULL; + return (b * 0x0101010101010101ULL) >> 56; } template<> -inline int count_1s<CNT64_MAX15>(Bitboard b) { - b -= (b>>1) & 0x5555555555555555ULL; - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b *= 0x1111111111111111ULL; - return int(b >> 60); +inline int popcount<CNT_64_MAX15>(Bitboard b) { + b -= (b >> 1) & 0x5555555555555555ULL; + b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + return (b * 0x1111111111111111ULL) >> 60; } template<> -inline int count_1s<CNT32>(Bitboard b) { +inline int popcount<CNT_32>(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); - v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits - w -= (w >> 1) & 0x55555555; - v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits - w = ((w >> 2) & 0x33333333) + (w & 0x33333333); - v = ((v >> 4) + v) & 0x0F0F0F0F; // 0-8 in 8 bits - v += (((w >> 4) + w) & 0x0F0F0F0F); // 0-16 in 8 bits - v *= 0x01010101; // mul is fast on amd procs - return int(v >> 24); + v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits + w -= (w >> 1) & 0x55555555; + v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits + w = ((w >> 2) & 0x33333333) + (w & 0x33333333); + v = ((v >> 4) + v + (w >> 4) + w) & 0x0F0F0F0F; + return (v * 0x01010101) >> 24; } template<> -inline int count_1s<CNT32_MAX15>(Bitboard b) { +inline int popcount<CNT_32_MAX15>(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); - v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits - w -= 
(w >> 1) & 0x55555555; - v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits - w = ((w >> 2) & 0x33333333) + (w & 0x33333333); - v += w; // 0-8 in 4 bits - v *= 0x11111111; - return int(v >> 28); + v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits + w -= (w >> 1) & 0x55555555; + v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits + w = ((w >> 2) & 0x33333333) + (w & 0x33333333); + return ((v + w) * 0x11111111) >> 28; } template<> -inline int count_1s<CNT_POPCNT>(Bitboard b) { +inline int popcount<CNT_HW_POPCNT>(Bitboard b) { + #if !defined(USE_POPCNT) - return int(b != 0); // Avoid 'b not used' warning -#elif defined(_MSC_VER) - return __popcnt64(b); -#elif defined(__GNUC__) - unsigned long ret; - __asm__("popcnt %1, %0" : "=r" (ret) : "r" (b)); - return ret; -#endif -} + assert(false); + return b != 0; // Avoid 'b not used' warning -/// cpu_has_popcnt() detects support for popcnt instruction at runtime -inline bool cpu_has_popcnt() { +#elif defined(_MSC_VER) && defined(__INTEL_COMPILER) - int CPUInfo[4] = {-1}; - __cpuid(CPUInfo, 0x00000001); - return (CPUInfo[2] >> 23) & 1; -} + return _mm_popcnt_u64(b); + +#elif defined(_MSC_VER) + return (int)__popcnt64(b); -/// CpuHasPOPCNT is a global constant initialized at startup that -/// is set to true if CPU on which application runs supports popcnt -/// hardware instruction. Unless USE_POPCNT is not defined. -#if defined(USE_POPCNT) -const bool CpuHasPOPCNT = cpu_has_popcnt(); #else -const bool CpuHasPOPCNT = false; -#endif + unsigned long ret; + __asm__("popcnt %1, %0" : "=r" (ret) : "r" (b)); + return ret; -/// CpuIs64Bit is a global constant initialized at compile time that -/// is set to true if CPU on which application runs is a 64 bits. -#if defined(IS_64BIT) -const bool CpuIs64Bit = true; -#else -const bool CpuIs64Bit = false; #endif +} #endif // !defined(BITCOUNT_H_INCLUDED)