X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fbitcount.h;h=6b5f5b5789bb5f8b0b895fafa650566ceba107c2;hp=12826a9f9a05472c6e48066c202b55e0eea158ea;hb=72ab2cd3e98e694ef22316153a42462e2484b466;hpb=3376c68f4bb83dc9fd874eb9d710dab09609ae54 diff --git a/src/bitcount.h b/src/bitcount.h index 12826a9f..6b5f5b57 100644 --- a/src/bitcount.h +++ b/src/bitcount.h @@ -22,35 +22,21 @@ #if !defined(BITCOUNT_H_INCLUDED) #define BITCOUNT_H_INCLUDED -#include "bitboard.h" +// To enable POPCNT support uncomment USE_POPCNT define. For PGO compile on a Core i7 +// you may want to collect profile data first with USE_POPCNT disabled and then, in a +// second profiling session, with USE_POPCNT enabled so to exercise both paths. Don't +// forget to leave USE_POPCNT enabled for the final optimized compile though ;-) +//#define USE_POPCNT -// Select type of software bit count function to use - -#if !defined(AUTO_CONFIGURATION) || defined(IS_64BIT) - -//#define USE_COMPACT_ROOK_ATTACKS -//#define USE_32BIT_ATTACKS -#define USE_FOLDED_BITSCAN - -#define BITCOUNT_SWAR_64 -//#define BITCOUNT_SWAR_32 -//#define BITCOUNT_LOOP - -#else - -#define USE_32BIT_ATTACKS -#define USE_FOLDED_BITSCAN -#define BITCOUNT_SWAR_32 - -#endif +#include "types.h" // Select type of intrinsic bit count instruction to use -#if defined(_MSC_VER) // Microsoft compiler +#if defined(__INTEL_COMPILER) && defined(IS_64BIT) && defined(USE_POPCNT) // Intel compiler -#include +#include inline bool cpu_has_popcnt() { @@ -59,11 +45,19 @@ inline bool cpu_has_popcnt() { return (CPUInfo[2] >> 23) & 1; } -#define POPCNT_INTRINSIC(x) __popcnt64(x) +// Define a dummy template to workaround a compile error if _mm_popcnt_u64() is not defined. +// +// If _mm_popcnt_u64() is defined in it will be choosen first due to +// C++ overload rules that always prefer a function to a template with the same name. +// If not, we avoid a compile error and because cpu_has_popcnt() should return false, +// our templetized _mm_popcnt_u64() is never called anyway. +template inline unsigned _mm_popcnt_u64(T) { return 0; } // Is never called -#elif defined(__INTEL_COMPILER) && (defined(__x86_64) || defined(_M_X64)) // Intel compiler +#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x) -#include +#elif defined(_MSC_VER) && defined(IS_64BIT) && defined(USE_POPCNT) // Microsoft compiler + +#include inline bool cpu_has_popcnt() { @@ -72,34 +66,42 @@ inline bool cpu_has_popcnt() { return (CPUInfo[2] >> 23) & 1; } -#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x) +// See comment of _mm_popcnt_u64<>() few lines above for an explanation. +template inline unsigned __popcnt64(T) { return 0; } // Is never called -#else // Safe fallback for unsupported compilers +#define POPCNT_INTRINSIC(x) __popcnt64(x) + +#else // Safe fallback for unsupported compilers or when USE_POPCNT is disabled inline bool cpu_has_popcnt() { return false; } -#define POPCNT_INTRINSIC(x) sw_count_1s(x) +#define POPCNT_INTRINSIC(x) 0 -#endif +#endif // cpu_has_popcnt() and POPCNT_INTRINSIC() definitions /// Software implementation of bit count functions -#if defined(BITCOUNT_LOOP) +#if defined(IS_64BIT) -inline int sw_count_1s(Bitboard b) { - int r; - for(r = 0; b; r++, b &= b - 1); - return r; +inline int count_1s(Bitboard b) { + b -= ((b>>1) & 0x5555555555555555ULL); + b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; + b *= 0x0101010101010101ULL; + return int(b >> 56); } -inline int sw_count_1s_max_15(Bitboard b) { - return count_1s(b); +inline int count_1s_max_15(Bitboard b) { + b -= (b>>1) & 0x5555555555555555ULL; + b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + b *= 0x1111111111111111ULL; + return int(b >> 60); } -#elif defined(BITCOUNT_SWAR_32) +#else // if !defined(IS_64BIT) -inline int sw_count_1s(Bitboard b) { +inline int count_1s(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits w -= (w >> 1) & 0x55555555; @@ -111,7 +113,7 @@ inline int sw_count_1s(Bitboard b) { return int(v >> 24); } -inline int sw_count_1s_max_15(Bitboard b) { +inline int count_1s_max_15(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits w -= (w >> 1) & 0x55555555; @@ -122,23 +124,6 @@ inline int sw_count_1s_max_15(Bitboard b) { return int(v >> 28); } -#elif defined(BITCOUNT_SWAR_64) - -inline int sw_count_1s(Bitboard b) { - b -= ((b>>1) & 0x5555555555555555ULL); - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; - b *= 0x0101010101010101ULL; - return int(b >> 56); -} - -inline int sw_count_1s_max_15(Bitboard b) { - b -= (b>>1) & 0x5555555555555555ULL; - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b *= 0x1111111111111111ULL; - return int(b >> 60); -} - #endif // BITCOUNT @@ -149,14 +134,28 @@ inline int sw_count_1s_max_15(Bitboard b) { template inline int count_1s(Bitboard b) { - return UseIntrinsic ? POPCNT_INTRINSIC(b) : sw_count_1s(b); + return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s(b); } template inline int count_1s_max_15(Bitboard b) { - return UseIntrinsic ? POPCNT_INTRINSIC(b) : sw_count_1s_max_15(b); + return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s_max_15(b); } +// Global constant initialized at startup that is set to true if +// CPU on which application runs supports POPCNT intrinsic. Unless +// USE_POPCNT is not defined. +const bool CpuHasPOPCNT = cpu_has_popcnt(); + + +// Global constant used to print info about the use of 64 optimized +// functions to verify that a 64 bit compile has been correctly built. +#if defined(IS_64BIT) +const bool CpuHas64BitPath = true; +#else +const bool CpuHas64BitPath = false; +#endif + #endif // !defined(BITCOUNT_H_INCLUDED)