X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fbitcount.h;h=9feed19f9755a94be0022f3e9cf5c891972589d0;hp=9a3b481169b667943dedbb28cf538f977e02ca5e;hb=c014444f09ace05e908909d9c5c60127e998b538;hpb=08f3aac97c9ee9d9a4cf0a3232ac0bfa4c320896 diff --git a/src/bitcount.h b/src/bitcount.h index 9a3b4811..9feed19f 100644 --- a/src/bitcount.h +++ b/src/bitcount.h @@ -1,7 +1,7 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2009 Marco Costalba + Copyright (C) 2008-2014 Marco Costalba, Joona Kiiski, Tord Romstad Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,147 +18,87 @@ along with this program. If not, see . */ - -#if !defined(BITCOUNT_H_INCLUDED) +#ifndef BITCOUNT_H_INCLUDED #define BITCOUNT_H_INCLUDED -// To disable POPCNT support uncomment NO_POPCNT define. You should do it only -// in PGO compiling to exercise the default fallback path. Don't forget to -// re-comment the line for the final optimized compile though ;-) - -//#define NO_POPCNT - - +#include #include "types.h" -// Select type of intrinsic bit count instruction to use - -#if defined(_MSC_VER) && defined(IS_64BIT) && !defined(NO_POPCNT) // Microsoft compiler - -#include - -inline bool cpu_has_popcnt() { - - int CPUInfo[4] = {-1}; - __cpuid(CPUInfo, 0x00000001); - return (CPUInfo[2] >> 23) & 1; +enum BitCountType { + CNT_64, + CNT_64_MAX15, + CNT_32, + CNT_32_MAX15, + CNT_HW_POPCNT +}; + +/// Determine at compile time the best popcount<> specialization according to +/// whether the platform is 32 or 64 bit, the maximum number of non-zero +/// bits to count and if the hardware popcnt instruction is available. +const BitCountType Full = HasPopCnt ? CNT_HW_POPCNT : Is64Bit ? CNT_64 : CNT_32; +const BitCountType Max15 = HasPopCnt ? CNT_HW_POPCNT : Is64Bit ? CNT_64_MAX15 : CNT_32_MAX15; + + +/// popcount() counts the number of non-zero bits in a bitboard +template inline int popcount(Bitboard); + +template<> +inline int popcount(Bitboard b) { + b -= (b >> 1) & 0x5555555555555555ULL; + b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + b = ((b >> 4) + b) & 0x0F0F0F0F0F0F0F0FULL; + return (b * 0x0101010101010101ULL) >> 56; } -// Define a dummy template to workaround a compile error if __popcnt64() is not defined. -// -// If __popcnt64() is defined in it will be choosen first due to -// C++ overload rules that always prefer a function to a template with the same name. -// If not, we avoid a compile error and because cpu_has_popcnt() should return false, -// our templetized __popcnt64() is never called anyway. -template unsigned __popcnt64(T) { return 0; } // Is never called - -#define POPCNT_INTRINSIC(x) __popcnt64(x) - -#elif defined(__INTEL_COMPILER) && defined(IS_64BIT) && !defined(NO_POPCNT) // Intel compiler - -#include - -inline bool cpu_has_popcnt() { - - int CPUInfo[4] = {-1}; - __cpuid(CPUInfo, 0x00000001); - return (CPUInfo[2] >> 23) & 1; +template<> +inline int popcount(Bitboard b) { + b -= (b >> 1) & 0x5555555555555555ULL; + b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); + return (b * 0x1111111111111111ULL) >> 60; } -// See comment of __popcnt64<>() few lines above for an explanation. -template unsigned _mm_popcnt_u64(T) { return 0; } // Is never called - -#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x) - -#else // Safe fallback for unsupported compilers or when NO_POPCNT is defined - -inline bool cpu_has_popcnt() { return false; } - -#define POPCNT_INTRINSIC(x) 0 - -#endif // cpu_has_popcnt() and POPCNT_INTRINSIC() definitions - - -/// Software implementation of bit count functions - -#if defined(IS_64BIT) - -inline int count_1s(Bitboard b) { - b -= ((b>>1) & 0x5555555555555555ULL); - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; - b *= 0x0101010101010101ULL; - return int(b >> 56); -} - -inline int count_1s_max_15(Bitboard b) { - b -= (b>>1) & 0x5555555555555555ULL; - b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); - b *= 0x1111111111111111ULL; - return int(b >> 60); -} - -#else // if !defined(IS_64BIT) - -inline int count_1s(Bitboard b) { +template<> +inline int popcount(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); - v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits - w -= (w >> 1) & 0x55555555; - v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits - w = ((w >> 2) & 0x33333333) + (w & 0x33333333); - v = ((v >> 4) + v) & 0x0F0F0F0F; // 0-8 in 8 bits - v += (((w >> 4) + w) & 0x0F0F0F0F); // 0-16 in 8 bits - v *= 0x01010101; // mul is fast on amd procs - return int(v >> 24); + v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits + w -= (w >> 1) & 0x55555555; + v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits + w = ((w >> 2) & 0x33333333) + (w & 0x33333333); + v = ((v >> 4) + v + (w >> 4) + w) & 0x0F0F0F0F; + return (v * 0x01010101) >> 24; } -inline int count_1s_max_15(Bitboard b) { +template<> +inline int popcount(Bitboard b) { unsigned w = unsigned(b >> 32), v = unsigned(b); - v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits - w -= (w >> 1) & 0x55555555; - v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits - w = ((w >> 2) & 0x33333333) + (w & 0x33333333); - v += w; // 0-8 in 4 bits - v *= 0x11111111; - return int(v >> 28); + v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits + w -= (w >> 1) & 0x55555555; + v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits + w = ((w >> 2) & 0x33333333) + (w & 0x33333333); + return ((v + w) * 0x11111111) >> 28; } -#endif // BITCOUNT +template<> +inline int popcount(Bitboard b) { +#ifndef USE_POPCNT -/// count_1s() counts the number of nonzero bits in a bitboard. -/// If template parameter is true an intrinsic is called, otherwise -/// we fallback on a software implementation. + assert(false); + return b != 0; // Avoid 'b not used' warning -template -inline int count_1s(Bitboard b) { - - return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s(b); -} +#elif defined(_MSC_VER) && defined(__INTEL_COMPILER) -template -inline int count_1s_max_15(Bitboard b) { + return _mm_popcnt_u64(b); - return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s_max_15(b); -} +#elif defined(_MSC_VER) + return (int)__popcnt64(b); -// Global constant initialized at startup that is set to true if -// CPU on which application runs supports POPCNT intrinsic. Unless -// NO_POPCNT is defined. -#if defined(NO_POPCNT) -const bool CpuHasPOPCNT = false; #else -const bool CpuHasPOPCNT = cpu_has_popcnt(); -#endif + return __builtin_popcountll(b); -// Global constant used to print info about the use of 64 optimized -// functions to verify that a 64 bit compile has been correctly built. -#if defined(IS_64BIT) -const bool CpuHas64BitPath = true; -#else -const bool CpuHas64BitPath = false; #endif +} -#endif // !defined(BITCOUNT_H_INCLUDED) +#endif // #ifndef BITCOUNT_H_INCLUDED