#if !defined(BITCOUNT_H_INCLUDED)
#define BITCOUNT_H_INCLUDED
-// To disable POPCNT support uncomment NO_POPCNT define. You should do it only
-// in PGO compiling to exercise the default fallback path. Don't forget to
-// re-comment the line for the final optimized compile though ;-)
+// To enable POPCNT support uncomment the USE_POPCNT define. For a PGO compile on a Core i7
+// you may want to collect profile data first with USE_POPCNT disabled and then, in a
+// second profiling session, with USE_POPCNT enabled, so as to exercise both paths. Don't
+// forget to leave USE_POPCNT enabled for the final optimized compile though ;-)
-//#define NO_POPCNT
+//#define USE_POPCNT
#include "types.h"
// Select type of intrinsic bit count instruction to use
-#if defined(_MSC_VER) && defined(IS_64BIT) && !defined(NO_POPCNT) // Microsoft compiler
+#if defined(__INTEL_COMPILER) && defined(IS_64BIT) && defined(USE_POPCNT) // Intel compiler
-#include <intrin.h>
+#include <nmmintrin.h>
// Reports whether bit 23 of CPUInfo[2] is set.
// NOTE(review): presumably CPUInfo holds the CPUID leaf-1 registers, where
// ECX bit 23 is the POPCNT feature flag - confirm where CPUInfo is filled.
inline bool cpu_has_popcnt() {
  const int popcnt_bit = 23;
  return ((CPUInfo[2] >> popcnt_bit) & 1) != 0;
}
-// Define a dummy template to workaround a compile error if __popcnt64() is not defined.
+// Define a dummy template to workaround a compile error if _mm_popcnt_u64() is not defined.
//
-// If __popcnt64() is defined in <intrin.h> it will be choosen first due to
+// If _mm_popcnt_u64() is defined in <nmmintrin.h> it will be chosen first due to
// C++ overload rules, which prefer a non-template function over an equally good template match.
// If not, we avoid a compile error and because cpu_has_popcnt() should return false,
-// our templetized __popcnt64() is never called anyway.
-template<typename T> unsigned __popcnt64(T) { return 0; } // Is never called
+// our templatized _mm_popcnt_u64() is never called anyway.
+template<typename T> inline unsigned _mm_popcnt_u64(T) { return 0; } // Is never called
-#define POPCNT_INTRINSIC(x) __popcnt64(x)
+#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x)
-#elif defined(__INTEL_COMPILER) && defined(IS_64BIT) && !defined(NO_POPCNT) // Intel compiler
+#elif defined(_MSC_VER) && defined(IS_64BIT) && defined(USE_POPCNT) // Microsoft compiler
-#include <nmmintrin.h>
+#include <intrin.h>
// Reports whether bit 23 of CPUInfo[2] is set.
// NOTE(review): presumably CPUInfo holds the CPUID leaf-1 registers, where
// ECX bit 23 is the POPCNT feature flag - confirm where CPUInfo is filled.
inline bool cpu_has_popcnt() {
  const int popcnt_bit = 23;
  return ((CPUInfo[2] >> popcnt_bit) & 1) != 0;
}
-// See comment of __popcnt64<>() few lines above for an explanation.
-template<typename T> unsigned _mm_popcnt_u64(T) { return 0; } // Is never called
+// See the comment on _mm_popcnt_u64<>() a few lines above for an explanation.
+template<typename T> inline unsigned __popcnt64(T) { return 0; } // Is never called
-#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x)
+#define POPCNT_INTRINSIC(x) __popcnt64(x)
-#else // Safe fallback for unsupported compilers or when NO_POPCNT is defined
+#else // Safe fallback for unsupported compilers or when USE_POPCNT is disabled
// Fallback probe: no POPCNT intrinsic is selected in this configuration,
// so the instruction is unconditionally reported as unavailable.
inline bool cpu_has_popcnt() {
  return false;
}
// Global constant initialized at startup that is set to true if
// CPU on which application runs supports POPCNT intrinsic. Unless
-// NO_POPCNT is defined.
-#if defined(NO_POPCNT)
-const bool CpuHasPOPCNT = false;
-#else
+// USE_POPCNT is not defined or the compiler is unsupported: then it is always false.
const bool CpuHasPOPCNT = cpu_has_popcnt();
-#endif
// Global constant used to print info about the use of 64 optimized