#else
-# if defined(__INTEL_COMPILER) || defined(__ICL) || defined(_MSC_VER)
-# include <xmmintrin.h>
-# endif
-
void prefetch(char* addr) {
-# if defined(__INTEL_COMPILER) || defined(__ICL)
+# if defined(__INTEL_COMPILER)
// This hack prevents prefetches from being optimized away by the
// Intel compiler. Both MSVC and gcc seem to be unaffected.
__asm__ ("");
# endif
-# if defined(__INTEL_COMPILER) || defined(__ICL) || defined(_MSC_VER)
- _mm_prefetch(addr, _MM_HINT_T2);
- _mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead
+# if defined(__INTEL_COMPILER) || defined(_MSC_VER)
+ _mm_prefetch(addr, _MM_HINT_T0);
+ _mm_prefetch(addr+64, _MM_HINT_T0); // 64 bytes ahead
# else
__builtin_prefetch(addr);
__builtin_prefetch(addr+64);
# include <nmmintrin.h> // Intel header for _mm_popcnt_u64() intrinsic
#endif
+# if !defined(NO_PREFETCH) && (defined(__INTEL_COMPILER) || defined(_MSC_VER))
+# include <xmmintrin.h> // Intel and Microsoft header for _mm_prefetch()
+# endif
+
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
# define CACHE_LINE_ALIGNMENT __declspec(align(64))
#else