From: Marco Costalba Date: Wed, 12 Aug 2009 07:40:03 +0000 (+0200) Subject: Finally fix prefetch on Linux X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=fd12e8cb239180607559bb805e233f7ea704a67c Finally fix prefetch on Linux It was due to a missing -msse compiler option! Without this option the CPU silently discards prefetcht2 instructions during execution. Also added a (gcc documented) hack to prevent the Intel compiler from optimizing away the prefetches. Special thanks to Heinz for testing and suggesting improvements. And to Jim for testing icc on Windows. Signed-off-by: Marco Costalba --- diff --git a/src/Makefile b/src/Makefile index 7ca0495e..ff3405f7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -26,8 +26,8 @@ EXE = stockfish ### Compiler speed switches for both GCC and ICC. These settings are generally ### fast on a broad range of systems, but may be changed experimentally ### ========================================================================== -GCCFLAGS = -O3 -ICCFLAGS = -fast +GCCFLAGS = -O3 -msse +ICCFLAGS = -fast -msse ### ========================================================================== @@ -169,6 +169,6 @@ $(EXE): $(OBJS) ### Dependencies. 
Do not change .depend: - $(CXX) -MM $(OBJS:.o=.cpp) > $@ + $(CXX) -msse -MM $(OBJS:.o=.cpp) > $@ include .depend diff --git a/src/tt.cpp b/src/tt.cpp index 5ea6a808..e140a0ba 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -25,14 +25,11 @@ #include #include #include +#include #include "movegen.h" #include "tt.h" -#if defined(_MSC_VER) -#include -#endif - // The main transposition table TranspositionTable TT; @@ -175,16 +172,15 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const { void TranspositionTable::prefetch(const Key posKey) const { -#if defined(_MSC_VER) - char* addr = (char*)first_entry(posKey); - _mm_prefetch(addr, _MM_HINT_T0); - _mm_prefetch(addr+64, _MM_HINT_T0); -#else - // We need to force an asm volatile here because gcc builtin - // is optimized away by Intel compiler. - char* addr = (char*)first_entry(posKey); - asm volatile("prefetcht0 %0" :: "m" (addr)); +#if defined(__INTEL_COMPILER) || defined(__ICL) + // This hack prevents prefetches to be optimized away by the + // Intel compiler. Both MSVC and gcc seems not affected. + __asm__ (""); #endif + + char const* addr = (char*)first_entry(posKey); + _mm_prefetch(addr, _MM_HINT_T2); + _mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead }