From: Marco Costalba Date: Fri, 26 Apr 2013 16:45:54 +0000 (+0200) Subject: Cache line aligned TT X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=083fe5812485597e13943b690cc24a8f25c0d140;ds=sidebyside Cache line aligned TT Let each TT cluster (16*4=64 bytes) fit in a single cache line. This avoids the need for the double prefetch. Original patches by Lucas and Jean-Francois, who has also tested it on his AMD FX: BIG HASHTABLE ./stockfish bench 1024 1 18 > /dev/null Before: 1437642 nps 1426519 nps 1438493 nps After: 1474482 nps 1476375 nps 1475877 nps SMALL HASHTABLE ./stockfish bench 128 1 18 > /dev/null Before: 1435207 nps 1435586 nps 1433741 nps After: 1479143 nps 1471042 nps 1472286 nps No functional change. --- diff --git a/src/misc.cpp b/src/misc.cpp index c0c00b00..477fb392 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -237,10 +237,8 @@ void prefetch(char* addr) { # if defined(__INTEL_COMPILER) || defined(_MSC_VER) _mm_prefetch(addr, _MM_HINT_T0); - _mm_prefetch(addr+64, _MM_HINT_T0); // 64 bytes ahead # else __builtin_prefetch(addr); - __builtin_prefetch(addr+64); # endif } diff --git a/src/tt.cpp b/src/tt.cpp index 80ea493b..998d7378 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -39,18 +39,18 @@ void TranspositionTable::set_size(size_t mbSize) { if (hashMask == size - ClusterSize) return; - hashMask = size - ClusterSize; - delete [] table; - table = new (std::nothrow) TTEntry[size]; - - if (!table) + free(mem); + mem = malloc(size * sizeof(TTEntry) + (CACHE_LINE_SIZE - 1)); + if (!mem) { std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." 
<< std::endl; exit(EXIT_FAILURE); } - clear(); // Operator new is not guaranteed to initialize memory to zero + table = (TTEntry*)((size_t(mem) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)); + hashMask = size - ClusterSize; + clear(); // Newly allocated block of memory is not initialized } diff --git a/src/tt.h b/src/tt.h index c7c39e90..1caa277e 100644 --- a/src/tt.h +++ b/src/tt.h @@ -85,7 +85,7 @@ class TranspositionTable { static const unsigned ClusterSize = 4; // A cluster is 64 Bytes public: - ~TranspositionTable() { delete [] table; } + ~TranspositionTable() { free(mem); } void new_search() { generation++; } TTEntry* probe(const Key key) const; @@ -98,6 +98,7 @@ public: private: uint32_t hashMask; TTEntry* table; + void* mem; uint8_t generation; // Size must be not bigger then TTEntry::generation8 }; diff --git a/src/types.h b/src/types.h index 1def7d00..dae86db3 100644 --- a/src/types.h +++ b/src/types.h @@ -56,10 +56,11 @@ # include // Intel and Microsoft header for _mm_prefetch() # endif +#define CACHE_LINE_SIZE 64 #if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define CACHE_LINE_ALIGNMENT __declspec(align(64)) +# define CACHE_LINE_ALIGNMENT __declspec(align(CACHE_LINE_SIZE)) #else -# define CACHE_LINE_ALIGNMENT __attribute__ ((aligned(64))) +# define CACHE_LINE_ALIGNMENT __attribute__ ((aligned(CACHE_LINE_SIZE))) #endif #if defined(_MSC_VER)