X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Ftt.cpp;h=0396b287cc5734b9517569e1124f91ddf0fcde73;hp=49dae31d70ffb5b634709e339c7815d8d5e5184d;hb=f4140ecc0c78d3d89f4e2459105e3ce3a1ab3ce1;hpb=6f1475b6fcd3e3728d800e622ab7a22265fb8ca4
diff --git a/src/tt.cpp b/src/tt.cpp
index 49dae31d..0396b287 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -29,10 +29,15 @@
 #include "movegen.h"
 #include "tt.h"
 
+#if defined(_MSC_VER)
+#include <xmmintrin.h>
+#endif
 
-/// This is the number of TTEntry slots for each position
+// This is the number of TTEntry slots for each position
 static const int ClusterSize = 4;
 
+// The main transposition table
+TranspositionTable TT;
 
 ////
 //// Functions
@@ -92,6 +97,16 @@ void TranspositionTable::clear() {
 }
 
 
+/// TranspositionTable::first_entry returns a pointer to the first
+/// entry of a cluster given a position. The low 32 bits of the key
+/// are used to get the index in the table.
+
+inline TTEntry* TranspositionTable::first_entry(const Key posKey) const {
+
+  return entries + ((uint32_t(posKey) & (size - 1)) * ClusterSize);
+}
+
+
 /// TranspositionTable::store writes a new entry containing a position,
 /// a value, a value type, a search depth, and a best move to the
 /// transposition table. Transposition table is organized in clusters of
@@ -144,7 +159,7 @@ void TranspositionTable::store(const Key posKey, Value v, ValueType t, Depth d,
 TTEntry* TranspositionTable::retrieve(const Key posKey) const {
 
   uint32_t posKey32 = posKey >> 32;
-  TTEntry *tte = first_entry(posKey);
+  TTEntry* tte = first_entry(posKey);
 
   for (int i = 0; i < ClusterSize; i++, tte++)
       if (tte->key() == posKey32)
@@ -154,15 +169,26 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const {
 }
 
 
-/// TranspositionTable::first_entry returns a pointer to the first
-/// entry of a cluster given a position. The low 32 bits of the key
-/// are used to get the index in the table.
+/// TranspositionTable::prefetch looks up the current position in the
+/// transposition table and loads its cluster into the L1/L2 cache. This
+/// is a non-blocking function: it does not stall the CPU waiting for
+/// data to be loaded from RAM, which can be very slow. When retrieve()
+/// is subsequently called, the TT data should already be quickly
+/// accessible in the L1/L2 CPU cache.
 
-inline TTEntry* TranspositionTable::first_entry(const Key posKey) const {
+void TranspositionTable::prefetch(const Key posKey) const {
 
-  return entries + ((uint32_t(posKey) & (size - 1)) * ClusterSize);
+#if defined(_MSC_VER)
+  _mm_prefetch((char*)first_entry(posKey), _MM_HINT_T0);
+#else
+  // We need to force an asm volatile here because the gcc builtin is
+  // optimized away by the Intel compiler. Operand is *addr, not addr.
+  char* addr = (char*)first_entry(posKey);
+  asm volatile("prefetcht0 %0" :: "m" (*addr));
+#endif
 }
 
+
 /// TranspositionTable::new_search() is called at the beginning of every new
 /// search. It increments the "generation" variable, which is used to
 /// distinguish transposition table entries from previous searches from