#include "movegen.h"
#include "tt.h"
+#if defined(_MSC_VER)
+#include <xmmintrin.h>
+#endif
-/// This is the number of TTEntry slots for each position
+// This is the number of TTEntry slots for each position
static const int ClusterSize = 4;
+// The main transposition table
+TranspositionTable TT;
////
//// Functions
}
+/// TranspositionTable::first_entry returns a pointer to the first
+/// entry of a cluster given a position. The low 32 bits of the key
+/// are used to get the index in the table.
+
+inline TTEntry* TranspositionTable::first_entry(const Key posKey) const {
+
+ return entries + ((uint32_t(posKey) & (size - 1)) * ClusterSize);
+}
+
+
/// TranspositionTable::retrieve looks up the current position in the
/// transposition table. It returns a pointer to the TTEntry or NULL if
/// the position is not found.
TTEntry* TranspositionTable::retrieve(const Key posKey) const {

  // The high 32 bits of the key distinguish positions that map to the
  // same cluster (the low 32 bits were consumed by first_entry()).
  uint32_t posKey32 = posKey >> 32;
  TTEntry* tte = first_entry(posKey);

  // Linear scan of the cluster; return the first matching entry.
  for (int i = 0; i < ClusterSize; i++, tte++)
      if (tte->key() == posKey32)
          return tte;

  // Position not present in the table
  return NULL;
}
-/// TranspositionTable::first_entry returns a pointer to the first
-/// entry of a cluster given a position. The low 32 bits of the key
-/// are used to get the index in the table.
-
-inline TTEntry* TranspositionTable::first_entry(const Key posKey) const {
-
- return entries + ((uint32_t(posKey) & (size - 1)) * ClusterSize);
+/// TranspositionTable::prefetch looks up the current position in the
+/// transposition table and load it in L1/L2 cache. This is a non
+/// blocking function and do not stalls the CPU waiting for data
+/// to be loaded from RAM, that can be very slow. When we will
+/// subsequently call retrieve() the TT data will be already
+/// quickly accessible in L1/L2 CPU cache.
+
+void TranspositionTable::prefetch(const Key posKey) const {
+
+#if defined(_MSC_VER)
+ char* addr = (char*)first_entry(posKey);
+ _mm_prefetch(addr, _MM_HINT_T0);
+ _mm_prefetch(addr+64, _MM_HINT_T0);
+#else
+ // We need to force an asm volatile here because gcc builtin
+ // is optimized away by Intel compiler.
+ char* addr = (char*)first_entry(posKey);
+ asm volatile("prefetcht0 %0" :: "m" (addr));
+#endif
}
+
/// TranspositionTable::new_search() is called at the beginning of every new
/// search. It increments the "generation" variable, which is used to
/// distinguish transposition table entries from previous searches from