-/// TranspositionTable::first_entry returns a pointer to the first
-/// entry of a cluster given a position.
-
-inline TTEntry* TranspositionTable::first_entry(const Position &pos) const {
-
- return entries + (int(pos.get_key() & (size - 1)) << 2);
+/// TranspositionTable::prefetch looks up the current position in the
+/// transposition table and loads it into the L1/L2 cache. This is a
+/// non-blocking function and does not stall the CPU waiting for data
+/// to be loaded from RAM, which can be very slow. When we subsequently
+/// call retrieve(), the TT data will already be quickly accessible
+/// in the L1/L2 CPU cache.
+
+void TranspositionTable::prefetch(const Key posKey) const {
+
+#if defined(_MSC_VER)
+ char* addr = (char*)first_entry(posKey);
+ _mm_prefetch(addr, _MM_HINT_T0);
+ _mm_prefetch(addr+64, _MM_HINT_T0);
+#else
+ // We need to force an asm volatile here because the gcc builtin
+ // is optimized away by the Intel compiler.
+ char* addr = (char*)first_entry(posKey);
+ asm volatile("prefetcht0 %0" :: "m" (addr));
+#endif