-/// transposition table, and extracts the value, value type, depth and
-/// best move if the position is found. The return value is true if
-/// the position is found, and false if it isn't.
-
-bool TranspositionTable::retrieve(const Position &pos, Value *value,
- Depth *d, Move *move,
- ValueType *type) const {
- TTEntry *tte;
- bool found = false;
-
- tte = entries + int(pos.get_key() & (size - 1)) * 4;
- for (int i = 0; i < 4 && !found ; i++)
- if ((tte+i)->key() == pos.get_key())
- {
- tte = tte + i;
- found = true;
- }
- if (!found) {
- *move = MOVE_NONE;
- return false;
- }
- *value = tte->value();
- *type = tte->type();
- *d = tte->depth();
- *move = tte->move();
- return true;
+/// transposition table. Returns a pointer to the TTEntry or NULL
+/// if position is not found.
+
+TTEntry* TranspositionTable::retrieve(const Key posKey) const {
+
+  // Entries store only the high 32 bits of the full position key,
+  // so compare against that slice. The low bits select the cluster
+  // in first_entry(), which disambiguates the rest.
+  uint32_t posKey32 = posKey >> 32;
+  TTEntry* tte = first_entry(posKey);
+
+  // Linear scan of the cluster this key hashes to; a cluster holds
+  // ClusterSize consecutive entries.
+  for (int i = 0; i < ClusterSize; i++, tte++)
+      if (tte->key() == posKey32)
+          return tte;
+
+  // No entry in the cluster matched: position not in the table.
+  return NULL;
+}
+
+
+/// TranspositionTable::prefetch looks up the current position in the
+/// transposition table and loads it into the L1/L2 cache. This is a
+/// non-blocking function: it does not stall the CPU waiting for data
+/// to arrive from RAM, which can be very slow. When retrieve() is
+/// subsequently called, the TT data will already be quickly
+/// accessible in the L1/L2 CPU cache.
+#if defined(NO_PREFETCH)
+// No-op fallback for builds/platforms without prefetch support.
+void TranspositionTable::prefetch(const Key) const {}
+#else
+
+void TranspositionTable::prefetch(const Key posKey) const {
+
+#if defined(__INTEL_COMPILER) || defined(__ICL)
+  // This hack prevents the prefetches from being optimized away by
+  // the Intel compiler. Both MSVC and gcc seem unaffected.
+ __asm__ ("");
+#endif
+
+ char const* addr = (char*)first_entry(posKey);
+ _mm_prefetch(addr, _MM_HINT_T2);
+ _mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead