From: Marco Costalba Date: Sat, 19 Jun 2010 10:10:54 +0000 (+0100) Subject: Move prefetch() out of TT X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=47ee6d9fa4091aa05f6fedb62a2bb652b0422f08 Move prefetch() out of TT This code is platform specific and has nothing to do with the TT class, so move it to misc.cpp. This patch is a prerequisite for extending the use of prefetch to other hash tables apart from the Transposition Table. No functional change. Signed-off-by: Marco Costalba --- diff --git a/src/misc.cpp b/src/misc.cpp index 2e43a76b..2970ac3b 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -39,6 +39,10 @@ #endif +#if !defined(NO_PREFETCH) +# include <xmmintrin.h> +#endif + #include #include #include @@ -287,4 +291,26 @@ int Bioskey() return 0; } } + +/// prefetch() preloads the given address in L1/L2 cache. This is a non +/// blocking function and do not stalls the CPU waiting for data to be +/// loaded from RAM, that can be very slow. +#if defined(NO_PREFETCH) +void prefetch(char*) {} +#else + +void prefetch(char* addr) { + +#if defined(__INTEL_COMPILER) || defined(__ICL) + // This hack prevents prefetches to be optimized away by + // Intel compiler. Both MSVC and gcc seems not affected. 
+ __asm__ (""); +#endif + + _mm_prefetch(addr, _MM_HINT_T2); + _mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead +} + +#endif + #endif diff --git a/src/misc.h b/src/misc.h index 0721196f..4d9b2429 100644 --- a/src/misc.h +++ b/src/misc.h @@ -55,6 +55,7 @@ extern const std::string engine_name(); extern int get_system_time(); extern int cpu_count(); extern int Bioskey(); +extern void prefetch(char* addr); //// diff --git a/src/position.cpp b/src/position.cpp index 3cf0016b..a069cf40 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -772,7 +772,7 @@ void Position::do_move(Move m, StateInfo& newSt, const CheckInfo& ci, bool moveI } // Prefetch TT access as soon as we know key is updated - TT.prefetch(key); + prefetch((char*)TT.first_entry(key)); // Move the piece Bitboard move_bb = make_move_bb(from, to); @@ -1250,7 +1250,7 @@ void Position::do_null_move(StateInfo& backupSt) { st->key ^= zobEp[st->epSquare]; st->key ^= zobSideToMove; - TT.prefetch(st->key); + prefetch((char*)TT.first_entry(st->key)); sideToMove = opposite_color(sideToMove); st->epSquare = SQ_NONE; diff --git a/src/tt.cpp b/src/tt.cpp index 501d016f..fc0bdfb4 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -25,9 +25,6 @@ #include #include #include -#if !defined(NO_PREFETCH) -# include <xmmintrin.h> -#endif #include "movegen.h" #include "tt.h" @@ -91,16 +88,6 @@ void TranspositionTable::clear() { } -/// TranspositionTable::first_entry returns a pointer to the first -/// entry of a cluster given a position. The low 32 bits of the key -/// are used to get the index in the table. - -inline TTEntry* TranspositionTable::first_entry(const Key posKey) const { - - return entries[uint32_t(posKey) & (size - 1)].data; -} - - /// TranspositionTable::store writes a new entry containing a position, /// a value, a value type, a search depth, and a best move to the /// transposition table. 
Transposition table is organized in clusters of @@ -160,31 +147,6 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const { } -/// TranspositionTable::prefetch looks up the current position in the -/// transposition table and load it in L1/L2 cache. This is a non -/// blocking function and do not stalls the CPU waiting for data -/// to be loaded from RAM, that can be very slow. When we will -/// subsequently call retrieve() the TT data will be already -/// quickly accessible in L1/L2 CPU cache. -#if defined(NO_PREFETCH) -void TranspositionTable::prefetch(const Key) const {} -#else - -void TranspositionTable::prefetch(const Key posKey) const { - -#if defined(__INTEL_COMPILER) || defined(__ICL) - // This hack prevents prefetches to be optimized away by - // Intel compiler. Both MSVC and gcc seems not affected. - __asm__ (""); -#endif - - char const* addr = (char*)first_entry(posKey); - _mm_prefetch(addr, _MM_HINT_T2); - _mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead -} - -#endif - /// TranspositionTable::new_search() is called at the beginning of every new /// search. It increments the "generation" variable, which is used to /// distinguish transposition table entries from previous searches from diff --git a/src/tt.h b/src/tt.h index 042ccdfe..bcc761ac 100644 --- a/src/tt.h +++ b/src/tt.h @@ -108,15 +108,13 @@ public: void clear(); void store(const Key posKey, Value v, ValueType type, Depth d, Move m, Value statV, Value kingD); TTEntry* retrieve(const Key posKey) const; - void prefetch(const Key posKey) const; void new_search(); void insert_pv(const Position& pos, Move pv[]); void extract_pv(const Position& pos, Move pv[], const int PLY_MAX); int full() const; + TTEntry* first_entry(const Key posKey) const; private: - inline TTEntry* first_entry(const Key posKey) const; - // Be sure 'writes' is at least one cache line away // from read only variables. 
unsigned char pad_before[64 - sizeof(unsigned)]; @@ -130,4 +128,14 @@ private: extern TranspositionTable TT; + +/// TranspositionTable::first_entry returns a pointer to the first +/// entry of a cluster given a position. The low 32 bits of the key +/// are used to get the index in the table. + +inline TTEntry* TranspositionTable::first_entry(const Key posKey) const { + + return entries[uint32_t(posKey) & (size - 1)].data; +} + #endif // !defined(TT_H_INCLUDED)