From 4b10578acbe099482ed40200478df4d775c01af5 Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Fri, 5 Jun 2020 20:17:00 +0300 Subject: [PATCH] Increase the maximum hash size by a factor of 256 Conceptually group hash clusters into super clusters of 256 clusters. This scheme allows us to use hash sizes up to 32 TB (= 2^32 super clusters = 2^40 clusters). Use 48 bits of the Zobrist key to choose the cluster index. We use 8 extra bits to mitigate the quantization error for very large hashes when scaling the hash key to cluster index. The hash index computation is organized to be compatible with the existing scheme for power-of-two hash sizes up to 128 GB. Fixes https://github.com/official-stockfish/Stockfish/issues/1349 closes https://github.com/official-stockfish/Stockfish/pull/2722 Passed non-regression STC: LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 37976 W: 7336 L: 7211 D: 23429 Ptnml(0-2): 578, 4295, 9149, 4356, 610 https://tests.stockfishchess.org/tests/view/5edcbaaef29b40b0fc95abc5 No functional change. --- src/tt.cpp | 8 ++++++-- src/tt.h | 13 ++++++++++--- src/ucioption.cpp | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/tt.cpp b/src/tt.cpp index 0a3c54a1..92aaee00 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -65,8 +65,10 @@ void TranspositionTable::resize(size_t mbSize) { aligned_ttmem_free(mem); - clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem)); + superClusterCount = mbSize * 1024 * 1024 / (sizeof(Cluster) * ClustersPerSuperCluster); + + table = static_cast( + aligned_ttmem_alloc(superClusterCount * ClustersPerSuperCluster * sizeof(Cluster), mem)); if (!mem) { std::cerr << "Failed to allocate " << mbSize @@ -89,6 +91,8 @@ void TranspositionTable::clear() { { threads.emplace_back([this, idx]() { + const size_t clusterCount = superClusterCount * ClustersPerSuperCluster; + // Thread binding gives faster search on systems with a first-touch policy if (Options["Threads"] > 8) WinProcGroup::bindThisThread(idx); diff --git a/src/tt.h b/src/tt.h index bd723a86..76db03da 100644 --- a/src/tt.h +++ b/src/tt.h @@ -66,6 +66,7 @@ private: class TranspositionTable { static constexpr int ClusterSize = 3; + static constexpr int ClustersPerSuperCluster = 256; struct Cluster { TTEntry entry[ClusterSize]; @@ -82,15 +83,21 @@ public: void resize(size_t mbSize); void clear(); - // The 32 lowest order bits of the key are used to get the index of the cluster TTEntry* first_entry(const Key key) const { - return &table[(uint32_t(key) * uint64_t(clusterCount)) >> 32].entry[0]; + + // The index is computed from + // Idx = (K48 * SCC) / 2^40, with K48 the 48 lowest bits swizzled. + + const uint64_t firstTerm = uint32_t(key) * uint64_t(superClusterCount); + const uint64_t secondTerm = (uint16_t(key >> 32) * uint64_t(superClusterCount)) >> 16; + + return &table[(firstTerm + secondTerm) >> 24].entry[0]; } private: friend struct TTEntry; - size_t clusterCount; + size_t superClusterCount; Cluster* table; void* mem; uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 16add76e..90190b53 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -56,8 +56,8 @@ bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const void init(OptionsMap& o) { - // at most 2^32 clusters. - constexpr int MaxHashMB = Is64Bit ? 131072 : 2048; + // At most 2^32 superclusters. Supercluster = 8 kB + constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; o["Debug Log File"] << Option("", on_logger); o["Contempt"] << Option(24, -100, 100); -- 2.39.2