summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: b081e52)
Conceptually group hash clusters into super clusters of 256 clusters.
This scheme allows us to use hash sizes up to 32 TB
(= 2^32 super clusters = 2^40 clusters).
Use 48 bits of the Zobrist key to choose the cluster index. We use 8
extra bits to mitigate the quantization error for very large hashes when
scaling the hash key to the cluster index.
The hash index computation is organized to be compatible with the existing
scheme for power-of-two hash sizes up to 128 GB.
Fixes https://github.com/official-stockfish/Stockfish/issues/1349
closes https://github.com/official-stockfish/Stockfish/pull/2722
Passed non-regression STC:
LLR: 2.93 (-2.94,2.94) {-1.50,0.50}
Total: 37976 W: 7336 L: 7211 D: 23429
Ptnml(0-2): 578, 4295, 9149, 4356, 610
https://tests.stockfishchess.org/tests/view/5edcbaaef29b40b0fc95abc5
No functional change.
- clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
- table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
+ superClusterCount = mbSize * 1024 * 1024 / (sizeof(Cluster) * ClustersPerSuperCluster);
+
+ table = static_cast<Cluster*>(
+ aligned_ttmem_alloc(superClusterCount * ClustersPerSuperCluster * sizeof(Cluster), mem));
if (!mem)
{
std::cerr << "Failed to allocate " << mbSize
if (!mem)
{
std::cerr << "Failed to allocate " << mbSize
{
threads.emplace_back([this, idx]() {
{
threads.emplace_back([this, idx]() {
+ const size_t clusterCount = superClusterCount * ClustersPerSuperCluster;
+
// Thread binding gives faster search on systems with a first-touch policy
if (Options["Threads"] > 8)
WinProcGroup::bindThisThread(idx);
// Thread binding gives faster search on systems with a first-touch policy
if (Options["Threads"] > 8)
WinProcGroup::bindThisThread(idx);
class TranspositionTable {
static constexpr int ClusterSize = 3;
class TranspositionTable {
static constexpr int ClusterSize = 3;
+ static constexpr int ClustersPerSuperCluster = 256;
struct Cluster {
TTEntry entry[ClusterSize];
struct Cluster {
TTEntry entry[ClusterSize];
void resize(size_t mbSize);
void clear();
void resize(size_t mbSize);
void clear();
- // The 32 lowest order bits of the key are used to get the index of the cluster
TTEntry* first_entry(const Key key) const {
TTEntry* first_entry(const Key key) const {
- return &table[(uint32_t(key) * uint64_t(clusterCount)) >> 32].entry[0];
+
+ // The index is computed from
+ // Idx = (K48 * SCC) / 2^40, with K48 the 48 lowest bits swizzled.
+
+ const uint64_t firstTerm = uint32_t(key) * uint64_t(superClusterCount);
+ const uint64_t secondTerm = (uint16_t(key >> 32) * uint64_t(superClusterCount)) >> 16;
+
+ return &table[(firstTerm + secondTerm) >> 24].entry[0];
}
private:
friend struct TTEntry;
}
private:
friend struct TTEntry;
+ size_t superClusterCount;
Cluster* table;
void* mem;
uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
Cluster* table;
void* mem;
uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
void init(OptionsMap& o) {
void init(OptionsMap& o) {
- // at most 2^32 clusters.
- constexpr int MaxHashMB = Is64Bit ? 131072 : 2048;
+ // At most 2^32 superclusters. Supercluster = 8 kB
+ constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
o["Debug Log File"] << Option("", on_logger);
o["Contempt"] << Option(24, -100, 100);
o["Debug Log File"] << Option("", on_logger);
o["Contempt"] << Option(24, -100, 100);