From: Steinar H. Gunderson Date: Thu, 2 Dec 2021 19:01:42 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=17f8b7e18673fc7b542c6e40a896cd1fe1a25248;hp=d49757a79e76699a9c2894b03d39a31c47d42361 Merge remote-tracking branch 'upstream/master' --- diff --git a/AUTHORS b/AUTHORS index 5b5bbf22..35ccdaf5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -21,6 +21,7 @@ Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) +Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) Andrey Neporada (nepal) Andy Duplain @@ -143,6 +144,7 @@ Nikolay Kostov (NikolayIT) Nguyen Pham (nguyenpham) Norman Schmidt (FireFather) notruck +Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) Oskar Werkelin Ahlin Pablo Vazquez @@ -192,6 +194,7 @@ tttak Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) +xefoci7612 zz4032 diff --git a/README.md b/README.md index 79db8170..330d19ed 100644 --- a/README.md +++ b/README.md @@ -175,8 +175,12 @@ on the evaluations of millions of positions at moderate search depth. The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. It can be evaluated efficiently on CPUs, and exploits the fact that only parts of the neural network need to be updated after a typical chess move. -[The nodchip repository](https://github.com/nodchip/Stockfish) provides additional -tools to train and develop the NNUE networks. On CPUs supporting modern vector instructions +[The nodchip repository](https://github.com/nodchip/Stockfish) provided the first version of +the needed tools to train and develop the NNUE networks. Today, more advanced training tools are available +in [the nnue-pytorch repository](https://github.com/glinscott/nnue-pytorch/), while data generation tools +are available in [a dedicated branch](https://github.com/official-stockfish/Stockfish/tree/tools). + +On CPUs supporting modern vector instructions (avx2 and similar), the NNUE evaluation results in much stronger playing strength, even if the nodes per second computed by the engine is somewhat lower (roughly 80% of nps is typical). diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index ab608409..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,88 +0,0 @@ -version: 1.0.{build} -clone_depth: 50 - -branches: - only: - - master - -# Operating system (build VM template) -os: Visual Studio 2019 - -# Build platform, i.e. x86, x64, AnyCPU. This setting is optional. -platform: - - x86 - - x64 - -# build Configuration, i.e. Debug, Release, etc. -configuration: - - Debug - - Release - -matrix: - # The build fail immediately once one of the job fails - fast_finish: true - -# Scripts that are called at very beginning, before repo cloning -init: - - cmake --version - - msbuild /version - -before_build: - - ps: | - # Get sources - $src = get-childitem -Path *.cpp -Recurse | select -ExpandProperty FullName - $src = $src -join ' ' - $src = $src.Replace("\", "/") - - # Build CMakeLists.txt - $t = 'cmake_minimum_required(VERSION 3.17)', - 'project(Stockfish)', - 'set(CMAKE_CXX_STANDARD 17)', - 'set(CMAKE_CXX_STANDARD_REQUIRED ON)', - 'set (CMAKE_CXX_EXTENSIONS OFF)', - 'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)', - 'set(source_files', $src, ')', - 'add_executable(stockfish ${source_files})' - - # Write CMakeLists.txt withouth BOM - $MyPath = (Get-Item -Path "." -Verbose).FullName + '\CMakeLists.txt' - $Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False - [System.IO.File]::WriteAllLines($MyPath, $t, $Utf8NoBomEncoding) - - # Obtain bench reference from git log - $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1 - $bench = $b -match '\D+(\d+)' | % { $matches[1] } - Write-Host "Reference bench:" $bench - $g = "Visual Studio 16 2019" - If (${env:PLATFORM} -eq 'x64') { $a = "x64" } - If (${env:PLATFORM} -eq 'x86') { $a = "Win32" } - cmake -G "${g}" -A ${a} . - Write-Host "Generated files for: " $g $a - -build_script: - - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - - ps: | - # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" - $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" - $nnuenet = $Matches.nnuenet - Write-Host "Default net:" $nnuenet - $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet" - $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet" - if (Test-Path -Path $nnuefilepath) { - Write-Host "Already available." - } else { - Write-Host "Downloading $nnuedownloadurl to $nnuefilepath" - Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath - } - -before_test: - - cd src/%CONFIGURATION% - - stockfish bench 2> out.txt >NUL - - ps: | - # Verify bench number - $s = (gc "./out.txt" | out-string) - $r = ($s -match 'Nodes searched \D+(\d+)' | % { $matches[1] }) - Write-Host "Engine bench:" $r - Write-Host "Reference bench:" $bench - If ($r -ne $bench) { exit 1 } diff --git a/src/Makefile b/src/Makefile index cf4f4ecf..505fb2cf 100644 --- a/src/Makefile +++ b/src/Makefile @@ -91,7 +91,7 @@ endif # at the end of the line for flag values. # # Example of use for these flags: -# make build ARCH=x86-64-avx512 debug=on sanitize="address undefined" +# make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined" ### 2.1. General and architecture defaults @@ -520,7 +520,7 @@ ifeq ($(bits),64) CXXFLAGS += -DIS_64BIT endif -### 3.5 prefetch +### 3.5 prefetch and popcount ifeq ($(prefetch),yes) ifeq ($(sse),yes) CXXFLAGS += -msse @@ -529,7 +529,6 @@ else CXXFLAGS += -DNO_PREFETCH endif -### 3.6 popcnt ifeq ($(popcnt),yes) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT @@ -540,6 +539,7 @@ ifeq ($(popcnt),yes) endif endif +### 3.6 SIMD architectures ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -754,7 +754,7 @@ profile-build: net config-sanity objclean profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." - $(PGOBENCH) > /dev/null + $(PGOBENCH) 2>&1 | tail -n 4 @echo "" @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 62d4be84..1dc701cf 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -61,7 +61,7 @@ namespace Stockfish { namespace Eval { bool useNNUE; - string eval_file_loaded = "None"; + string currentEvalFileName = "None"; /// NNUE::init() tries to load a NNUE network at startup time, or when the engine /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" @@ -90,13 +90,13 @@ namespace Eval { #endif for (string directory : dirs) - if (eval_file_loaded != eval_file) + if (currentEvalFileName != eval_file) { if (directory != "") { ifstream stream(directory + eval_file, ios::binary); if (load_eval(eval_file, stream)) - eval_file_loaded = eval_file; + currentEvalFileName = eval_file; } if (directory == "" && eval_file == EvalFileDefaultName) @@ -111,7 +111,7 @@ namespace Eval { istream stream(&buffer); if (load_eval(eval_file, stream)) - eval_file_loaded = eval_file; + currentEvalFileName = eval_file; } } } @@ -123,7 +123,7 @@ namespace Eval { if (eval_file.empty()) eval_file = EvalFileDefaultName; - if (useNNUE && eval_file_loaded != eval_file) + if (useNNUE && currentEvalFileName != eval_file) { string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; @@ -988,7 +988,9 @@ namespace { // Early exit if score is high auto lazy_skip = [&](Value lazyThreshold) { - return abs(mg_value(score) + eg_value(score)) > lazyThreshold + pos.non_pawn_material() / 32; + return abs(mg_value(score) + eg_value(score)) > lazyThreshold + + std::abs(pos.this_thread()->bestValue) * 5 / 4 + + pos.non_pawn_material() / 32; }; if (lazy_skip(LazyThreshold1)) @@ -1053,26 +1055,22 @@ make_v: if ( pos.piece_on(SQ_A1) == W_BISHOP && pos.piece_on(SQ_B2) == W_PAWN) - correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4 - : -CorneredBishop * 3; + correction -= CorneredBishop; if ( pos.piece_on(SQ_H1) == W_BISHOP && pos.piece_on(SQ_G2) == W_PAWN) - correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4 - : -CorneredBishop * 3; + correction -= CorneredBishop; if ( pos.piece_on(SQ_A8) == B_BISHOP && pos.piece_on(SQ_B7) == B_PAWN) - correction += !pos.empty(SQ_B6) ? CorneredBishop * 4 - : CorneredBishop * 3; + correction += CorneredBishop; if ( pos.piece_on(SQ_H8) == B_BISHOP && pos.piece_on(SQ_G7) == B_PAWN) - correction += !pos.empty(SQ_G6) ? CorneredBishop * 4 - : CorneredBishop * 3; + correction += CorneredBishop; - return pos.side_to_move() == WHITE ? Value(correction) - : -Value(correction); + return pos.side_to_move() == WHITE ? Value(5 * correction) + : -Value(5 * correction); } } // namespace Eval @@ -1085,37 +1083,30 @@ Value Eval::evaluate(const Position& pos) { Value v; - if (!Eval::useNNUE) - v = Evaluation(pos).value(); + // Deciding between classical and NNUE eval: for high PSQ imbalance we use classical, + // but we switch to NNUE during long shuffling or with high material on the board. + + if ( !useNNUE + || abs(eg_value(pos.psq_score())) * 5 > (850 + pos.non_pawn_material() / 64) * (5 + pos.rule50_count())) + v = Evaluation(pos).value(); // classical else { - // Scale and shift NNUE for compatibility with search and classical evaluation - auto adjusted_NNUE = [&]() - { - int scale = 883 - + 32 * pos.count() - + 32 * pos.non_pawn_material() / 1024; - - Value nnue = NNUE::evaluate(pos, true) * scale / 1024; - - if (pos.is_chess960()) - nnue += fix_FRC(pos); + int scale = 1049 + + 8 * pos.count() + + 20 * pos.non_pawn_material() / 1024; - return nnue; - }; + Value nnue = NNUE::evaluate(pos, true); // NNUE + Color stm = pos.side_to_move(); + Value optimism = pos.this_thread()->optimism[stm]; - // If there is PSQ imbalance we use the classical eval, but we switch to - // NNUE eval faster when shuffling or if the material on the board is high. - int r50 = pos.rule50_count(); - Value psq = Value(abs(eg_value(pos.psq_score()))); - bool classical = psq * 5 > (850 + pos.non_pawn_material() / 64) * (5 + r50); + v = (nnue + optimism) * scale / 1024 - optimism; - v = classical ? Evaluation(pos).value() // classical - : adjusted_NNUE(); // NNUE + if (pos.is_chess960()) + v += fix_FRC(pos); } // Damp down the evaluation linearly when shuffling - v = v * (100 - pos.rule50_count()) / 100; + v = v * (207 - pos.rule50_count()) / 207; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); @@ -1140,7 +1131,11 @@ std::string Eval::trace(Position& pos) { std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt + // Reset any global variable used in eval + pos.this_thread()->trend = SCORE_ZERO; + pos.this_thread()->bestValue = VALUE_ZERO; + pos.this_thread()->optimism[WHITE] = VALUE_ZERO; + pos.this_thread()->optimism[BLACK] = VALUE_ZERO; v = Evaluation(pos).value(); diff --git a/src/evaluate.h b/src/evaluate.h index d7cc6e29..77c3b4e7 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -34,12 +34,12 @@ namespace Eval { Value evaluate(const Position& pos); extern bool useNNUE; - extern std::string eval_file_loaded; + extern std::string currentEvalFileName; // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-6762d36ad265.nnue" + #define EvalFileDefaultName "nn-4f56ecfca5b7.nnue" namespace NNUE { diff --git a/src/misc.cpp b/src/misc.cpp index 4cac7e98..769761d9 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -36,6 +36,8 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +typedef WORD(*fun5_t)(); } #endif @@ -496,11 +498,11 @@ void bindThisThread(size_t) {} #else -/// best_group() retrieves logical processor information using Windows specific -/// API and returns the best group id for the thread with index idx. Original +/// best_node() retrieves logical processor information using Windows specific +/// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Österlund. -int best_group(size_t idx) { +int best_node(size_t idx) { int threads = 0; int nodes = 0; @@ -514,7 +516,8 @@ int best_group(size_t idx) { if (!fun1) return -1; - // First call to get returnLength. We expect it to fail due to null buffer + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. if (fun1(RelationAll, nullptr, &returnLength)) return -1; @@ -522,7 +525,7 @@ int best_group(size_t idx) { SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); - // Second call, now we expect to succeed + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed if (!fun1(RelationAll, buffer, &returnLength)) { free(buffer); @@ -572,22 +575,38 @@ int best_group(size_t idx) { void bindThisThread(size_t idx) { // Use only local variables to be thread-safe - int group = best_group(idx); + int node = best_node(idx); - if (group == -1) + if (node == -1) return; // Early exit if the needed API are not available at runtime HMODULE k32 = GetModuleHandle("Kernel32.dll"); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); + auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); + auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount"); if (!fun2 || !fun3) return; - GROUP_AFFINITY affinity; - if (fun2(group, &affinity)) - fun3(GetCurrentThread(), &affinity, nullptr); + if (!fun4 || !fun5) + { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } + else + { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity + free(affinity); + } } #endif diff --git a/src/misc.h b/src/misc.h index dae37cd9..c1744130 100644 --- a/src/misc.h +++ b/src/misc.h @@ -85,19 +85,30 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; static inline const bool IsLittleEndian = (Le.c[0] == 4); -template -class ValueListInserter { -public: - ValueListInserter(T* v, std::size_t& s) : - values(v), - size(&s) - { - } - - void push_back(const T& value) { values[(*size)++] = value; } -private: - T* values; - std::size_t* size; +// RunningAverage : a class to calculate a running average of a series of values. +// For efficiency, all computations are done with integers. +class RunningAverage { + public: + + // Constructor + RunningAverage() {} + + // Reset the running average to rational value p / q + void set(int64_t p, int64_t q) + { average = p * PERIOD * RESOLUTION / q; } + + // Update average with value v + void update(int64_t v) + { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } + + // Test if average is strictly greater than rational a / b + bool is_greater(int64_t a, int64_t b) + { return b * average > a * PERIOD * RESOLUTION ; } + + private : + static constexpr int64_t PERIOD = 4096; + static constexpr int64_t RESOLUTION = 1024; + int64_t average; }; template @@ -113,7 +124,6 @@ public: const T& operator[](std::size_t index) const { return values_[index]; } const T* begin() const { return values_; } const T* end() const { return values_ + size_; } - operator ValueListInserter() { return ValueListInserter(values_, size_); } void swap(ValueList& other) { const std::size_t maxSize = std::max(size_, other.size_); @@ -128,6 +138,33 @@ private: std::size_t size_ = 0; }; + +/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers, +/// with the following properties: +/// +/// - sigmoid is centered in (x0, y0) +/// - sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1] +/// - limit is (y0 - P/Q) when t tends to -infinity +/// - limit is (y0 + P/Q) when t tends to +infinity +/// - the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid +/// - the slope of the sigmoid when t = x0 is P/(Q*C) +/// - sigmoid is increasing with t when P > 0 and Q > 0 +/// - to get a decreasing sigmoid, call with -t, or change sign of P +/// - mean value of the sigmoid is y0 +/// +/// Use to draw the sigmoid + +inline int64_t sigmoid(int64_t t, int64_t x0, + int64_t y0, + int64_t C, + int64_t P, + int64_t Q) +{ + assert(C > 0); + return y0 + P * (t-x0) / (Q * (std::abs(t-x0) + C)) ; +} + + /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated /// to the public domain by Sebastiano Vigna (2014). diff --git a/src/movepick.h b/src/movepick.h index c76d4957..7d78886f 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -86,11 +86,11 @@ enum StatsType { NoCaptures, Captures }; /// unsuccessful during the current search, and is used for reduction and move /// ordering decisions. It uses 2 tables (one for each color) indexed by /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards -typedef Stats ButterflyHistory; +typedef Stats ButterflyHistory; /// At higher depths LowPlyHistory records successful quiet moves near the root -/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new -/// search and filled during iterative deepening. +/// and quiet moves which are/were in the PV (ttPv). LowPlyHistory is populated during +/// iterative deepening and at each new search the data is shifted down by 2 plies constexpr int MAX_LPH = 4; typedef Stats LowPlyHistory; diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 891f8faa..bd473294 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -143,6 +143,7 @@ namespace Stockfish::Eval::NNUE { // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; + int delta = 7; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType transformedFeaturesUnaligned[ @@ -162,20 +163,14 @@ namespace Stockfish::Eval::NNUE { const std::size_t bucket = (pos.count() - 1) / 4; const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto output = network[bucket]->propagate(transformedFeatures, buffer); + const auto positional = network[bucket]->propagate(transformedFeatures, buffer)[0]; - int materialist = psqt; - int positional = output[0]; - - int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK)); - int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0); - - int A = 128 - entertainment; - int B = 128 + entertainment; - - int sum = (A * materialist + B * positional) / 128; - - return static_cast( sum / OutputScale ); + // Give more value to positional evaluation when material is balanced + if ( adjusted + && abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK)) <= RookValueMg - BishopValueMg) + return static_cast(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale); + else + return static_cast((psqt + positional) / OutputScale); } struct NnueEvalTrace { @@ -239,7 +234,7 @@ namespace Stockfish::Eval::NNUE { { buffer[1] = '0' + cp / 10000; cp %= 10000; buffer[2] = '0' + cp / 1000; cp %= 1000; - buffer[3] = '0' + cp / 100; cp %= 100; + buffer[3] = '0' + cp / 100; buffer[4] = ' '; } else if (cp >= 1000) @@ -396,7 +391,7 @@ namespace Stockfish::Eval::NNUE { actualFilename = filename.value(); else { - if (eval_file_loaded != EvalFileDefaultName) + if (currentEvalFileName != EvalFileDefaultName) { msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp index 098a6d60..6face217 100644 --- a/src/nnue/features/half_ka_v2_hm.cpp +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -39,7 +39,7 @@ namespace Stockfish::Eval::NNUE::Features { void HalfKAv2_hm::append_active_indices( const Position& pos, Color perspective, - ValueListInserter active + IndexList& active ) { Square ksq = pos.square(perspective); Bitboard bb = pos.pieces(); @@ -55,22 +55,20 @@ namespace Stockfish::Eval::NNUE::Features { void HalfKAv2_hm::append_changed_indices( Square ksq, - StateInfo* st, + const DirtyPiece& dp, Color perspective, - ValueListInserter removed, - ValueListInserter added + IndexList& removed, + IndexList& added ) { - const auto& dp = st->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - Piece pc = dp.piece[i]; if (dp.from[i] != SQ_NONE) - removed.push_back(make_index(perspective, dp.from[i], pc, ksq)); + removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq)); if (dp.to[i] != SQ_NONE) - added.push_back(make_index(perspective, dp.to[i], pc, ksq)); + added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq)); } } - int HalfKAv2_hm::update_cost(StateInfo* st) { + int HalfKAv2_hm::update_cost(const StateInfo* st) { return st->dirtyPiece.dirty_num; } @@ -78,7 +76,7 @@ namespace Stockfish::Eval::NNUE::Features { return pos.count(); } - bool HalfKAv2_hm::requires_refresh(StateInfo* st, Color perspective) { + bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); } diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h index 2c1144f6..c7b1a68d 100644 --- a/src/nnue/features/half_ka_v2_hm.h +++ b/src/nnue/features/half_ka_v2_hm.h @@ -50,7 +50,7 @@ namespace Stockfish::Eval::NNUE::Features { PS_W_QUEEN = 8 * SQUARE_NB, PS_B_QUEEN = 9 * SQUARE_NB, PS_KING = 10 * SQUARE_NB, - PS_NB = 11 * SQUARE_NB + PS_NB = 11 * SQUARE_NB }; static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { @@ -85,36 +85,38 @@ namespace Stockfish::Eval::NNUE::Features { -1, -1, -1, -1, 23, 22, 21, 20, -1, -1, -1, -1, 19, 18, 17, 16, -1, -1, -1, -1, 15, 14, 13, 12, - -1, -1, -1, -1, 11, 10, 9, 8, - -1, -1, -1, -1, 7, 6, 5, 4, - -1, -1, -1, -1, 3, 2, 1, 0 + -1, -1, -1, -1, 11, 10, 9, 8, + -1, -1, -1, -1, 7, 6, 5, 4, + -1, -1, -1, -1, 3, 2, 1, 0 }; // Maximum number of simultaneously active features. static constexpr IndexType MaxActiveDimensions = 32; + using IndexList = ValueList; // Get a list of indices for active features static void append_active_indices( const Position& pos, Color perspective, - ValueListInserter active); + IndexList& active); // Get a list of indices for recently changed features static void append_changed_indices( Square ksq, - StateInfo* st, + const DirtyPiece& dp, Color perspective, - ValueListInserter removed, - ValueListInserter added); + IndexList& removed, + IndexList& added + ); // Returns the cost of updating one perspective, the most costly one. // Assumes no refresh needed. - static int update_cost(StateInfo* st); + static int update_cost(const StateInfo* st); static int refresh_cost(const Position& pos); // Returns whether the change stored in this StateInfo means that // a full accumulator refresh is required. - static bool requires_refresh(StateInfo* st, Color perspective); + static bool requires_refresh(const StateInfo* st, Color perspective); }; } // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 75ac7862..74eaae17 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE { // write_little_endian() is our utility to write an integer (signed or unsigned, any size) // to a stream in little-endian order. We swap the byte order before the write if - // necessary to always write in little endian order, independantly of the byte + // necessary to always write in little endian order, independently of the byte // ordering of the compiling machine. template inline void write_little_endian(std::ostream& stream, IntType value) { diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 59a965ac..0297b323 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -370,7 +370,6 @@ namespace Stockfish::Eval::NNUE { // That might depend on the feature set and generally relies on the // feature set's update cost calculation to be correct and never // allow updates with more added/removed features than MaxActiveDimensions. - using IndexList = ValueList; #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array @@ -404,12 +403,12 @@ namespace Stockfish::Eval::NNUE { // Gather all features to be updated. const Square ksq = pos.square(perspective); - IndexList removed[2], added[2]; + FeatureSet::IndexList removed[2], added[2]; FeatureSet::append_changed_indices( - ksq, next, perspective, removed[0], added[0]); + ksq, next->dirtyPiece, perspective, removed[0], added[0]); for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) FeatureSet::append_changed_indices( - ksq, st2, perspective, removed[1], added[1]); + ksq, st2->dirtyPiece, perspective, removed[1], added[1]); // Mark the accumulators as computed. next->accumulator.computed[perspective] = true; @@ -534,7 +533,7 @@ namespace Stockfish::Eval::NNUE { // Refresh the accumulator auto& accumulator = pos.state()->accumulator; accumulator.computed[perspective] = true; - IndexList active; + FeatureSet::IndexList active; FeatureSet::append_active_indices(pos, perspective, active); #ifdef VECTOR diff --git a/src/position.cpp b/src/position.cpp index 97581e12..a4dab37d 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1011,9 +1011,9 @@ void Position::do_null_move(StateInfo& newSt) { } st->key ^= Zobrist::side; + ++st->rule50; prefetch(TT.first_entry(key())); - ++st->rule50; st->pliesFromNull = 0; sideToMove = ~sideToMove; diff --git a/src/search.cpp b/src/search.cpp index 0c05713c..c926c1d2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -61,9 +61,6 @@ namespace { // Different node types, used as a template parameter enum NodeType { NonPV, PV, Root }; - constexpr uint64_t TtHitAverageWindow = 4096; - constexpr uint64_t TtHitAverageResolution = 1024; - // Futility margin Value futility_margin(Depth d, bool improving) { return Value(214 * (d - improving)); @@ -72,9 +69,9 @@ namespace { // Reductions lookup table, initialized at startup int Reductions[MAX_MOVES]; // [depth or moveNumber] - Depth reduction(bool i, Depth d, int mn) { + Depth reduction(bool i, Depth d, int mn, bool rangeReduction) { int r = Reductions[d] * Reductions[mn]; - return (r + 534) / 1024 + (!i && r > 904); + return (r + 534) / 1024 + (!i && r > 904) + rangeReduction; } constexpr int futility_move_count(bool improving, Depth depth) { @@ -83,7 +80,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 14 ? 73 : 6 * d * d + 229 * d - 215; + return std::min((6 * d + 229) * d - 215 , 2000); } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -91,14 +88,46 @@ namespace { return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1); } - // Skill structure is used to implement strength limit + // Check if the current thread is in a search explosion + ExplosionState search_explosion(Thread* thisThread) { + + uint64_t nodesNow = thisThread->nodes; + bool explosive = thisThread->doubleExtensionAverage[WHITE].is_greater(2, 100) + || thisThread->doubleExtensionAverage[BLACK].is_greater(2, 100); + + if (explosive) + thisThread->nodesLastExplosive = nodesNow; + else + thisThread->nodesLastNormal = nodesNow; + + if ( explosive + && thisThread->state == EXPLOSION_NONE + && nodesNow - thisThread->nodesLastNormal > 6000000) + thisThread->state = MUST_CALM_DOWN; + + if ( thisThread->state == MUST_CALM_DOWN + && nodesNow - thisThread->nodesLastExplosive > 6000000) + thisThread->state = EXPLOSION_NONE; + + return thisThread->state; + } + + // Skill structure is used to implement strength limit. If we have an uci_elo then + // we convert it to a suitable fractional skill level using anchoring to CCRL Elo + // (goldfish 1.13 = 2000) and a fit through Ordo derived Elo for match (TC 60+0.6) + // results spanning a wide range of k values. struct Skill { - explicit Skill(int l) : level(l) {} - bool enabled() const { return level < 20; } - bool time_to_pick(Depth depth) const { return depth == 1 + level; } + Skill(int skill_level, int uci_elo) { + if (uci_elo) + level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + else + level = double(skill_level); + } + bool enabled() const { return level < 20.0; } + bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } Move pick_best(size_t multiPV); - int level; + double level; Move best = MOVE_NONE; }; @@ -152,7 +181,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int(21.9 * std::log(i)); + Reductions[i] = int((21.9 + std::log(Threads.size()) / 2) * std::log(i)); } @@ -222,10 +251,11 @@ void MainThread::search() { Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); Thread* bestThread = this; + Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); if ( int(Options["MultiPV"]) == 1 && !Limits.depth - && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"])) + && !skill.enabled() && rootMoves[0].pv[0] != MOVE_NONE) bestThread = Threads.get_best_thread(); @@ -256,7 +286,7 @@ void Thread::search() { // The latter is needed for statScore and killer initialization. Stack stack[MAX_PLY+10], *ss = stack+7; Move pv[MAX_PLY+1]; - Value bestValue, alpha, beta, delta; + Value alpha, beta, delta; Move lastBestMove = MOVE_NONE; Depth lastBestMoveDepth = 0; MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); @@ -290,19 +320,7 @@ void Thread::search() { std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0); size_t multiPV = size_t(Options["MultiPV"]); - - // Pick integer skill levels, but non-deterministically round up or down - // such that the average integer skill corresponds to the input floating point one. - // UCI_Elo is converted to a suitable fractional skill level, using anchoring - // to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo - // for match (TC 60+0.6) results spanning a wide range of k values. - PRNG rng(now()); - double floatLevel = Options["UCI_LimitStrength"] ? - std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : - double(Options["Skill Level"]); - int intLevel = int(floatLevel) + - ((floatLevel - int(floatLevel)) * 1024 > rng.rand() % 1024 ? 1 : 0); - Skill skill(intLevel); + Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); // When playing with strength handicap enable MultiPV search that we will // use behind the scenes to retrieve a set of possible moves. @@ -310,9 +328,16 @@ void Thread::search() { multiPV = std::max(multiPV, (size_t)4); multiPV = std::min(multiPV, rootMoves.size()); - ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2; - trend = SCORE_ZERO; + doubleExtensionAverage[WHITE].set(0, 100); // initialize the running average at 0% + doubleExtensionAverage[BLACK].set(0, 100); // initialize the running average at 0% + + nodesLastExplosive = nodes; + nodesLastNormal = nodes; + state = EXPLOSION_NONE; + trend = SCORE_ZERO; + optimism[ us] = Value(25); + optimism[~us] = -optimism[us]; int searchAgainCounter = 0; @@ -353,16 +378,19 @@ void Thread::search() { // Reset aspiration window starting size if (rootDepth >= 4) { - Value prev = rootMoves[pvIdx].previousScore; - delta = Value(17); + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(17) + int(prev) * prev / 16384; alpha = std::max(prev - delta,-VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE); - // Adjust trend based on root move's previousScore (dynamic contempt) - int tr = 113 * prev / (abs(prev) + 147); - + // Adjust trend and optimism based on root move's previousScore + int tr = sigmoid(prev, 0, 0, 147, 113, 1); trend = (us == WHITE ? make_score(tr, tr / 2) : -make_score(tr, tr / 2)); + + int opt = sigmoid(prev, 0, 25, 147, 14464, 256); + optimism[ us] = Value(opt); + optimism[~us] = -optimism[us]; } // Start with a small aspiration window and, in the case of a fail @@ -449,6 +477,13 @@ void Thread::search() { if (skill.enabled() && skill.time_to_pick(rootDepth)) skill.pick_best(multiPV); + // Use part of the gained time from a previous stable move for the current move + for (Thread* th : Threads) + { + totBestMoveChanges += th->bestMoveChanges; + th->bestMoveChanges = 0; + } + // Do we have time for the next iteration? Can we stop searching now? if ( Limits.use_time_management() && !Threads.stop @@ -461,13 +496,6 @@ void Thread::search() { // If the bestMove is stable over several iterations, reduce time accordingly timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95; double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction); - - // Use part of the gained time from a previous stable move for the current move - for (Thread* th : Threads) - { - totBestMoveChanges += th->bestMoveChanges; - th->bestMoveChanges = 0; - } double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth) * totBestMoveChanges / Threads.size(); double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; @@ -518,6 +546,14 @@ namespace { template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { + Thread* thisThread = pos.this_thread(); + + // Step 0. Limit search explosion + if ( ss->ply > 10 + && search_explosion(thisThread) == MUST_CALM_DOWN + && depth > (ss-1)->depth) + depth = (ss-1)->depth; + constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; const Depth maxNextDepth = rootNode ? depth : depth + 1; @@ -556,14 +592,13 @@ namespace { bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; - int moveCount, captureCount, quietCount; + int moveCount, captureCount, quietCount, bestMoveCount, improvement; // Step 1. Initialize node - Thread* thisThread = pos.this_thread(); ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); - moveCount = captureCount = quietCount = ss->moveCount = 0; + moveCount = bestMoveCount = captureCount = quietCount = ss->moveCount = 0; bestValue = -VALUE_INFINITE; maxValue = VALUE_INFINITE; @@ -595,6 +630,8 @@ namespace { if (alpha >= beta) return alpha; } + else + thisThread->rootDelta = beta - alpha; assert(0 <= ss->ply && ss->ply < MAX_PLY); @@ -602,8 +639,12 @@ namespace { (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; ss->doubleExtensions = (ss-1)->doubleExtensions; + ss->depth = depth; Square prevSq = to_sq((ss-1)->currentMove); + // Update the running average statistics for double extensions + thisThread->doubleExtensionAverage[us].update(ss->depth > (ss-1)->depth); + // Initialize statScore to zero for the grandchildren of the current position. // So statScore is shared between all grandchildren and only the first grandchild // starts with statScore = 0. Later grandchildren start with the last calculated @@ -621,6 +662,7 @@ namespace { ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ss->ttHit ? tte->move() : MOVE_NONE; + ttCapture = ttMove && pos.capture_or_promotion(ttMove); if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); @@ -632,14 +674,10 @@ namespace { && is_ok((ss-1)->currentMove)) thisThread->lowPlyHistory[ss->ply - 1][from_to((ss-1)->currentMove)] << stat_bonus(depth - 5); - // thisThread->ttHitAverage can be used to approximate the running average of ttHit - thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ss->ttHit; - // At non-PV nodes we check for an early TT cutoff if ( !PvNode && ss->ttHit - && tte->depth() >= depth + && tte->depth() > depth - (thisThread->id() % 2 == 1) && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) : (tte->bound() & BOUND_UPPER))) @@ -650,7 +688,7 @@ namespace { if (ttValue >= beta) { // Bonus for a quiet ttMove that fails high - if (!pos.capture_or_promotion(ttMove)) + if (!ttCapture) update_quiet_stats(pos, ss, ttMove, stat_bonus(depth), depth); // Extra penalty for early quiet moves of the previous ply @@ -658,7 +696,7 @@ namespace { update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); } // Penalty for a quiet ttMove that fails low - else if (!pos.capture_or_promotion(ttMove)) + else if (!ttCapture) { int penalty = -stat_bonus(depth); thisThread->mainHistory[us][from_to(ttMove)] << penalty; @@ -732,6 +770,7 @@ namespace { // Skip early pruning when in check ss->staticEval = eval = VALUE_NONE; improving = false; + improvement = 0; goto moves_loop; } else if (ss->ttHit) @@ -752,39 +791,36 @@ namespace { } else { - // In case of null move search use previous static eval with a different sign - // and addition of two tempos - if ((ss-1)->currentMove != MOVE_NULL) - ss->staticEval = eval = evaluate(pos); - else - ss->staticEval = eval = -(ss-1)->staticEval; + ss->staticEval = eval = evaluate(pos); // Save static evaluation into transposition table - if(!excludedMove) - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + if (!excludedMove) + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Use static evaluation difference to improve quiet move ordering if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000); + int bonus = std::clamp(-16 * int((ss-1)->staticEval + ss->staticEval), -2000, 2000); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } - // Set up improving flag that is used in various pruning heuristics - // We define position as improving if static evaluation of position is better - // Than the previous static evaluation at our turn - // In case of us being in check at our previous move we look at move prior to it - improving = (ss-2)->staticEval == VALUE_NONE - ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE - : ss->staticEval > (ss-2)->staticEval; + // Set up the improvement variable, which is the difference between the current + // static evaluation and the previous static evaluation at our turn (if we were + // in check at our previous move we look at the move prior to it). The improvement + // margin and the improving flag are used in various pruning heuristics. + improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval + : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval + : 200; + + improving = improvement > 0; // Step 7. Futility pruning: child node (~50 Elo). // The depth condition is important for mate finding. - if ( !PvNode + if ( !ss->ttPv && depth < 9 && eval - futility_margin(depth, improving) >= beta - && eval < VALUE_KNOWN_WIN) // Do not return unproven wins + && eval < 15000) // 50% larger than VALUE_KNOWN_WIN, but smaller than TB wins. return eval; // Step 8. Null move search with verification search (~40 Elo) @@ -793,7 +829,7 @@ namespace { && (ss-1)->statScore < 23767 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 177 + && ss->staticEval >= beta - 20 * depth - improvement / 15 + 204 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -857,19 +893,16 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); - int probCutCount = 0; bool ttPv = ss->ttPv; ss->ttPv = false; - while ( (move = mp.next_move()) != MOVE_NONE - && probCutCount < 2 + 2 * cutNode) + while ((move = mp.next_move()) != MOVE_NONE) if (move != excludedMove && pos.legal(move)) { assert(pos.capture_or_promotion(move)); assert(depth >= 5); captureOrPromotion = true; - probCutCount++; ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] @@ -916,7 +949,7 @@ namespace { moves_loop: // When in check, search starts here - ttCapture = ttMove && pos.capture_or_promotion(ttMove); + int rangeReduction = 0; // Step 11. A small Probcut idea, when we are in check probCutBeta = beta + 409; @@ -949,7 +982,6 @@ moves_loop: // When in check, search starts here value = bestValue; singularQuietLMR = moveCountPruning = false; - bool doubleExtension = false; // Indicate PvNodes that will probably fail low if the node was searched // at a depth equal or greater than the current depth, and the result of this search was a fail low. @@ -1005,7 +1037,7 @@ moves_loop: // When in check, search starts here moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search - int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount), 0); + int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, rangeReduction > 2), 0); if ( captureOrPromotion || givesCheck) @@ -1022,17 +1054,21 @@ moves_loop: // When in check, search starts here } else { + int history = (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)]; + // Continuation history based pruning (~20 Elo) - if (lmrDepth < 5 - && (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] < -3000 * depth + 3000) + if ( lmrDepth < 5 + && history < -3000 * depth + 3000) continue; + history += thisThread->mainHistory[us][from_to(move)]; + // Futility pruning: parent node (~5 Elo) if ( !ss->inCheck - && lmrDepth < 7 - && ss->staticEval + 172 + 157 * lmrDepth <= alpha) + && lmrDepth < 8 + && ss->staticEval + 142 + 139 * lmrDepth + history / 64 <= alpha) continue; // Prune moves with negative SEE (~20 Elo) @@ -1057,7 +1093,7 @@ moves_loop: // When in check, search starts here && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - 2 * depth; + Value singularBeta = ttValue - 3 * depth; Depth singularDepth = (depth - 1) / 2; ss->excludedMove = move; @@ -1069,14 +1105,11 @@ moves_loop: // When in check, search starts here extension = 1; singularQuietLMR = !ttCapture; - // Avoid search explosion by limiting the number of double extensions to at most 3 + // Avoid search explosion by limiting the number of double extensions if ( !PvNode - && value < singularBeta - 93 - && ss->doubleExtensions < 3) - { + && value < singularBeta - 75 + && ss->doubleExtensions <= 6) extension = 2; - doubleExtension = true; - } } // Multi-cut pruning @@ -1087,17 +1120,9 @@ moves_loop: // When in check, search starts here else if (singularBeta >= beta) return singularBeta; - // If the eval of ttMove is greater than beta we try also if there is another - // move that pushes it over beta, if so also produce a cutoff. + // If the eval of ttMove is greater than beta, we reduce it (negative extension) else if (ttValue >= beta) - { - ss->excludedMove = move; - value = search(pos, ss, beta - 1, beta, (depth + 3) / 2, cutNode); - ss->excludedMove = MOVE_NONE; - - if (value >= beta) - return beta; - } + extension = -2; } // Capture extensions for PvNodes and cutNodes @@ -1109,7 +1134,14 @@ moves_loop: // When in check, search starts here // Check extensions else if ( givesCheck && depth > 6 - && abs(ss->staticEval) > Value(100)) + && abs(ss->staticEval) > 100) + extension = 1; + + // Quiet ttMove extensions + else if ( PvNode + && move == ttMove + && move == ss->killers[0] + && (*contHist[0])[movedPiece][to_sq(move)] >= 10000) extension = 1; // Add extension to new depth @@ -1135,18 +1167,16 @@ moves_loop: // When in check, search starts here // cases where we extend a son if it has good chances to be "interesting". if ( depth >= 3 && moveCount > 1 + 2 * rootNode - && ( !captureOrPromotion - || (cutNode && (ss-1)->moveCount > 1) - || !ss->ttPv) - && (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3)) + && ( !ss->ttPv + || !captureOrPromotion + || (cutNode && (ss-1)->moveCount > 1))) { - Depth r = reduction(improving, depth, moveCount); - - if (PvNode) - r--; + Depth r = reduction(improving, depth, moveCount, rangeReduction > 2); - // Decrease reduction if the ttHit running average is large (~0 Elo) - if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024) + // Decrease reduction at some PvNodes (~2 Elo) + if ( PvNode + && bestMoveCount <= 3 + && beta - alpha >= thisThread->rootDelta / 4) r--; // Decrease reduction if position is or has been on the PV @@ -1155,8 +1185,8 @@ moves_loop: // When in check, search starts here && !likelyFailLow) r -= 2; - // Increase reduction at root and non-PV nodes when the best move does not change frequently - if ( (rootNode || !PvNode) + // Increase reduction at non-PV nodes when the best move does not change frequently + if ( !PvNode && thisThread->bestMoveChanges <= 2) r++; @@ -1185,13 +1215,23 @@ moves_loop: // When in check, search starts here // Decrease/increase reduction for moves with a good/bad history (~30 Elo) r -= ss->statScore / 14721; - // In general we want to cap the LMR depth search at newDepth. But if - // reductions are really negative and movecount is low, we allow this move - // to be searched deeper than the first move in specific cases. - Depth d = std::clamp(newDepth - r, 1, newDepth + (r < -1 && (moveCount <= 5 || (depth > 6 && PvNode)) && !doubleExtension)); + // In general we want to cap the LMR depth search at newDepth. But if reductions + // are really negative and movecount is low, we allow this move to be searched + // deeper than the first move (this may lead to hidden double extensions). + int deeper = r >= -1 ? 0 + : moveCount <= 5 ? 2 + : PvNode && depth > 6 ? 1 + : cutNode && moveCount <= 7 ? 1 + : 0; + + Depth d = std::clamp(newDepth - r, 1, newDepth + deeper); value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); + // Range reductions (~3 Elo) + if (ss->staticEval - value < 30 && depth > 7) + rangeReduction++; + // If the son is reduced and fails high it will be re-searched at full depth doFullDepthSearch = value > alpha && d < newDepth; didLMR = true; @@ -1246,6 +1286,8 @@ moves_loop: // When in check, search starts here RootMove& rm = *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); + rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + // PV move or new best move? if (moveCount == 1 || value > alpha) { @@ -1258,9 +1300,11 @@ moves_loop: // When in check, search starts here for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m) rm.pv.push_back(*m); - // We record how often the best move has been changed in each - // iteration. This information is used for time management and LMR - if (moveCount > 1) + // We record how often the best move has been changed in each iteration. + // This information is used for time management and LMR. In MultiPV mode, + // we must take care to only do this for the first PV line. + if ( moveCount > 1 + && !thisThread->pvIdx) ++thisThread->bestMoveChanges; } else @@ -1282,7 +1326,10 @@ moves_loop: // When in check, search starts here update_pv(ss->pv, move, (ss+1)->pv); if (PvNode && value < beta) // Update alpha! Always alpha < beta + { alpha = value; + bestMoveCount++; + } else { assert(value >= beta); // Fail high @@ -1377,13 +1424,12 @@ moves_loop: // When in check, search starts here Key posKey; Move ttMove, move, bestMove; Depth ttDepth; - Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; + Value bestValue, value, ttValue, futilityValue, futilityBase; bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) { - oldAlpha = alpha; // To flag BOUND_EXACT when eval above alpha and no available moves (ss+1)->pv = pv; ss->pv[0] = MOVE_NONE; } @@ -1441,7 +1487,6 @@ moves_loop: // When in check, search starts here } else // In case of null move search use previous static eval with a different sign - // and addition of two tempos ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) : -(ss-1)->staticEval; @@ -1574,8 +1619,7 @@ moves_loop: // When in check, search starts here // Save gathered info in transposition table tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : - PvNode && bestValue > oldAlpha ? BOUND_EXACT : BOUND_UPPER, + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, ttDepth, bestMove, ss->staticEval); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); @@ -1651,8 +1695,8 @@ moves_loop: // When in check, search starts here PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); bonus1 = stat_bonus(depth + 1); - bonus2 = bestValue > beta + PawnValueMg ? bonus1 // larger bonus - : std::min(bonus1, stat_bonus(depth)); // smaller bonus + bonus2 = bestValue > beta + PawnValueMg ? bonus1 // larger bonus + : stat_bonus(depth); // smaller bonus if (!pos.capture_or_promotion(bestMove)) { @@ -1718,10 +1762,6 @@ moves_loop: // When in check, search starts here thisThread->mainHistory[us][from_to(move)] << bonus; update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus); - // Penalty for reversed move in case of moved piece not being a pawn - if (type_of(pos.moved_piece(move)) != PAWN) - thisThread->mainHistory[us][from_to(reverse_move(move))] << -bonus; - // Update countermove history if (is_ok((ss-1)->currentMove)) { @@ -1745,8 +1785,8 @@ moves_loop: // When in check, search starts here // RootMoves are already sorted by score in descending order Value topScore = rootMoves[0].score; int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg); - int weakness = 120 - 2 * level; int maxScore = -VALUE_INFINITE; + double weakness = 120 - 2 * level; // Choose best move. For each move score we add two terms, both dependent on // weakness. One is deterministic and bigger for weaker levels, and one is @@ -1754,8 +1794,8 @@ moves_loop: // When in check, search starts here for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = ( weakness * int(topScore - rootMoves[i].score) - + delta * (rng.rand() % weakness)) / 128; + int push = int(( weakness * int(topScore - rootMoves[i].score) + + delta * (rng.rand() % int(weakness))) / 128); if (rootMoves[i].score + push >= maxScore) { diff --git a/src/search.h b/src/search.h index 801baacc..7a5d5bdf 100644 --- a/src/search.h +++ b/src/search.h @@ -47,6 +47,7 @@ struct Stack { Move excludedMove; Move killers[2]; Value staticEval; + Depth depth; int statScore; int moveCount; bool inCheck; @@ -72,6 +73,7 @@ struct RootMove { Value score = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; int selDepth = 0; int tbRank = 0; Value tbScore; diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 96b2970f..41e867c0 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -769,7 +769,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu goto encode_remaining; // With pawns we have finished special treatments } - // In positions withouth pawns, we further flip the squares to ensure leading + // In positions without pawns, we further flip the squares to ensure leading // piece is below RANK_5. if (rank_of(squares[0]) > RANK_4) for (int i = 0; i < size; ++i) @@ -812,7 +812,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be // swapped and still get the same position.) // - // In case we have at least 3 unique pieces (inlcuded kings) we encode them + // In case we have at least 3 unique pieces (included kings) we encode them // together. if (entry->hasUniquePieces) { @@ -827,7 +827,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu + (squares[1] - adjust1)) * 62 + squares[2] - adjust2; - // First piece is on a1-h8 diagonal, second below: map this occurence to + // First piece is on a1-h8 diagonal, second below: map this occurrence to // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. else if (off_A1H8(squares[1])) @@ -857,7 +857,7 @@ encode_remaining: idx *= d->groupIdx[0]; Square* groupSq = squares + d->groupLen[0]; - // Encode remainig pawns then pieces according to square, in ascending order + // Encode remaining pawns then pieces according to square, in ascending order bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; while (d->groupLen[++next]) @@ -885,7 +885,7 @@ encode_remaining: // Group together pieces that will be encoded together. The general rule is that // a group contains pieces of same type and color. The exception is the leading -// group that, in case of positions withouth pawns, can be formed by 3 different +// group that, in case of positions without pawns, can be formed by 3 different // pieces (default) or by the king pair when there is not a unique piece apart // from the kings. When there are pawns, pawns are always first in pieces[]. // @@ -917,7 +917,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // // This ensures unique encoding for the whole position. The order of the // groups is a per-table parameter and could not follow the canonical leading - // pawns/pieces -> remainig pawns -> remaining pieces. In particular the + // pawns/pieces -> remaining pawns -> remaining pieces. In particular the // first group is at order[0] position and the remaining pawns, when present, // are at order[1] position. bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides @@ -937,7 +937,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { d->groupIdx[1] = idx; idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; } - else // Remainig pieces + else // Remaining pieces { d->groupIdx[next] = idx; idx *= Binomial[d->groupLen[next]][freeSquares]; @@ -947,7 +947,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { d->groupIdx[n] = idx; } -// In Recursive Pairing each symbol represents a pair of childern symbols. So +// In Recursive Pairing each symbol represents a pair of children symbols. So // read d->btree[] symbols data and expand each one in his left and right child // symbol until reaching the leafs that represent the symbol value. uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { @@ -1317,7 +1317,7 @@ void Tablebases::init(const std::string& paths) { for (auto p : bothOnDiagonal) MapKK[p.first][p.second] = code++; - // Binomial[] stores the Binomial Coefficents using Pascal rule. There + // Binomial[] stores the Binomial Coefficients using Pascal rule. There // are Binomial[k][n] ways to choose k elements from a set of n elements. Binomial[0][0] = 1; @@ -1337,7 +1337,7 @@ void Tablebases::init(const std::string& paths) { for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) for (File f = FILE_A; f <= FILE_D; ++f) { - // Restart the index at every file because TB table is splitted + // Restart the index at every file because TB table is split // by file, so we can reuse the same index for different files. int idx = 0; diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index 56734af9..cf61b767 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -38,7 +38,7 @@ enum WDLScore { // Possible states after a probing operation enum ProbeState { FAIL = 0, // Probe failed (missing file table) - OK = 1, // Probe succesful + OK = 1, // Probe successful CHANGE_STM = -1, // DTZ should check the other side ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; diff --git a/src/thread.h b/src/thread.h index 5bfa2359..cd206faa 100644 --- a/src/thread.h +++ b/src/thread.h @@ -60,15 +60,21 @@ public: Pawns::Table pawnsTable; Material::Table materialTable; size_t pvIdx, pvLast; - uint64_t ttHitAverage; + RunningAverage doubleExtensionAverage[COLOR_NB]; + uint64_t nodesLastExplosive; + uint64_t nodesLastNormal; + std::atomic nodes, tbHits, bestMoveChanges; + Value bestValue; int selDepth, nmpMinPly; Color nmpColor; - std::atomic nodes, tbHits, bestMoveChanges; + ExplosionState state; + Value optimism[COLOR_NB]; Position rootPos; StateInfo rootState; Search::RootMoves rootMoves; Depth rootDepth, completedDepth; + Value rootDelta; CounterMoveHistory counterMoves; ButterflyHistory mainHistory; LowPlyHistory lowPlyHistory; diff --git a/src/timeman.cpp b/src/timeman.cpp index f742d1e4..69d1c96f 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -68,6 +68,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); + // Use extra time with larger increments + double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); + // A user may scale time usage by setting UCI option "Slow Mover" // Default is 100 and changing this value will probably lose elo. timeLeft = slowMover * timeLeft / 100; @@ -78,15 +81,16 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { if (limits.movestogo == 0) { optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042, - 0.2 * limits.time[us] / double(timeLeft)); + 0.2 * limits.time[us] / double(timeLeft)) + * optExtra; maxScale = std::min(7.0, 4.0 + ply / 12.0); } // x moves in y seconds (+ z increment) else { - optScale = std::min((0.8 + ply / 128.0) / mtg, - 0.8 * limits.time[us] / double(timeLeft)); + optScale = std::min((0.88 + ply / 116.4) / mtg, + 0.88 * limits.time[us] / double(timeLeft)); maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } diff --git a/src/tune.h b/src/tune.h index b5c715b3..53d52a65 100644 --- a/src/tune.h +++ b/src/tune.h @@ -84,7 +84,7 @@ class Tune { static Tune& instance() { static Tune t; return t; } // Singleton - // Use polymorphism to accomodate Entry of different types in the same vector + // Use polymorphism to accommodate Entry of different types in the same vector struct EntryBase { virtual ~EntryBase() = default; virtual void init_option() = 0; diff --git a/src/types.h b/src/types.h index 0bd4a1c4..02cd19de 100644 --- a/src/types.h +++ b/src/types.h @@ -173,6 +173,11 @@ enum Bound { BOUND_EXACT = BOUND_UPPER | BOUND_LOWER }; +enum ExplosionState { + EXPLOSION_NONE, + MUST_CALM_DOWN +}; + enum Value : int { VALUE_ZERO = 0, VALUE_DRAW = 0, @@ -465,10 +470,6 @@ constexpr Move make_move(Square from, Square to) { return Move((from << 6) + to); } -constexpr Move reverse_move(Move m) { - return make_move(to_sq(m), from_sq(m)); -} - template constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); diff --git a/tests/reprosearch.sh b/tests/reprosearch.sh index c1167f7f..e16ba4ae 100755 --- a/tests/reprosearch.sh +++ b/tests/reprosearch.sh @@ -43,7 +43,7 @@ cat << EOF > repeat.exp expect eof EOF -# to increase the likelyhood of finding a non-reproducible case, +# to increase the likelihood of finding a non-reproducible case, # the allowed number of nodes are varied systematically for i in `seq 1 20` do