X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=src%2Fthread.h;h=d6a48eca73d14937f19212d462d9a120398d21d0;hb=HEAD;hp=a639cf7ea051e143782fc206212256aa8b2e248a;hpb=6645115377bd9699ae6785608753d33067b8e036;p=stockfish

diff --git a/src/thread.h b/src/thread.h
index a639cf7e..43e2e142 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -1,7 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
+  Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -21,168 +20,159 @@
 #define THREAD_H_INCLUDED
 
 #include <atomic>
-#include <bitset>
 #include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
 #include <mutex>
-#include <thread>
 #include <vector>
 
-#include "material.h"
-#include "movepick.h"
-#include "pawns.h"
+#include "numa.h"
 #include "position.h"
 #include "search.h"
-
-struct Thread;
-
-const size_t MAX_THREADS = 128;
-const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
-const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
-
-#if !defined(NO_SPINLOCK)
-/// Spinlock class wraps low level atomic operations to provide a spin lock
-
-class Spinlock {
-
-  std::atomic_int lock;
-
-public:
-  Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013
-  void acquire() {
-    while (lock.fetch_sub(1, std::memory_order_acquire) != 1)
-        while (lock.load(std::memory_order_relaxed) <= 0) {}
-  }
-  void release() { lock.store(1, std::memory_order_release); }
+#include "thread_win32_osx.h"
+
+namespace Stockfish {
+
+
+class OptionsMap;
+using Value = int;
+
+// Sometimes we don't want to actually bind the threads, but the recipient still
+// needs to think it runs on *some* NUMA node, such that it can access structures
+// that rely on NUMA node knowledge. This class encapsulates this optional process
+// such that the recipient does not need to know whether the binding happened or not.
+class OptionalThreadToNumaNodeBinder {
+   public:
+    OptionalThreadToNumaNodeBinder(NumaIndex n) :
+        numaConfig(nullptr),
+        numaId(n) {}
+
+    OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
+        numaConfig(&cfg),
+        numaId(n) {}
+
+    NumaReplicatedAccessToken operator()() const {
+        if (numaConfig != nullptr)
+            return numaConfig->bind_current_thread_to_numa_node(numaId);
+        else
+            return NumaReplicatedAccessToken(numaId);
+    }
+
+   private:
+    const NumaConfig* numaConfig;
+    NumaIndex         numaId;
 };
 
-#else
-
-class Spinlock {
-
-  std::mutex mutex;
-
-public:
-  void acquire() { mutex.lock(); }
-  void release() { mutex.unlock(); }
+// Abstraction of a thread. It contains a pointer to the worker and a native thread.
+// After construction, the native thread is started with idle_loop()
+// waiting for a signal to start searching.
+// When the signal is received, the thread starts searching and when
+// the search is finished, it goes back to idle_loop() waiting for a new signal.
+class Thread {
+   public:
+    Thread(Search::SharedState&,
+           std::unique_ptr<Search::ISearchManager>,
+           size_t,
+           OptionalThreadToNumaNodeBinder);
+    virtual ~Thread();
+
+    void idle_loop();
+    void start_searching();
+    void clear_worker();
+    void run_custom_job(std::function<void()> f);
+
+    void ensure_network_replicated();
+
+    // Thread has been slightly altered to allow running custom jobs, so
+    // this name is no longer correct. However, this class (and ThreadPool)
+    // require further work to make them properly generic while maintaining
+    // appropriate specificity regarding search, from the point of view of an
+    // outside user, so renaming of this function is left for whenever that happens.
+    void   wait_for_search_finished();
+    size_t id() const { return idx; }
+
+    std::unique_ptr<Search::Worker> worker;
+    std::function<void()>           jobFunc;
+
+   private:
+    std::mutex                mutex;
+    std::condition_variable   cv;
+    size_t                    idx, nthreads;
+    bool                      exit = false, searching = true;  // Set before starting std::thread
+    NativeThread              stdThread;
+    NumaReplicatedAccessToken numaAccessToken;
 };
 
-#endif
-
-/// SplitPoint struct stores information shared by the threads searching in
-/// parallel below the same split point. It is populated at splitting time.
-
-struct SplitPoint {
-
-  // Const data after split point has been setup
-  const Position* pos;
-  Search::Stack* ss;
-  Thread* master;
-  Depth depth;
-  Value beta;
-  int nodeType;
-  bool cutNode;
-
-  // Const pointers to shared data
-  MovePicker* movePicker;
-  SplitPoint* parentSplitPoint;
-
-  // Shared variable data
-  Spinlock spinlock;
-  std::bitset<MAX_THREADS> slavesMask;
-  volatile bool allSlavesSearching;
-  volatile uint64_t nodes;
-  volatile Value alpha;
-  volatile Value bestValue;
-  volatile Move bestMove;
-  volatile int moveCount;
-  volatile bool cutoff;
-};
-
-
-/// ThreadBase struct is the base of the hierarchy from where we derive all the
-/// specialized thread classes.
-
-struct ThreadBase {
-
-  virtual ~ThreadBase() = default;
-  virtual void idle_loop() = 0;
-  void notify_one();
-  void wait_for(volatile const bool& b);
-
-  std::thread nativeThread;
-  std::mutex mutex;
-  std::condition_variable sleepCondition;
-  volatile bool exit = false;
-};
-
-
-/// Thread struct keeps together all the thread related stuff like locks, state
-/// and especially split points. We also use per-thread pawn and material hash
-/// tables so that once we get a pointer to an entry its life time is unlimited
-/// and we don't have to care about someone changing the entry under our feet.
-
-struct Thread : public ThreadBase {
-
-  Thread();
-  virtual void idle_loop();
-  bool cutoff_occurred() const;
-  bool can_join(const SplitPoint* sp) const;
-
-  void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
-             Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
-
-  SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
-  Pawns::Table pawnsTable;
-  Material::Table materialTable;
-  Endgames endgames;
-  Position* activePosition;
-  size_t idx;
-  int maxPly;
-  SplitPoint* volatile activeSplitPoint;
-  volatile size_t splitPointsSize;
-  volatile bool searching;
-};
-
-
-/// MainThread and TimerThread are derived classes used to characterize the two
-/// special threads: the main one and the recurring timer.
-
-struct MainThread : public Thread {
-  virtual void idle_loop();
-  volatile bool thinking = true; // Avoid a race with start_thinking()
-};
-
-struct TimerThread : public ThreadBase {
-
-  static const int Resolution = 5; // Millisec between two check_time() calls
-
-  virtual void idle_loop();
-
-  bool run = false;
-};
-
-
-/// ThreadPool struct handles all the threads related stuff like init, starting,
-/// parking and, most importantly, launching a slave thread at a split point.
-/// All the access to shared thread data is done through this class.
-
-struct ThreadPool : public std::vector<Thread*> {
-
-  void init(); // No c'tor and d'tor, threads rely on globals that should be
-  void exit(); // initialized and are valid during the whole thread lifetime.
-
-  MainThread* main() { return static_cast<MainThread*>(at(0)); }
-  void read_uci_options();
-  Thread* available_slave(const SplitPoint* sp) const;
-  void wait_for_think_finished();
-  void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
 
-  Depth minimumSplitDepth;
-  Spinlock spinlock;
-  std::condition_variable sleepCondition;
-  TimerThread* timer;
+// ThreadPool struct handles all the threads-related stuff like init, starting,
+// parking and, most importantly, launching a thread. All the access to threads
+// is done through this class.
+class ThreadPool {
+   public:
+    ThreadPool() {}
+
+    ~ThreadPool() {
+        // destroy any existing thread(s)
+        if (threads.size() > 0)
+        {
+            main_thread()->wait_for_search_finished();
+
+            threads.clear();
+        }
+    }
+
+    ThreadPool(const ThreadPool&) = delete;
+    ThreadPool(ThreadPool&&)      = delete;
+
+    ThreadPool& operator=(const ThreadPool&) = delete;
+    ThreadPool& operator=(ThreadPool&&)      = delete;
+
+    void   start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
+    void   run_on_thread(size_t threadId, std::function<void()> f);
+    void   wait_on_thread(size_t threadId);
+    size_t num_threads() const;
+    void   clear();
+    void   set(const NumaConfig& numaConfig,
+               Search::SharedState,
+               const Search::SearchManager::UpdateContext&);
+
+    Search::SearchManager* main_manager();
+    Thread*                main_thread() const { return threads.front().get(); }
+    uint64_t               nodes_searched() const;
+    uint64_t               tb_hits() const;
+    Thread*                get_best_thread() const;
+    void                   start_searching();
+    void                   wait_for_search_finished() const;
+
+    std::vector<size_t> get_bound_thread_count_by_numa_node() const;
+
+    void ensure_network_replicated();
+
+    std::atomic_bool stop, abortedSearch, increaseDepth;
+
+    auto cbegin() const noexcept { return threads.cbegin(); }
+    auto begin() noexcept { return threads.begin(); }
+    auto end() noexcept { return threads.end(); }
+    auto cend() const noexcept { return threads.cend(); }
+    auto size() const noexcept { return threads.size(); }
+    auto empty() const noexcept { return threads.empty(); }
+
+   private:
+    StateListPtr                         setupStates;
+    std::vector<std::unique_ptr<Thread>> threads;
+    std::vector<NumaIndex>               boundThreadToNumaNode;
+
+    uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const {
+
+        uint64_t sum = 0;
+        for (auto&& th : threads)
+            sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
+        return sum;
+    }
 };
 
-extern ThreadPool Threads;
+}  // namespace Stockfish
 
-#endif // #ifndef THREAD_H_INCLUDED
+#endif  // #ifndef THREAD_H_INCLUDED