// Two-level stringification helpers.
// stringify2(token) quotes its argument exactly as written, while
// stringify(token) lets the preprocessor expand the argument first and only
// then turns the result into a string literal.
#define stringify2(token) #token
#define stringify(token) stringify2(token)
+
+namespace Stockfish {
+
// Returns a string describing the compiler/build.
// NOTE(review): exact contents are produced by the definition, which is not
// visible from this header.
std::string compiler_info();

// Returns the engine identification text. `to_uci` defaults to false;
// presumably it switches to the UCI-protocol form of the text -- confirm
// against the definition.
std::string engine_info(bool to_uci = false);

// Asks for the memory at `addr` to be brought into the L1/L2 cache.
// The request is non-blocking: the CPU is not stalled while the data is
// fetched from memory, which can be quite slow.
void prefetch(void* addr);
+
// Starts logging using the file name `fname`.
// NOTE(review): what gets logged, and how an empty name is handled, is
// decided by the definition -- not visible here.
void start_logger(const std::string& fname);

// Aligned allocation pair.
// NOTE(review): memory obtained from std_aligned_alloc() is presumably meant
// to be released through std_aligned_free() -- confirm in the definition.
void  std_aligned_free(void* ptr);
void* std_aligned_alloc(size_t alignment, size_t size);

// Large-page allocation pair. The returned memory is aligned to the page
// size, with a minimum alignment of 4096 bytes.
void* aligned_large_pages_alloc(size_t size);
// Does nothing when mem == nullptr.
void aligned_large_pages_free(void* mem);
+
// Debug instrumentation entry points. Every recording function takes an
// optional `slot` (default 0).
// NOTE(review): judging by the names these accumulate hit-rate, mean,
// standard-deviation and correlation statistics per slot, and dbg_print()
// reports them -- the definitions are not visible here, so confirm there.
void dbg_print();
void dbg_hit_on(bool cond, int slot = 0);
void dbg_mean_of(int64_t value, int slot = 0);
void dbg_stdev_of(int64_t value, int slot = 0);
void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
+
// Timestamps are passed around as a plain integer count of milliseconds.
using TimePoint = std::chrono::milliseconds::rep;
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");

// Returns the current reading of std::chrono::steady_clock, expressed as
// whole milliseconds since that clock's epoch.
inline TimePoint now() {
    const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
    const auto asMillis   = std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch);
    return asMillis.count();
}
+
+
// Tags streamed into std::cout by the sync_cout / sync_endl macros below.
// NOTE(review): the names suggest operator<< takes and releases an output
// lock when it sees these tags; its definition is not visible here.
enum SyncCout {
    IO_LOCK   = 0,
    IO_UNLOCK = 1
};
std::ostream& operator<<(std::ostream& os, SyncCout tag);

// Usage: sync_cout << ... << sync_endl;
#define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK
+
+
// Returns the first address at or after `ptr` that is a multiple of
// `Alignment`, typed as T*.
// The caller must own at least `sizeof(T) * N + Alignment` bytes starting at
// `ptr` (N being the number of elements it intends to use), so that the
// aligned array still fits after skipping the leading padding.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
    static_assert(alignof(T) < Alignment);

    const auto address = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));

    // Bump past the next boundary, then drop the remainder to land on it.
    const uintptr_t bumped  = address + (Alignment - 1);
    const uintptr_t aligned = bumped - bumped % Alignment;

    return reinterpret_cast<T*>(reinterpret_cast<char*>(aligned));
}
+
+
// Endianness probe: `Le` overlays the 32-bit value 0x01020304 with its four
// bytes. IsLittleEndian is true if and only if the lowest-addressed byte
// holds the least significant byte (0x04), i.e. the binary was built for a
// little-endian machine.
static inline const union {
    uint32_t i;
    char     c[4];
} Le = {0x01020304};

static inline const bool IsLittleEndian = Le.c[0] == 4;
+
+
// Fixed-capacity, append-only list stored inline (no heap allocation).
// Holds at most MaxSize elements of type T; elements are appended with
// push_back() and read through begin()/end() or operator[].
template<typename T, std::size_t MaxSize>
class ValueList {

   public:
    // Number of elements appended so far.
    std::size_t size() const { return size_; }

    // Appends a copy of `value`. The list must not already be full: writing
    // past MaxSize would run off the end of the backing array, so this is
    // checked with an assert (active in debug builds only).
    void push_back(const T& value) {
        assert(size_ < MaxSize);
        values_[size_++] = value;
    }

    // Read-only iteration over the elements appended so far.
    const T* begin() const { return values_; }
    const T* end() const { return values_ + size_; }

    // Unchecked read access to the element at `index`.
    const T& operator[](int index) const { return values_[index]; }

   private:
    T           values_[MaxSize];
    std::size_t size_ = 0;
};
+
+
// xorshift64star pseudo-random number generator.
// Based on original code written and dedicated to the public domain by
// Sebastiano Vigna (2014). Characteristics:
//
//  - Outputs 64-bit numbers
//  - Passes Dieharder and SmallCrush test batteries
//  - Does not require warm-up, no zeroland to escape
//  - Internal state is a single 64-bit integer
//  - Period is 2^64 - 1
//  - Speed: 1.60 ns/call (Core i7 @3.40GHz)
//
// For further analysis see
//   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>

class PRNG {

    uint64_t state;

    // Advances the state with the 12/25/27 xorshift steps and returns the
    // new state scrambled by the xorshift64* multiplier.
    uint64_t rand64() {
        state ^= state >> 12;
        state ^= state << 25;
        state ^= state >> 27;
        return state * 2685821657736338717LL;
    }

   public:
    // The seed must be non-zero (asserted).
    PRNG(uint64_t seed) :
        state(seed) {
        assert(seed);
    }

    // Next 64-bit output converted to T.
    template<typename T>
    T rand() {
        return T(rand64());
    }

    // Special generator used to fast init magic numbers.
    // ANDs three consecutive outputs, so on average only 1/8th of the
    // bits of the result are set.
    template<typename T>
    T sparse_rand() {
        const uint64_t first  = rand64();
        const uint64_t second = rand64();
        const uint64_t third  = rand64();
        return T(first & second & third);
    }
};
+
// Returns the upper 64 bits of the full 128-bit product a * b.
inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
    // Native 128-bit multiply where the compiler provides it.
    __extension__ using uint128 = unsigned __int128;
    const uint128 product       = uint128(a) * uint128(b);
    return product >> 64;
#else
    // Portable schoolbook multiplication on 32-bit halves.
    const uint64_t aLow = uint32_t(a), aHigh = a >> 32;
    const uint64_t bLow = uint32_t(b), bHigh = b >> 32;

    const uint64_t lowCarry   = (aLow * bLow) >> 32;            // carry out of the low word
    const uint64_t crossFirst = aHigh * bLow + lowCarry;        // cannot overflow 64 bits
    const uint64_t crossOther = aLow * bHigh + uint32_t(crossFirst);

    return aHigh * bHigh + (crossFirst >> 32) + (crossOther >> 32);
#endif
}
+
// Under Windows it is not possible for a process to run on more than one
// logical processor group. This usually means being limited to using max 64
// cores. To overcome this, some special platform-specific API should be
// called to set group affinity for each thread. Original code from Texel by
// Peter Österlund.
namespace WinProcGroup {

// Binds the calling thread according to its index `idx`.
// NOTE(review): the mapping from index to processor group lives in the
// definition, which is not visible here.
void bindThisThread(std::size_t idx);

}  // namespace WinProcGroup
+
// Command-line derived state.
namespace CommandLine {

// Takes the program's argc/argv.
// NOTE(review): presumably this is what fills in the two directory strings
// below -- confirm in the definition.
void init(int argc, char** argv);

extern std::string workingDirectory;  // path of the working directory
extern std::string binaryDirectory;   // path of the executable directory

}  // namespace CommandLine
+
+} // namespace Stockfish
+
+#endif // #ifndef MISC_H_INCLUDED