/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
+typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
+typedef WORD(*fun5_t)();
}
#endif
#include <sys/mman.h>
#endif
+#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__)
+#define POSIXALIGNEDALLOC
+#include <stdlib.h>
+#endif
+
#include "misc.h"
#include "thread.h"
using namespace std;
+namespace Stockfish {
+
namespace {
/// Version number. If Version is left empty, then compile date in the format
static Logger l;
- if (!fname.empty() && !l.file.is_open())
+ if (l.file.is_open())
+ {
+ cout.rdbuf(l.out.buf);
+ cin.rdbuf(l.in.buf);
+ l.file.close();
+ }
+
+ if (!fname.empty())
{
l.file.open(fname, ifstream::out);
cin.rdbuf(&l.in);
cout.rdbuf(&l.out);
}
- else if (fname.empty() && l.file.is_open())
- {
- cout.rdbuf(l.out.buf);
- cin.rdbuf(l.in.buf);
- l.file.close();
- }
}
};
} // namespace
+
/// engine_info() returns the full name of the current Stockfish version. This
/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
/// the program was compiled) or "Stockfish <Version>", depending on whether
/// Version is empty.
-const string engine_info(bool to_uci) {
+string engine_info(bool to_uci) {
const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
string month, day, year;
/// compiler_info() returns a string trying to describe the compiler we use
-const std::string compiler_info() {
+std::string compiler_info() {
#define stringify2(x) #x
#define stringify(x) stringify2(x)
compiler += "(version ";
compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
compiler += ")";
+ #elif defined(__e2k__) && defined(__LCC__)
+ #define dot_ver2(n) \
+ compiler += (char)'.'; \
+ compiler += (char)('0' + (n) / 10); \
+ compiler += (char)('0' + (n) % 10);
+
+ compiler += "MCST LCC ";
+ compiler += "(version ";
+ compiler += std::to_string(__LCC__ / 100);
+ dot_ver2(__LCC__ % 100)
+ dot_ver2(__LCC_MINOR__)
+ compiler += ")";
#elif __GNUC__
compiler += "g++ (GNUC) ";
compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
compiler += "\nCompilation settings include: ";
compiler += (Is64Bit ? " 64bit" : " 32bit");
+ #if defined(USE_VNNI)
+ compiler += " VNNI";
+ #endif
#if defined(USE_AVX512)
compiler += " AVX512";
#endif
+ compiler += (HasPext ? " BMI2" : "");
#if defined(USE_AVX2)
compiler += " AVX2";
#endif
- #if defined(USE_SSE42)
- compiler += " SSE42";
- #endif
#if defined(USE_SSE41)
compiler += " SSE41";
#endif
#if defined(USE_SSSE3)
compiler += " SSSE3";
#endif
- #if defined(USE_SSE3)
- compiler += " SSE3";
+ #if defined(USE_SSE2)
+ compiler += " SSE2";
+ #endif
+ compiler += (HasPopCnt ? " POPCNT" : "");
+ #if defined(USE_MMX)
+ compiler += " MMX";
#endif
- compiler += (HasPext ? " BMI2" : "");
- compiler += (HasPopCnt ? " POPCNT" : "");
+ #if defined(USE_NEON)
+ compiler += " NEON";
+ #endif
+
#if !defined(NDEBUG)
compiler += " DEBUG";
#endif
#endif
-/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
-/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
-///
+
+/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
+/// does not guarantee the availability of aligned_alloc(). Memory allocated with
+/// std_aligned_alloc() must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
-#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
- return aligned_alloc(alignment, size);
-#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
+
+#if defined(POSIXALIGNEDALLOC)
+ void *mem;
+ return posix_memalign(&mem, alignment, size) ? nullptr : mem;
+#elif defined(_WIN32)
return _mm_malloc(size, alignment);
#else
return std::aligned_alloc(alignment, size);
}
void std_aligned_free(void* ptr) { // counterpart of std_aligned_alloc(): each branch uses the deallocator matching that function's allocator
-#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+
+#if defined(POSIXALIGNEDALLOC) // std_aligned_alloc() used posix_memalign() here, so plain free() applies
free(ptr);
-#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
+#elif defined(_WIN32) // std_aligned_alloc() used _mm_malloc() here, which pairs with _mm_free()
_mm_free(ptr);
#else // std_aligned_alloc() used std::aligned_alloc() here, released with free()
free(ptr);
#endif
}
-/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
-/// The returned pointer is the aligned one, while the mem argument is the one that needs
-/// to be passed to free. With c++17 some of this functionality could be simplified.
-
-#if defined(__linux__) && !defined(__ANDROID__)
-
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
- constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
- size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
- if (posix_memalign(&mem, alignment, size))
- mem = nullptr;
- madvise(mem, allocSize, MADV_HUGEPAGE);
- return mem;
-}
+#if defined(_WIN32)
-#elif defined(_WIN64)
+static void* aligned_large_pages_alloc_windows(size_t allocSize) {
-static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
+ #if !defined(_WIN64)
+ (void)allocSize; // suppress unused-parameter compiler warning
+ return nullptr;
+ #else
HANDLE hProcessToken { };
LUID luid { };
CloseHandle(hProcessToken);
return mem;
-}
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+ #endif
+}
- static bool firstCall = true;
+void* aligned_large_pages_alloc(size_t allocSize) {
// Try to allocate large pages
- mem = aligned_ttmem_alloc_large_pages(allocSize);
-
- // Suppress info strings on the first call. The first call occurs before 'uci'
- // is received and in that case this output confuses some GUIs.
- if (!firstCall)
- {
- if (mem)
- sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
- else
- sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
- }
- firstCall = false;
+ void* mem = aligned_large_pages_alloc_windows(allocSize);
// Fall back to regular, page aligned, allocation if necessary
if (!mem)
#else
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+void* aligned_large_pages_alloc(size_t allocSize) { // non-Windows variant: returns the pointer obtained from std_aligned_alloc() for allocSize rounded up to the alignment
+
+#if defined(__linux__)
+ constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
+#else
+ constexpr size_t alignment = 4096; // assumed small page size
+#endif
- constexpr size_t alignment = 64; // assumed cache line size
- size_t size = allocSize + alignment - 1; // allocate some extra space
- mem = malloc(size);
- void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
- return ret;
+ // round up to multiples of alignment, so the size handed to the allocator is a whole number of (assumed) pages
+ size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
+ void *mem = std_aligned_alloc(alignment, size); // result is not checked here; it is returned to the caller as is
+#if defined(MADV_HUGEPAGE) // the hint is only issued where the platform headers define it
+ madvise(mem, size, MADV_HUGEPAGE); // advisory only: the return value is ignored and mem is not tested for nullptr first
+#endif
+ return mem; // aligned_large_pages_free() releases this through std_aligned_free()
}
#endif
-/// aligned_ttmem_free() will free the previously allocated ttmem
+/// aligned_large_pages_free() will free the memory previously allocated by aligned_large_pages_alloc()
-#if defined(_WIN64)
+#if defined(_WIN32)
-void aligned_ttmem_free(void* mem) {
+void aligned_large_pages_free(void* mem) { // Windows variant: releases mem with VirtualFree(); a null mem is silently accepted
if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) // short-circuit: VirtualFree() is only attempted for a non-null mem
{
DWORD err = GetLastError(); // read the error code before any other call is made
- std::cerr << "Failed to free transposition table. Error code: 0x" <<
- std::hex << err << std::dec << std::endl;
+ std::cerr << "Failed to free large page memory. Error code: 0x"
+ << std::hex << err
+ << std::dec << std::endl; // switch cerr back to decimal after printing the code in hex
exit(EXIT_FAILURE); // a failed release terminates the whole process
}
}
#else
-void aligned_ttmem_free(void *mem) {
- free(mem);
+void aligned_large_pages_free(void *mem) { // non-Windows variant
+ std_aligned_free(mem); // pairs with the std_aligned_alloc() call made by the non-Windows aligned_large_pages_alloc()
}
#endif
#else
-/// best_group() retrieves logical processor information using Windows specific
-/// API and returns the best group id for the thread with index idx. Original
+/// best_node() retrieves logical processor information using Windows specific
+/// API and returns the best node id for the thread with index idx. Original
/// code from Texel by Peter Österlund.
-int best_group(size_t idx) {
+int best_node(size_t idx) {
int threads = 0;
int nodes = 0;
if (!fun1)
return -1;
- // First call to get returnLength. We expect it to fail due to null buffer
+ // First call to GetLogicalProcessorInformationEx() to get returnLength.
+ // We expect the call to fail due to null buffer.
if (fun1(RelationAll, nullptr, &returnLength))
return -1;
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
- // Second call, now we expect to succeed
+ // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
if (!fun1(RelationAll, buffer, &returnLength))
{
free(buffer);
void bindThisThread(size_t idx) {
+  // Binds the calling thread to processors of the NUMA node selected by
+  // best_node(idx). Binding is best-effort: if the node cannot be determined,
+  // a required API is missing, or a query fails, the function returns without
+  // calling SetThreadGroupAffinity and the thread is left as it was.
   // Use only local variables to be thread-safe
-  int group = best_group(idx);
+  int node = best_node(idx);
-  if (group == -1)
+  if (node == -1)
       return;
   // Early exit if the needed API are not available at runtime
   HMODULE k32 = GetModuleHandle("Kernel32.dll");
   auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
   auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
+  auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
+  auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount");
   if (!fun2 || !fun3)
       return;
-  GROUP_AFFINITY affinity;
-  if (fun2(group, &affinity))
-      fun3(GetCurrentThread(), &affinity, nullptr);
+  if (!fun4 || !fun5)
+  {
+      // The multi-group query is unavailable: use the single mask of the node
+      GROUP_AFFINITY affinity;
+      if (fun2(node, &affinity))                                                 // GetNumaNodeProcessorMaskEx
+          fun3(GetCurrentThread(), &affinity, nullptr);                          // SetThreadGroupAffinity
+  }
+  else
+  {
+      // If a numa node has more than one processor group, we assume they are
+      // sized equal and we spread threads evenly across the groups.
+      USHORT elements = fun5();                                                  // GetMaximumProcessorGroupCount, 0 on failure
+      USHORT returnedElements = 0;
+      // Do not allocate when there is nothing to query. malloc() may also fail;
+      // in both cases affinity stays nullptr and the thread is left unbound.
+      GROUP_AFFINITY *affinity = elements
+                               ? (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY))
+                               : nullptr;
+      // returnedElements must be non-zero, otherwise idx % returnedElements
+      // below would divide by zero.
+      if (   affinity
+          && fun4(node, affinity, elements, &returnedElements)                   // GetNumaNodeProcessorMask2
+          && returnedElements > 0)
+          fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr);  // SetThreadGroupAffinity
+      free(affinity);                                                            // free(nullptr) is a no-op
+  }
 }
#endif
} // namespace WinProcGroup
+
+#ifdef _WIN32
+#include <direct.h>
+#define GETCWD _getcwd
+#else
+#include <unistd.h>
+#define GETCWD getcwd
+#endif
+
+namespace CommandLine {
+
+string argv0; // path+name of the executable binary, as given by argv[0]
+string binaryDirectory; // path of the executable directory
+string workingDirectory; // path of the working directory
+
+void init(int argc, char* argv[]) {
+    // Fills the CommandLine globals argv0, workingDirectory and binaryDirectory.
+    //   argc  number of entries in argv, used to detect an empty argument vector
+    //   argv  program arguments; argv[0] is taken as path+name of the binary
+    string pathSeparator;
+
+    // extract the path+name of the executable binary. argv[0] is a null pointer
+    // when argc == 0, and constructing a std::string from a null pointer is
+    // undefined behavior, so fall back to an empty name in that case.
+    argv0 = (argc > 0 && argv && argv[0]) ? argv[0] : "";
+
+#ifdef _WIN32
+    pathSeparator = "\\";
+  #ifdef _MSC_VER
+    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
+    // issues in some windows 10 versions, so check returned values carefully.
+    char* pgmptr = nullptr;
+    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
+        argv0 = pgmptr;
+  #endif
+#else
+    pathSeparator = "/";
+#endif
+
+    // extract the working directory (stays empty if it cannot be retrieved)
+    workingDirectory = "";
+    char buff[40000];
+    char* cwd = GETCWD(buff, 40000);
+    if (cwd)
+        workingDirectory = cwd;
+
+    // extract the binary directory path from argv0: keep everything up to and
+    // including the last separator, or use "." + separator if there is none
+    binaryDirectory = argv0;
+    size_t pos = binaryDirectory.find_last_of("\\/");
+    if (pos == std::string::npos)
+        binaryDirectory = "." + pathSeparator;
+    else
+        binaryDirectory.resize(pos + 1);
+
+    // pattern replacement: "./" at the start of path is replaced by the working directory.
+    // rfind(prefix, 0) only inspects position 0, i.e. it is a pure prefix test;
+    // only the leading "." is replaced, so the separator after it is kept.
+    if (binaryDirectory.rfind("." + pathSeparator, 0) == 0)
+        binaryDirectory.replace(0, 1, workingDirectory);
+}
+
+
+} // namespace CommandLine
+
+} // namespace Stockfish