X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fmisc.cpp;h=7a23b7a9c930791ce41c254ac3834fbff2941978;hp=7075dd3cdc8498192cd8f256740fcc497cb63827;hb=2ec626ddae9c81af6338f72296df8a1465b5e036;hpb=9eccba776198c210563666787a9b5fe44b386fdc diff --git a/src/misc.cpp b/src/misc.cpp index 7075dd3c..7a23b7a9 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -21,9 +21,19 @@ #ifdef _WIN32 #if _WIN32_WINNT < 0x0601 #undef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 // Force to include newest API (Win 7 or later) +#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes #endif -#include <windows.h> // For processor groups +#include <windows.h> +// The Windows API functions needed for processor groups may be missing on old +// Windows versions, so instead of calling them directly (forcing the linker to +// resolve the calls at link time), try to load them at runtime. To do this we +// first need to define the corresponding function pointer types. +extern "C" { +typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); +typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); +typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +} #endif #include @@ -215,15 +225,14 @@ int get_group(size_t idx) { DWORD returnLength = 0; DWORD byteOffset = 0; - // Early exit if the needed API are not available at runtime + // Early exit if the needed API is not available at runtime HMODULE k32 = GetModuleHandle("Kernel32.dll"); - if ( !GetProcAddress(k32, "GetLogicalProcessorInformationEx") - || !GetProcAddress(k32, "GetNumaNodeProcessorMaskEx") - || !GetProcAddress(k32, "SetThreadGroupAffinity")) + auto fun1 = (fun1_t)GetProcAddress(k32, "GetLogicalProcessorInformationEx"); + if (!fun1) return -1; // First call to get returnLength. 
We expect it to fail due to null buffer - if (GetLogicalProcessorInformationEx(RelationAll, nullptr, &returnLength)) + if (fun1(RelationAll, nullptr, &returnLength)) return -1; // Once we know returnLength, allocate the buffer @@ -231,7 +240,7 @@ int get_group(size_t idx) { ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); // Second call, now we expect to succeed - if (!GetLogicalProcessorInformationEx(RelationAll, buffer, &returnLength)) + if (!fun1(RelationAll, buffer, &returnLength)) { free(buffer); return -1; @@ -278,15 +287,31 @@ int get_group(size_t idx) { void bindThisThread(size_t idx) { - // Use a local variable instead of a static: slower but thread-safe + // If the OS already scheduled us on a group other than 0, don't overwrite + // its choice: we may be one of many single-threaded processes running on + // some Windows NUMA hardware, for instance in fishtest. To keep it simple, + // just check whether the number of running threads is below a threshold; in + // that case none of this NUMA machinery is needed. + if (Threads.size() < 8) + return; + + // Use only local variables to be thread-safe int group = get_group(idx); if (group == -1) return; - GROUP_AFFINITY mask; - if (GetNumaNodeProcessorMaskEx(group, &mask)) - SetThreadGroupAffinity(GetCurrentThread(), &mask, nullptr); + // Early exit if the needed APIs are not available at runtime + HMODULE k32 = GetModuleHandle("Kernel32.dll"); + auto fun2 = (fun2_t)GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); + auto fun3 = (fun3_t)GetProcAddress(k32, "SetThreadGroupAffinity"); + + if (!fun2 || !fun3) + return; + + GROUP_AFFINITY affinity; + if (fun2(group, &affinity)) + fun3(GetCurrentThread(), &affinity, nullptr); } #endif