X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fmisc.cpp;h=4dfa9f0ce2f4bf6572f825e0741db602da4d3d55;hb=e4a0c6c75950bf27b6dc32490a1102499643126b;hp=9e7b7e37b8cc99ddb79433033c3b5107f6b7abc6;hpb=7cfc1f9b150d387788a9b02360e49ba2a56505f7;p=stockfish diff --git a/src/misc.cpp b/src/misc.cpp index 9e7b7e37..4dfa9f0c 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -36,6 +36,7 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); } #endif @@ -110,7 +111,14 @@ public: static Logger l; - if (!fname.empty() && !l.file.is_open()) + if (l.file.is_open()) + { + cout.rdbuf(l.out.buf); + cin.rdbuf(l.in.buf); + l.file.close(); + } + + if (!fname.empty()) { l.file.open(fname, ifstream::out); @@ -123,12 +131,6 @@ public: cin.rdbuf(&l.in); cout.rdbuf(&l.out); } - else if (fname.empty() && l.file.is_open()) - { - cout.rdbuf(l.out.buf); - cin.rdbuf(l.in.buf); - l.file.close(); - } } }; @@ -378,6 +380,7 @@ void std_aligned_free(void* ptr) { static void* aligned_large_pages_alloc_windows(size_t allocSize) { #if !defined(_WIN64) + (void)allocSize; // suppress unused-parameter compiler warning return nullptr; #else @@ -493,11 +496,11 @@ void bindThisThread(size_t) {} #else -/// best_group() retrieves logical processor information using Windows specific -/// API and returns the best group id for the thread with index idx. Original +/// best_node() retrieves logical processor information using Windows specific +/// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Österlund. -int best_group(size_t idx) { +int best_node(size_t idx) { int threads = 0; int nodes = 0; @@ -569,22 +572,35 @@ int best_group(size_t idx) { void bindThisThread(size_t idx) { // Use only local variables to be thread-safe - int group = best_group(idx); + int node = best_node(idx); - if (group == -1) + if (node == -1) return; // Early exit if the needed API are not available at runtime HMODULE k32 = GetModuleHandle("Kernel32.dll"); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); + auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); if (!fun2 || !fun3) return; - GROUP_AFFINITY affinity; - if (fun2(group, &affinity)) - fun3(GetCurrentThread(), &affinity, nullptr); + if (!fun4) { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) + fun3(GetCurrentThread(), &affinity, nullptr); + } else { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = GetMaximumProcessorGroupCount(); + GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc( + elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) + fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); + free(affinity); + } } #endif