PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
+typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
}
#endif
static Logger l;
- if (!fname.empty() && !l.file.is_open())
+ if (l.file.is_open())
+ {
+ cout.rdbuf(l.out.buf);
+ cin.rdbuf(l.in.buf);
+ l.file.close();
+ }
+
+ if (!fname.empty())
{
l.file.open(fname, ifstream::out);
cin.rdbuf(&l.in);
cout.rdbuf(&l.out);
}
- else if (fname.empty() && l.file.is_open())
- {
- cout.rdbuf(l.out.buf);
- cin.rdbuf(l.in.buf);
- l.file.close();
- }
}
};
#else
-/// best_group() retrieves logical processor information using Windows specific
-/// API and returns the best group id for the thread with index idx. Original
+/// best_node() retrieves logical processor information using Windows specific
+/// API and returns the best node id for the thread with index idx. Original
/// code from Texel by Peter Österlund.
-int best_group(size_t idx) {
+int best_node(size_t idx) {
int threads = 0;
int nodes = 0;
void bindThisThread(size_t idx) {
// Use only local variables to be thread-safe
- int group = best_group(idx);
+ int node = best_node(idx);
- if (group == -1)
+ if (node == -1)
return;
// Early exit if the needed API are not available at runtime
HMODULE k32 = GetModuleHandle("Kernel32.dll");
auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
+ auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
if (!fun2 || !fun3)
return;
- GROUP_AFFINITY affinity;
- if (fun2(group, &affinity))
- fun3(GetCurrentThread(), &affinity, nullptr);
+ if (!fun4) {
+ // Pre-Windows-11 path: a NUMA node maps to a single processor group
+ GROUP_AFFINITY affinity;
+ if (fun2(node, &affinity))
+ fun3(GetCurrentThread(), &affinity, nullptr);
+ } else {
+ // If a numa node has more than one processor group, we assume they are
+ // equally sized and we spread threads evenly across the groups.
+ USHORT elements, returnedElements;
+ elements = GetMaximumProcessorGroupCount();
+ GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(
+ elements * sizeof(GROUP_AFFINITY));
+ // Guard the allocation: on malloc failure fun4 would otherwise write
+ // through a null pointer. Also require returnedElements > 0 so the
+ // modulo below cannot divide by zero. free(nullptr) is well-defined.
+ if (affinity && fun4(node, affinity, elements, &returnedElements)
+ && returnedElements > 0)
+ fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr);
+ free(affinity);
+ }
}
#endif