+ s ^= s >> 12, s ^= s << 25, s ^= s >> 27;
+ return s * 2685821657736338717LL;
+ }
+
+ public:
+ PRNG(uint64_t seed) : // construct the generator with `seed` as its initial state
+ s(seed) {
+ assert(seed); // zero is rejected (when asserts are enabled): the xorshift steps in rand64() map 0 to 0
+ }
+
+ template<typename T> // T must be constructible from the rand64() result via T(...)
+ T rand() {
+ return T(rand64()); // one rand64() draw, converted to T with a functional cast
+ }
+
+ // Special generator used for fast initialization of magic numbers: it ANDs
+ // three rand64() draws, so outputs have only about 1/8th of their bits set on average.
+ template<typename T>
+ T sparse_rand() {
+ return T(rand64() & rand64() & rand64()); // each call advances the state; the AND result is converted to T
+ }
+};
+
+inline uint64_t mul_hi64(uint64_t a, uint64_t b) { // returns the upper 64 bits of the full 128-bit product a * b
+#if defined(__GNUC__) && defined(IS_64BIT)
+ __extension__ using uint128 = unsigned __int128; // __extension__ keeps pedantic builds quiet about the non-standard type
+ return (uint128(a) * uint128(b)) >> 64; // widen, multiply, keep the high half
+#else
+ uint64_t aL = uint32_t(a), aH = a >> 32; // low and high 32-bit halves of a
+ uint64_t bL = uint32_t(b), bH = b >> 32; // low and high 32-bit halves of b
+ uint64_t c1 = (aL * bL) >> 32; // carry out of the low x low partial product
+ uint64_t c2 = aH * bL + c1; // high x low plus that carry; the sum still fits in 64 bits
+ uint64_t c3 = aL * bH + uint32_t(c2); // low x high plus the low half of c2
+ return aH * bH + (c2 >> 32) + (c3 >> 32); // high x high plus the carries out of both middle terms
+#endif
+}
+
+// Under Windows it is not possible for a process to run on more than one
+// logical processor group. This usually means being limited to using max 64
+// cores. To overcome this, some special platform-specific API should be
+// called to set group affinity for each thread. Original code from Texel by
+// Peter Österlund.