Add support for Windows large pages

author Sami Kiminki <skiminki@users.noreply.github.com>

Mon, 4 May 2020 17:49:27 +0000 (20:49 +0300)

committer Joost VandeVondele <Joost.VandeVondele@gmail.com>

Wed, 13 May 2020 18:57:47 +0000 (20:57 +0200)
author Sami Kiminki <skiminki@users.noreply.github.com>
Mon, 4 May 2020 17:49:27 +0000 (20:49 +0300)
committer Joost VandeVondele <Joost.VandeVondele@gmail.com>
Wed, 13 May 2020 18:57:47 +0000 (20:57 +0200)
diff --git a/Readme.md b/Readme.md

index a759eff665bc9e65e64b578b77c6dcf6532bf07a..35ff095d6792857cc8db58ba84c734aaeba52644 100644 (file)
--- a/Readme.md
+++ b/Readme.md
@@ -42,7 +42,7 @@ Currently, Stockfish has the following UCI options:
      this equal to the number of CPU cores available.
  
    * #### Hash
      this equal to the number of CPU cores available.
  
    * #### Hash
-    The size of the hash table in MB.
+    The size of the hash table in MB. It is recommended to set Hash after setting Threads.
  
    * #### Clear Hash
      Clear the hash table.
  
    * #### Clear Hash
      Clear the hash table.
@@ -138,6 +138,30 @@ more compact than Nalimov tablebases, while still storing all information
  needed for optimal play and in addition being able to take into account
  the 50-move rule.
  
  needed for optimal play and in addition being able to take into account
  the 50-move rule.
  
+## Large Pages
+
+Stockfish supports large pages on Linux and Windows. Large pages make
+hash table access more efficient, improving engine speed, especially
+with large hash sizes. Typical gains are 5% to 10% in nodes per second
+(nps), but speed increases of up to 30% have been measured. The support
+is automatic: Stockfish attempts to use large pages when available and
+falls back to regular memory allocation when they are not.
+
+### Support on Linux
+
+Large page support on Linux is provided by the Linux kernel's
+transparent huge pages functionality. Typically, transparent huge pages
+are already enabled, and no configuration is needed.
+
+### Support on Windows
+
+The use of large pages requires the "Lock Pages in Memory" privilege. See
+[Enable the Lock Pages in Memory Option (Windows)](https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows)
+for how to enable this privilege. Logging out and back in may be needed
+afterwards. Due to memory fragmentation, it may not always be
+possible to allocate large pages even when enabled. A reboot
+might alleviate this problem. To determine whether large pages
+are in use, see the engine log.
  
  ## Compiling Stockfish yourself from the sources
  
  
  ## Compiling Stockfish yourself from the sources
  
diff --git a/src/main.cpp b/src/main.cpp

index 6eeda66dff805e7672bc83eef5c342799398ed72..c7cf2c6f28fddb05581667ffc16a5dca5d35657e 100644 (file)
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -49,6 +49,7 @@ int main(int argc, char* argv[]) {
  
    UCI::loop(argc, argv);
  
  
    UCI::loop(argc, argv);
  
+  TT.resize(0);
    Threads.set(0);
    return 0;
  }
    Threads.set(0);
    return 0;
  }
diff --git a/src/misc.cpp b/src/misc.cpp

index 946810088da8a3ee74989215c852029e8879c1cb..b1c0feeb9e139b6b91476663d5a5987872746c0e 100644 (file)
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -309,6 +309,69 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
    return mem;
  }
  
    return mem;
  }
  
+#elif defined(_WIN64)
+
+static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
+  // Attempts a MEM_LARGE_PAGES allocation of allocSize bytes, rounded up to whole large pages; returns nullptr on any failure.
+  HANDLE hProcessToken { };
+  LUID luid { };
+  void* mem = nullptr; // stays nullptr unless the VirtualAlloc() call below succeeds
+
+  const size_t largePageSize = GetLargePageMinimum();
+  if (!largePageSize) // NOTE(review): Win32 docs describe 0 as "large pages not supported" - confirm
+      return nullptr;
+
+  // We need SeLockMemoryPrivilege, so try to enable it for the process
+  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+      return nullptr;
+
+  if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
+  {
+      TOKEN_PRIVILEGES tp { };
+      TOKEN_PRIVILEGES prevTp { };
+      DWORD prevTpLen = 0;
+
+      tp.PrivilegeCount = 1;
+      tp.Privileges[0].Luid = luid;
+      tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+      // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
+      // we still need to query GetLastError() to ensure that the privileges were actually obtained...
+      if (AdjustTokenPrivileges(
+              hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) &&
+          GetLastError() == ERROR_SUCCESS)
+      {
+          // round up size to full pages and allocate (the mask trick assumes largePageSize is a power of two)
+          allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
+          mem = VirtualAlloc(
+              NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+
+          // privilege no longer needed, restore previous state
+          AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL);
+      }
+  }
+  // Reached on every path after OpenProcessToken() succeeded, so the token handle is always closed.
+  CloseHandle(hProcessToken);
+
+  return mem;
+}
+
+void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+  // _WIN64 variant: tries large pages first, then a regular VirtualAlloc(); the pointer is stored in mem and also returned.
+  // try to allocate large pages
+  mem = aligned_ttmem_alloc_large_pages(allocSize);
+  if (mem)
+      sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
+  else
+      sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
+  // allocSize was passed by value above, so the fallback requests the caller's original (unrounded) size.
+  // fall back to regular, page aligned, allocation if necessary
+  if (!mem)
+      mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+
+  return mem; // NOTE(review): presumably nullptr when both attempts fail (VirtualAlloc failure value) - confirm
+}
+
  #else
  
  void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
  #else
  
  void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
@@ -322,6 +385,28 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
  
  #endif
  
  
  #endif
  
+/// aligned_ttmem_free() frees the transposition table memory previously allocated by aligned_ttmem_alloc()
+#if defined(_WIN64)
+
+void aligned_ttmem_free(void* mem) {
+  // _WIN64 variant: releases mem via VirtualFree(); on failure prints GetLastError() in hex to stderr and exits.
+  if (!VirtualFree(mem, 0, MEM_RELEASE))
+  {
+      DWORD err = GetLastError();
+      std::cerr << "Failed to free transposition table. Error code: 0x" <<
+          std::hex << err << std::dec << std::endl;
+      exit(EXIT_FAILURE);
+  }
+}
+
+#else
+
+void aligned_ttmem_free(void *mem) { // variant used when _WIN64 is not defined
+  free(mem); // hands the pointer straight to the C allocator's free()
+}
+
+#endif
+
  
  namespace WinProcGroup {
  
  
  namespace WinProcGroup {
  
diff --git a/src/misc.h b/src/misc.h

index e0e0e98be8357cee894b2133ead6f654c0abc3f4..9d53c2dab129220f37bf01f273db05048f0fb97b 100644 (file)
--- a/src/misc.h
+++ b/src/misc.h
@@ -34,6 +34,7 @@ const std::string compiler_info();
  void prefetch(void* addr);
  void start_logger(const std::string& fname);
  void* aligned_ttmem_alloc(size_t size, void*& mem);
  void prefetch(void* addr);
  void start_logger(const std::string& fname);
  void* aligned_ttmem_alloc(size_t size, void*& mem);
+void aligned_ttmem_free(void* mem);
  
  void dbg_hit_on(bool b);
  void dbg_hit_on(bool c, bool b);
  
  void dbg_hit_on(bool b);
  void dbg_hit_on(bool c, bool b);
diff --git a/src/tt.cpp b/src/tt.cpp

index 7e95a2a4e6dbcd74f374dd534b1b611fd2787a62..6ee63138d15497851843ce114108cddba262151e 100644 (file)
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -63,7 +63,14 @@ void TranspositionTable::resize(size_t mbSize) {
  
    Threads.main()->wait_for_search_finished();
  
  
    Threads.main()->wait_for_search_finished();
  
-  free(mem);
+  if (mem)
+      aligned_ttmem_free(mem);
+
+  if (!mbSize)
+  {
+      mem = nullptr;
+      return;
+  }
  
    clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
    table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
  
    clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
    table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
author	Sami Kiminki <skiminki@users.noreply.github.com>
	Mon, 4 May 2020 17:49:27 +0000 (20:49 +0300)
committer	Joost VandeVondele <Joost.VandeVondele@gmail.com>
	Wed, 13 May 2020 18:57:47 +0000 (20:57 +0200)
Readme.md		patch \| blob \| history
src/main.cpp		patch \| blob \| history
src/misc.cpp		patch \| blob \| history
src/misc.h		patch \| blob \| history
src/tt.cpp		patch \| blob \| history