git.sesse.net Git - stockfish/commitdiff
Merge remote-tracking branch 'upstream/master' into HEAD
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 11 Sep 2020 17:49:12 +0000 (19:49 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 11 Sep 2020 17:49:12 +0000 (19:49 +0200)
1  2 
src/Makefile
src/main.cpp
src/misc.cpp
src/position.cpp
src/syzygy/tbprobe.cpp
src/ucioption.cpp

diff --combined src/Makefile
index 591798129b855fc24ed4a94b8d698d3da1622729,340b3008381be752d1d9edf1cd3020bffd323961..4818066015a3e598bbe853ff3ea125a3011df224
@@@ -39,12 -39,9 +39,12 @@@ PGOBENCH = ./$(EXE) benc
  SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
        material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
        search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
 -      nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp
 +      nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp \
 +      hashprobe.grpc.pb.cc hashprobe.pb.cc
 +CLISRCS = client.cpp hashprobe.grpc.pb.cc hashprobe.pb.cc uci.cpp
  
  OBJS = $(notdir $(SRCS:.cpp=.o))
 +CLIOBJS = $(notdir $(CLISRCS:.cpp=.o))
  
  VPATH = syzygy:nnue:nnue/features
  
@@@ -70,14 -67,16 +70,16 @@@ endi
  # bits = 64/32        --- -DIS_64BIT       --- 64-/32-bit operating system
  # prefetch = yes/no   --- -DUSE_PREFETCH   --- Use prefetch asm-instruction
  # popcnt = yes/no     --- -DUSE_POPCNT     --- Use popcnt asm-instruction
+ # pext = yes/no       --- -DUSE_PEXT       --- Use pext x86_64 asm-instruction
  # sse = yes/no        --- -msse            --- Use Intel Streaming SIMD Extensions
- # sse3 = yes/no       --- -msse3           --- Use Intel Streaming SIMD Extensions 3
+ # mmx = yes/no        --- -mmmx            --- Use Intel MMX instructions
+ # sse2 = yes/no       --- -msse2           --- Use Intel Streaming SIMD Extensions 2
  # ssse3 = yes/no      --- -mssse3          --- Use Intel Supplemental Streaming SIMD Extensions 3
  # sse41 = yes/no      --- -msse4.1         --- Use Intel Streaming SIMD Extensions 4.1
- # sse42 = yes/no      --- -msse4.2         --- Use Intel Streaming SIMD Extensions 4.2
  # avx2 = yes/no       --- -mavx2           --- Use Intel Advanced Vector Extensions 2
- # pext = yes/no       --- -DUSE_PEXT       --- Use pext x86_64 asm-instruction
  # avx512 = yes/no     --- -mavx512bw       --- Use Intel Advanced Vector Extensions 512
+ # vnni256 = yes/no    --- -mavx512vnni     --- Use Intel Vector Neural Network Instructions 256
+ # vnni512 = yes/no    --- -mavx512vnni     --- Use Intel Vector Neural Network Instructions 512
  # neon = yes/no       --- -DUSE_NEON       --- Use ARM SIMD architecture
  #
  # Note that Makefile is space sensitive, so when adding new architectures
  # or modifying existing flags, you have to make sure there are no extra spaces
  # at the end of the line for flag values.
  
  ### 2.1. General and architecture defaults
+ ifeq ($(ARCH),)
+    ARCH = x86-64-modern
+    help_skip_sanity = yes
+ endif
+ # explicitly check for the list of supported architectures (as listed with make help),
+ # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
+ ifeq ($(ARCH), $(filter $(ARCH), \
+                  x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
+                  x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
+                  x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
+                  armv7 armv7-neon armv8 apple-silicon general-64 general-32))
+    SUPPORTED_ARCH=true
+ else
+    SUPPORTED_ARCH=false
+ endif
  optimize = yes
  debug = no
  sanitize = no
  bits = 64
  prefetch = no
  popcnt = no
+ pext = no
  sse = no
- sse3 = no
+ mmx = no
+ sse2 = no
  ssse3 = no
  sse41 = no
- sse42 = no
  avx2 = no
- pext = no
  avx512 = no
+ vnni256 = no
+ vnni512 = no
  neon = no
- ARCH = x86-64-modern
+ STRIP = strip
  
  ### 2.2 Architecture specific
- ifeq ($(ARCH),general-32)
-       arch = any
-       bits = 32
- endif
  
- ifeq ($(ARCH),x86-32-old)
+ ifeq ($(findstring x86,$(ARCH)),x86)
+ # x86-32/64
+ ifeq ($(findstring x86-32,$(ARCH)),x86-32)
        arch = i386
        bits = 32
+       sse = yes
+       mmx = yes
+ else
+       arch = x86_64
+       sse = yes
+       sse2 = yes
  endif
  
- ifeq ($(ARCH),x86-32)
-       arch = i386
-       bits = 32
-       prefetch = yes
+ ifeq ($(findstring -sse,$(ARCH)),-sse)
        sse = yes
  endif
  
- ifeq ($(ARCH),general-64)
-       arch = any
+ ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
+       popcnt = yes
  endif
  
- ifeq ($(ARCH),x86-64)
-       arch = x86_64
-       prefetch = yes
-       sse = yes
+ ifeq ($(findstring -mmx,$(ARCH)),-mmx)
+       mmx = yes
  endif
  
- ifeq ($(ARCH),x86-64-sse3)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -sse2,$(ARCH)),-sse2)
        sse = yes
-       sse3 = yes
+       sse2 = yes
  endif
  
- ifeq ($(ARCH),x86-64-sse3-popcnt)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
        sse = yes
-       sse3 = yes
-       popcnt = yes
+       sse2 = yes
+       ssse3 = yes
  endif
  
- ifeq ($(ARCH),x86-64-ssse3)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -sse41,$(ARCH)),-sse41)
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
+       sse41 = yes
  endif
  
- ifeq ($(ARCH),x86-64-sse41)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -modern,$(ARCH)),-modern)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
  endif
  
- ifeq ($(ARCH),x86-64-modern)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -avx2,$(ARCH)),-avx2)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
+       avx2 = yes
  endif
  
- ifeq ($(ARCH),x86-64-sse42)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
-       sse42 = yes
+       avx2 = yes
+       pext = yes
  endif
  
- ifeq ($(ARCH),x86-64-avx2)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -avx512,$(ARCH)),-avx512)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
-       sse42 = yes
        avx2 = yes
+       pext = yes
+       avx512 = yes
  endif
  
- ifeq ($(ARCH),x86-64-bmi2)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -vnni256,$(ARCH)),-vnni256)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
-       sse42 = yes
        avx2 = yes
        pext = yes
+       vnni256 = yes
  endif
  
- ifeq ($(ARCH),x86-64-avx512)
-       arch = x86_64
-       prefetch = yes
+ ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
        popcnt = yes
        sse = yes
-       sse3 = yes
+       sse2 = yes
        ssse3 = yes
        sse41 = yes
-       sse42 = yes
        avx2 = yes
        pext = yes
        avx512 = yes
+       vnni512 = yes
+ endif
+ ifeq ($(sse),yes)
+       prefetch = yes
+ endif
+ # 64-bit pext is not available on x86-32
+ ifeq ($(bits),32)
+       pext = no
+ endif
+ else
+ # all other architectures
+ ifeq ($(ARCH),general-32)
+       arch = any
+       bits = 32
+ endif
+ ifeq ($(ARCH),general-64)
+       arch = any
  endif
  
  ifeq ($(ARCH),armv7)
        bits = 32
  endif
  
+ ifeq ($(ARCH),armv7-neon)
+       arch = armv7
+       prefetch = yes
+       popcnt = yes
+       neon = yes
+       bits = 32
+ endif
  ifeq ($(ARCH),armv8)
-       arch = armv8-a
+       arch = armv8
        prefetch = yes
        popcnt = yes
        neon = yes
@@@ -254,6 -289,8 +292,8 @@@ ifeq ($(ARCH),ppc-64
        prefetch = yes
  endif
  
+ endif
  ### ==========================================================================
  ### Section 3. Low-level Configuration
  ### ==========================================================================
@@@ -270,9 -307,9 +310,9 @@@ endi
  ifeq ($(COMP),gcc)
        comp=gcc
        CXX=g++
 -      CXXFLAGS += -pedantic -Wextra -Wshadow
 +      CXXFLAGS += -pedantic -Wextra
  
-       ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+       ifeq ($(arch),$(filter $(arch),armv7 armv8))
                ifeq ($(OS),Android)
                        CXXFLAGS += -m$(bits)
                        LDFLAGS += -m$(bits)
                LDFLAGS += -m$(bits)
        endif
  
+       ifeq ($(arch),$(filter $(arch),armv7))
+               LDFLAGS += -latomic
+       endif
        ifneq ($(KERNEL),Darwin)
           LDFLAGS += -Wl,--no-as-needed
        endif
@@@ -329,7 -370,7 +373,7 @@@ ifeq ($(COMP),clang
        endif
        endif
  
-       ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+       ifeq ($(arch),$(filter $(arch),armv7 armv8))
                ifeq ($(OS),Android)
                        CXXFLAGS += -m$(bits)
                        LDFLAGS += -m$(bits)
@@@ -354,8 -395,28 +398,28 @@@ endi
  endif
  
  ifeq ($(KERNEL),Darwin)
-       CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.15
-       LDFLAGS += -arch $(arch) -mmacosx-version-min=10.15
+       CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+       LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+       XCRUN = xcrun
+ endif
+ # To cross-compile for Android, NDK version r21 or later is recommended.
+ # In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
+ # Currently we don't know how to make PGO builds with the NDK yet.
+ ifeq ($(COMP),ndk)
+       CXXFLAGS += -stdlib=libc++ -fPIE
+       ifeq ($(arch),armv7)
+               comp=armv7a-linux-androideabi16-clang
+               CXX=armv7a-linux-androideabi16-clang++
+               CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+               STRIP=arm-linux-androideabi-strip
+       endif
+       ifeq ($(arch),armv8)
+               comp=aarch64-linux-android21-clang
+               CXX=aarch64-linux-android21-clang++
+               STRIP=aarch64-linux-android-strip
+       endif
+       LDFLAGS += -static-libstdc++ -pie -lm -latomic
  endif
  
  ### Travis CI script uses COMPILER to overwrite CXX
@@@ -368,13 -429,26 +432,26 @@@ ifdef COMPCX
        CXX=$(COMPCXX)
  endif
  
+ ### Sometimes gcc is really clang
+ ifeq ($(COMP),gcc)
+       gccversion = $(shell $(CXX) --version)
+       gccisclang = $(findstring clang,$(gccversion))
+       ifneq ($(gccisclang),)
+               profile_make = clang-profile-make
+               profile_use = clang-profile-use
+       endif
+ endif
  ### On mingw use Windows threads, otherwise POSIX
  ifneq ($(comp),mingw)
+       CXXFLAGS += -DUSE_PTHREADS
        # On Android Bionic's C library comes with its own pthread implementation bundled in
        ifneq ($(OS),Android)
                # Haiku has pthreads in its libroot, so only link it in on other platforms
                ifneq ($(KERNEL),Haiku)
-                       LDFLAGS += -lpthread
+                       ifneq ($(COMP),ndk)
+                               LDFLAGS += -lpthread
+                       endif
                endif
        endif
  endif
@@@ -395,7 -469,7 +472,7 @@@ endi
  ### 3.3 Optimization
  ifeq ($(optimize),yes)
  
 -      CXXFLAGS += -O3
 +      CXXFLAGS += -O3 -g
  
        ifeq ($(comp),gcc)
                ifeq ($(OS), Android)
@@@ -419,7 -493,6 +496,6 @@@ endi
  ifeq ($(prefetch),yes)
        ifeq ($(sse),yes)
                CXXFLAGS += -msse
-               DEPENDFLAGS += -msse
        endif
  else
        CXXFLAGS += -DNO_PREFETCH
@@@ -427,7 -500,7 +503,7 @@@ endi
  
  ### 3.6 popcnt
  ifeq ($(popcnt),yes)
-       ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64))
+       ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
                CXXFLAGS += -DUSE_POPCNT
        else ifeq ($(comp),icc)
                CXXFLAGS += -msse3 -DUSE_POPCNT
        endif
  endif
  
  ifeq ($(avx2),yes)
        CXXFLAGS += -DUSE_AVX2
        ifeq ($(comp),$(filter $(comp),gcc clang mingw))
@@@ -446,14 -520,21 +523,21 @@@ endi
  ifeq ($(avx512),yes)
        CXXFLAGS += -DUSE_AVX512
        ifeq ($(comp),$(filter $(comp),gcc clang mingw))
-               CXXFLAGS += -mavx512bw
+               CXXFLAGS += -mavx512f -mavx512bw
+       endif
+ endif
+ ifeq ($(vnni256),yes)
+       CXXFLAGS += -DUSE_VNNI
+       ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+               CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
        endif
  endif
  
- ifeq ($(sse42),yes)
-       CXXFLAGS += -DUSE_SSE42
+ ifeq ($(vnni512),yes)
+       CXXFLAGS += -DUSE_VNNI
        ifeq ($(comp),$(filter $(comp),gcc clang mingw))
-               CXXFLAGS += -msse4.2
+               CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
        endif
  endif
  
@@@ -471,19 -552,29 +555,29 @@@ ifeq ($(ssse3),yes
        endif
  endif
  
- ifeq ($(sse3),yes)
-       CXXFLAGS += -DUSE_SSE3
+ ifeq ($(sse2),yes)
+       CXXFLAGS += -DUSE_SSE2
        ifeq ($(comp),$(filter $(comp),gcc clang mingw))
-               CXXFLAGS += -msse3
+               CXXFLAGS += -msse2
        endif
  endif
  
- ifeq ($(neon),yes)
-       CXXFLAGS += -DUSE_NEON
+ ifeq ($(mmx),yes)
+       CXXFLAGS += -DUSE_MMX
+       ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+               CXXFLAGS += -mmmx
+       endif
  endif
  
- ifeq ($(arch),x86_64)
-       CXXFLAGS += -DUSE_SSE2
+ ifeq ($(neon),yes)
+       CXXFLAGS += -DUSE_NEON
+       ifeq ($(KERNEL),Linux)
+       ifneq ($(COMP),ndk)
+       ifneq ($(arch),armv8)
+               CXXFLAGS += -mfpu=neon
+       endif
+       endif
+       endif
  endif
  
  ### 3.7 pext
@@@ -499,18 -590,43 +593,43 @@@ endi
  ### needs access to the optimization flags.
  ifeq ($(optimize),yes)
  ifeq ($(debug), no)
-       ifeq ($(comp),$(filter $(comp),gcc clang))
+       ifeq ($(COMP),ndk)
+               CXXFLAGS += -flto=thin
+               LDFLAGS += $(CXXFLAGS)
+       else ifeq ($(comp),clang)
+               CXXFLAGS += -flto=thin
+               ifneq ($(findstring MINGW,$(KERNEL)),)
+                       CXXFLAGS += -fuse-ld=lld
+               else ifneq ($(findstring MSYS,$(KERNEL)),)
+                       CXXFLAGS += -fuse-ld=lld
+               endif
+               LDFLAGS += $(CXXFLAGS)
+ # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
+ # GCC on some systems.
+       else ifeq ($(comp),gcc)
+       ifeq ($(gccisclang),)
                CXXFLAGS += -flto
+               LDFLAGS += $(CXXFLAGS) -flto=jobserver
+               ifneq ($(findstring MINGW,$(KERNEL)),)
+                       LDFLAGS += -save-temps
+               else ifneq ($(findstring MSYS,$(KERNEL)),)
+                       LDFLAGS += -save-temps
+               endif
+       else
+               CXXFLAGS += -flto=thin
                LDFLAGS += $(CXXFLAGS)
        endif
  
  # To use LTO and static linking on windows, the tool chain requires a recent gcc:
- # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not.
+ # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
  # So, only enable it for a cross from Linux by default.
-       ifeq ($(comp),mingw)
+       else ifeq ($(comp),mingw)
        ifeq ($(KERNEL),Linux)
+       ifneq ($(arch),i386)
                CXXFLAGS += -flto
-               LDFLAGS += $(CXXFLAGS)
+               LDFLAGS += $(CXXFLAGS) -flto=jobserver
+       endif
        endif
        endif
  endif
@@@ -527,6 -643,7 +646,7 @@@ endi
  ### Section 4. Public Targets
  ### ==========================================================================
  
  help:
        @echo ""
        @echo "To compile stockfish, type: "
        @echo ""
        @echo "Supported targets:"
        @echo ""
+       @echo "help                    > Display architecture details"
        @echo "build                   > Standard build"
-       @echo "profile-build           > Standard build with PGO"
+       @echo "net                     > Download the default nnue net"
+       @echo "profile-build           > Faster build (with profile-guided optimization)"
        @echo "strip                   > Strip executable"
        @echo "install                 > Install executable"
        @echo "clean                   > Clean up"
-       @echo "net                     > Download the default nnue net"
        @echo ""
        @echo "Supported archs:"
        @echo ""
+       @echo "x86-64-vnni512          > x86 64-bit with vnni support 512bit wide"
+       @echo "x86-64-vnni256          > x86 64-bit with vnni support 256bit wide"
        @echo "x86-64-avx512           > x86 64-bit with avx512 support"
        @echo "x86-64-bmi2             > x86 64-bit with bmi2 support"
        @echo "x86-64-avx2             > x86 64-bit with avx2 support"
-       @echo "x86-64-sse42            > x86 64-bit with sse42 support"
-       @echo "x86-64-modern           > x86 64-bit with sse41 support (x86-64-sse41)"
-       @echo "x86-64-sse41            > x86 64-bit with sse41 support"
+       @echo "x86-64-sse41-popcnt     > x86 64-bit with sse41 and popcnt support"
+       @echo "x86-64-modern           > common modern CPU, currently x86-64-sse41-popcnt"
        @echo "x86-64-ssse3            > x86 64-bit with ssse3 support"
        @echo "x86-64-sse3-popcnt      > x86 64-bit with sse3 and popcnt support"
-       @echo "x86-64-sse3             > x86 64-bit with sse3 support"
-       @echo "x86-64                  > x86 64-bit generic"
-       @echo "x86-32                  > x86 32-bit (also enables SSE)"
-       @echo "x86-32-old              > x86 32-bit fall back for old hardware"
+       @echo "x86-64                  > x86 64-bit generic (with sse2 support)"
+       @echo "x86-32-sse41-popcnt     > x86 32-bit with sse41 and popcnt support"
+       @echo "x86-32-sse2             > x86 32-bit with sse2 support"
+       @echo "x86-32                  > x86 32-bit generic (with mmx and sse support)"
        @echo "ppc-64                  > PPC 64-bit"
        @echo "ppc-32                  > PPC 32-bit"
        @echo "armv7                   > ARMv7 32-bit"
-       @echo "armv8                   > ARMv8 64-bit"
+       @echo "armv7-neon              > ARMv7 32-bit with popcnt and neon"
+       @echo "armv8                   > ARMv8 64-bit with popcnt and neon"
        @echo "apple-silicon           > Apple silicon ARM64"
        @echo "general-64              > unspecified 64-bit"
        @echo "general-32              > unspecified 32-bit"
        @echo "mingw                   > Gnu compiler with MinGW under Windows"
        @echo "clang                   > LLVM Clang compiler"
        @echo "icc                     > Intel compiler"
+       @echo "ndk                     > Google NDK to cross-compile for Android"
        @echo ""
        @echo "Simple examples. If you don't know what to do, you likely want to run: "
        @echo ""
-       @echo "make -j build ARCH=x86-64    (This is for 64-bit systems)"
-       @echo "make -j build ARCH=x86-32    (This is for 32-bit systems)"
-       @echo ""
-       @echo "Advanced examples, for experienced users: "
+       @echo "make -j build ARCH=x86-64  (A portable, slow compile for 64-bit systems)"
+       @echo "make -j build ARCH=x86-32  (A portable, slow compile for 32-bit systems)"
        @echo ""
-       @echo "make -j build ARCH=x86-64-modern COMP=clang"
-       @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
+       @echo "Advanced examples, for experienced users looking for performance: "
        @echo ""
-       @echo "The selected architecture $(ARCH) enables the following configuration: "
+       @echo "make    help  ARCH=x86-64-bmi2"
+       @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0"
+       @echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
        @echo ""
+       @echo "-------------------------------"
+ ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
+       @echo "The selected architecture $(ARCH) will enable the following configuration: "
        @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
+ else
+       @echo "Specify a supported architecture with the ARCH option for more details"
+       @echo ""
+ endif
  
  
  .PHONY: help build profile-build strip install clean net objclean profileclean \
          config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
          clang-profile-use clang-profile-make
  
- build: config-sanity
+ build: config-sanity net
        $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
  
- profile-build: config-sanity objclean profileclean
+ profile-build: net config-sanity objclean profileclean
        @echo ""
        @echo "Step 1/4. Building instrumented executable ..."
        $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
        $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
  
  strip:
-       strip $(EXE)
+       $(STRIP) $(EXE)
  
  install:
        -mkdir -p -m 755 $(BINDIR)
        -cp $(EXE) $(BINDIR)
        -strip $(BINDIR)/$(EXE)
  
- #clean all
+ # clean all
  clean: objclean profileclean
        @rm -f .depend *~ core
  
+ # evaluation network (nnue)
  net:
-       $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+       $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
        @echo "Default net: $(nnuenet)"
        $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
-       $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
-       @if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi
+       $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
+       @if test -f "$(nnuenet)"; then \
+             echo "Already available."; \
+          else \
+             if [ "x$(curl_or_wget)" = "x" ]; then \
+                echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \
+             else \
+                echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\
+             fi; \
+         fi;
+       $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
+       @if [ "x$(shasum_command)" != "x" ]; then \
+           if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+                 echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \
+             fi \
+          else \
+             echo "shasum / sha256sum not found, skipping net validation"; \
+         fi
  
  # clean binaries and objects
  objclean:
  # clean auxiliary profiling files
  profileclean:
        @rm -rf profdir
-       @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda
+       @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s
        @rm -f stockfish.profdata *.profraw
  
  default:
  ### Section 5. Private Targets
  ### ==========================================================================
  
 -all: $(EXE) .depend
 +all: $(EXE) client .depend
  
  config-sanity:
        @echo ""
        @echo "os: '$(OS)'"
        @echo "prefetch: '$(prefetch)'"
        @echo "popcnt: '$(popcnt)'"
+       @echo "pext: '$(pext)'"
        @echo "sse: '$(sse)'"
-       @echo "sse3: '$(sse3)'"
+       @echo "mmx: '$(mmx)'"
+       @echo "sse2: '$(sse2)'"
        @echo "ssse3: '$(ssse3)'"
        @echo "sse41: '$(sse41)'"
-       @echo "sse42: '$(sse42)'"
        @echo "avx2: '$(avx2)'"
-       @echo "pext: '$(pext)'"
        @echo "avx512: '$(avx512)'"
+       @echo "vnni256: '$(vnni256)'"
+       @echo "vnni512: '$(vnni512)'"
        @echo "neon: '$(neon)'"
        @echo ""
        @echo "Flags:"
        @test "$(debug)" = "yes" || test "$(debug)" = "no"
        @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no"
        @test "$(optimize)" = "yes" || test "$(optimize)" = "no"
+       @test "$(SUPPORTED_ARCH)" = "true"
        @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
         test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
-        test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
+        test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
        @test "$(bits)" = "32" || test "$(bits)" = "64"
        @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
        @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
+       @test "$(pext)" = "yes" || test "$(pext)" = "no"
        @test "$(sse)" = "yes" || test "$(sse)" = "no"
-       @test "$(sse3)" = "yes" || test "$(sse3)" = "no"
+       @test "$(mmx)" = "yes" || test "$(mmx)" = "no"
+       @test "$(sse2)" = "yes" || test "$(sse2)" = "no"
        @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no"
        @test "$(sse41)" = "yes" || test "$(sse41)" = "no"
-       @test "$(sse42)" = "yes" || test "$(sse42)" = "no"
        @test "$(avx2)" = "yes" || test "$(avx2)" = "no"
-       @test "$(pext)" = "yes" || test "$(pext)" = "no"
        @test "$(avx512)" = "yes" || test "$(avx512)" = "no"
+       @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no"
+       @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no"
        @test "$(neon)" = "yes" || test "$(neon)" = "no"
-       @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
+       @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
+       || test "$(comp)" = "armv7a-linux-androideabi16-clang"  || test "$(comp)" = "aarch64-linux-android21-clang"
  
  $(EXE): $(OBJS)
-       $(CXX) -o $@ $(OBJS) $(LDFLAGS)
+       +$(CXX) -o $@ $(OBJS) $(LDFLAGS)
  
  clang-profile-make:
        $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
        all
  
  clang-profile-use:
-       llvm-profdata merge -output=stockfish.profdata *.profraw
+       $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
        $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
        EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
        EXTRALDFLAGS='-fprofile-use ' \
@@@ -734,32 -884,6 +887,32 @@@ icc-profile-use
        EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
        all
  
 +### GRPC
 +
 +PROTOS_PATH = .
 +PROTOC = protoc
 +GRPC_CPP_PLUGIN = grpc_cpp_plugin
 +GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)`
 +
 +%.grpc.pb.h %.grpc.pb.cc: %.proto
 +      $(PROTOC) -I $(PROTOS_PATH) --grpc_out=. --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $<
 +
 +# oh my
 +%.cpp: %.cc
 +      cp $< $@
 +
 +%.pb.h %.pb.cc: %.proto
 +      $(PROTOC) -I $(PROTOS_PATH) --cpp_out=. $<
 +
 +#LDFLAGS += -Wl,-Bstatic -Wl,-\( -lprotobuf -lgrpc++_unsecure -lgrpc_unsecure -lgrpc -lz -Wl,-\) -Wl,-Bdynamic -ldl
 +LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/x86_64-linux-gnu/libgrpc++_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a -ldl -lz
 +#LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/libgrpc++_unsecure.a /usr/lib/libgrpc_unsecure.a /usr/lib/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a -ldl -lz
 +
 +client: $(CLIOBJS)
 +      $(CXX) -o $@ $(CLIOBJS) $(LDFLAGS)
 +
 +# Other stuff
 +
  .depend:
        -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
  
diff --combined src/main.cpp
index 25d8f4158fdee3addb951b705ce805a6c418f17f,f95db1c2f09379c5c940cca74f81c8a3723c4ff6..a62c3a8562ebb929ccf0a4029c405ab768410f94
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
  
 +#include <deque>
  #include <iostream>
 +#include <stack>
 +#include <thread>
  
  #include "bitboard.h"
  #include "endgame.h"
  #include "uci.h"
  #include "syzygy/tbprobe.h"
  
 +#include <grpc/grpc.h>
 +#include <grpc++/server.h>
 +#include <grpc++/server_builder.h>
 +#include "hashprobe.h"
 +#include "hashprobe.grpc.pb.h"
 +#include "tt.h"
 +
 +using grpc::Server;
 +using grpc::ServerBuilder;
 +using grpc::ServerContext;
 +using grpc::Status;
 +using grpc::StatusCode;
 +using namespace hashprobe;
 +
 +Status HashProbeImpl::Probe(ServerContext* context,  // RPC entry point; `context` is not used in this body.
 +                            const HashProbeRequest* request,  // Supplies the FEN of the position to probe.
 +                          HashProbeResponse *response) {  // Receives the root probe plus one line per legal move.
 +      Position pos;
 +      StateInfo st;  // Backing state for the root position set up below.
 +      pos.set(request->fen(), /*isChess960=*/false, &st, Threads.main());
 +      if (!pos.pos_is_ok()) {  // Reject requests whose FEN does not yield a consistent position.
 +              return Status(StatusCode::INVALID_ARGUMENT, "Invalid FEN");
 +      }
 +
 +      bool invert = (pos.side_to_move() == BLACK);  // ProbeMove negates scores and swaps bounds when this is set.
 +      StateListPtr setup_states = StateListPtr(new std::deque<StateInfo>(1));  // Grows by one StateInfo per move tried below.
 +
 +      ProbeMove(&pos, setup_states.get(), invert, response->mutable_root());  // Probe the requested position itself.
 +
 +      MoveList<LEGAL> moves(pos);
 +      for (const ExtMove* em = moves.begin(); em != moves.end(); ++em) {  // One response line per legal move.
 +              HashProbeLine *line = response->add_line();
 +              FillMove(&pos, em->move, line->mutable_move());  // Must run before do_move: the text is built from the pre-move position.
 +              setup_states->push_back(StateInfo());
 +              pos.do_move(em->move, setup_states->back());
 +              ProbeMove(&pos, setup_states.get(), !invert, line);  // Flag is negated for the position after the move.
 +              pos.undo_move(em->move);  // Restore the root position for the next iteration.
 +      }
 +
 +      return Status::OK;
 +}
 +
 +void HashProbeImpl::FillMove(Position *pos, Move move, HashProbeMove* decoded) {  // Builds an algebraic-notation string for `move` in `pos` and stores it via decoded->set_pretty().
 +      if (!is_ok(move)) return;  // Invalid move: `decoded` is left untouched.
 +
 +      Square from = from_sq(move);
 +      Square to = to_sq(move);
 +
 +      if (type_of(move) == CASTLING) {  // NOTE(review): presumably the encoded `to` of a castling move is not the king's target square - confirm.
 +              to = make_square(to > from ? FILE_G : FILE_C, rank_of(from));  // Remap to the G- or C-file square on the origin rank.
 +      }
 +
 +      Piece moved_piece = pos->moved_piece(move);
 +      std::string pretty;  // Accumulates the notation text.
 +      if (type_of(move) == CASTLING) {
 +              if (to > from) {  // Target on the G file: short castling.
 +                      pretty = "O-O";
 +              } else {
 +                      pretty = "O-O-O";
 +              }
 +      } else if (type_of(moved_piece) == PAWN) {
 +              if (type_of(move) == ENPASSANT || pos->piece_on(to) != NO_PIECE) {
 +                      // Pawn capture: prefix with the origin file letter, then 'x'.
 +                      pretty = char('a' + file_of(from));
 +                      pretty += "x";
 +              }
 +              pretty += UCI::square(to);  // Destination square text.
 +              if (type_of(move) == PROMOTION) {
 +                      pretty += "=";
 +                      pretty += " PNBRQK"[promotion_type(move)];  // Letter table indexed by the piece-type value.
 +              }
 +      } else {
 +              pretty = " PNBRQK"[type_of(moved_piece)];  // Piece letter from the same table.
 +              Bitboard attackers = pos->attackers_to(to) & pos->pieces(color_of(moved_piece), type_of(moved_piece));  // Same colour and type as the mover, attacking `to`.
 +              if (more_than_one(attackers)) {
 +                      // Drop candidates whose move to `to` is not pseudo-legal or not legal, so they do not force disambiguation.
 +                      Bitboard att_copy = attackers;  // Iterate over a copy because `attackers` is edited inside the loop.
 +                      while (att_copy) {
 +                              Square s = pop_lsb(&att_copy);
 +                              Move m = make_move(s, to);
 +                              if (!pos->pseudo_legal(m) || !pos->legal(m)) {
 +                                      attackers &= ~SquareBB[s];
 +                              }
 +                      }
 +              }
 +              if (more_than_one(attackers)) {
 +                      // Add the origin file when at least one other candidate stands on a different file.
 +                      Bitboard attackers_this_file = attackers & file_bb(file_of(from));
 +                      if (attackers != attackers_this_file) {
 +                              pretty += char('a' + file_of(from));
 +                              attackers = attackers_this_file;  // Only same-file candidates remain to be told apart.
 +                      }
 +                      if (more_than_one(attackers)) {
 +                              // Several candidates share the origin file, so the origin rank is added as well.
 +                              pretty += char('1' + rank_of(from));
 +                      }
 +              }
 +
 +              if (type_of(move) == ENPASSANT || pos->piece_on(to) != NO_PIECE) {  // Same capture test as in the pawn branch.
 +                      pretty += "x";
 +              }
 +
 +              pretty += UCI::square(to);
 +      }
 +
 +      if (pos->gives_check(move)) {
 +              // Play the move to see whether the opponent has any legal reply: none means mate.
 +              StateInfo si;
 +              pos->do_move(move, si, true);  // NOTE(review): third argument is presumably the already-known gives-check flag - confirm.
 +              if (MoveList<LEGAL>(*pos).size() > 0) {
 +                      pretty += "+";
 +              } else {
 +                      pretty += "#";
 +              }
 +              pos->undo_move(move);  // Leave `pos` as the caller passed it in.
 +      }
 +
 +      decoded->set_pretty(pretty);
 +}
 +
 +void HashProbeImpl::ProbeMove(Position* pos, std::deque<StateInfo>* setup_states, bool invert, HashProbeLine* response) {
 +      bool found;
 +      TTEntry *entry = TT.probe(pos->key(), found);
 +      response->set_found(found);
 +      if (found) {
 +              Value value = entry->value();
 +              Value eval = entry->eval();
 +              Bound bound = entry->bound();
 +
 +              if (invert) {
 +                      value = -value;
 +                      eval = -eval;
 +                      if (bound == BOUND_UPPER) {
 +                              bound = BOUND_LOWER;
 +                      } else if (bound == BOUND_LOWER) {
 +                              bound = BOUND_UPPER;
 +                      }
 +              }
 +
 +              response->set_depth(entry->depth());
 +              FillValue(eval, response->mutable_eval());
 +              if (entry->depth() > DEPTH_NONE) {
 +                      FillValue(value, response->mutable_value());
 +              }
 +              response->set_bound(HashProbeLine::ValueBound(bound));
 +
 +              // Follow the PV until we hit an illegal move.
 +              std::stack<Move> pv;
 +              std::set<Key> seen;
 +              while (found && is_ok(entry->move()) &&
 +                     pos->pseudo_legal(entry->move()) &&
 +                     pos->legal(entry->move())) {
 +                      FillMove(pos, entry->move(), response->add_pv());
 +                      if (seen.count(pos->key())) break;
 +                      pv.push(entry->move());
 +                      seen.insert(pos->key());
 +                      setup_states->push_back(StateInfo());
 +                      pos->do_move(entry->move(), setup_states->back());
 +                      entry = TT.probe(pos->key(), found);
 +              }
 +
 +              // Unroll the PV back again, so the Position object remains unchanged.
 +              while (!pv.empty()) {
 +                      pos->undo_move(pv.top());
 +                      pv.pop();
 +              }
 +      }
 +}
 +
 +void HashProbeImpl::FillValue(Value value, HashProbeScore* score) {
 +      if (abs(value) < VALUE_MATE - MAX_PLY) {
 +              score->set_score_type(HashProbeScore::SCORE_CP);
 +              score->set_score_cp(value * 100 / PawnValueEg);
 +      } else {
 +              score->set_score_type(HashProbeScore::SCORE_MATE);
 +              score->set_score_mate((value > 0 ? VALUE_MATE - value + 1 : -VALUE_MATE - value) / 2);
 +      }
 +}
 +
 +HashProbeThread::HashProbeThread(const std::string &server_address) {
 +      builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
 +      builder.RegisterService(&service);
 +      server = std::move(builder.BuildAndStart());
 +      std::cout << "Server listening on " << server_address << std::endl;
 +      std::thread([this]{ server->Wait(); }).detach();
 +}
 +
 +void HashProbeThread::Shutdown() {
 +      server->Shutdown();
 +}
 +
  namespace PSQT {
    void init();
  }
@@@ -229,6 -35,7 +229,7 @@@ int main(int argc, char* argv[]) 
  
    std::cout << engine_info() << std::endl;
  
+   CommandLine::init(argc, argv);
    UCI::init(Options);
    Tune::init();
    PSQT::init();
diff --combined src/misc.cpp
index 4ff52f48288f0f50834809f5fb032835efb69111,3fbdea35d94345db5b82ee96eaaffdf01bb28b96..6a32e7cb53da329da60e789fbb90b07fe4a32c1a
@@@ -51,6 -51,11 +51,11 @@@ typedef bool(*fun3_t)(HANDLE, CONST GRO
  #include <sys/mman.h>
  #endif
  
+ #if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+ #define POSIXALIGNEDALLOC
+ #include <stdlib.h>
+ #endif
  #include "misc.h"
  #include "thread.h"
  
@@@ -127,6 -132,7 +132,7 @@@ public
  
  } // namespace
  
  /// engine_info() returns the full name of the current Stockfish version. This
  /// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
  /// the program was compiled) or "Stockfish <Version>", depending on whether
@@@ -144,7 -150,6 +150,7 @@@ const string engine_info(bool to_uci) 
    {
        date >> month >> day >> year;
        ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2);
 +      ss << "-asn";
    }
  
    ss << (to_uci  ? "\nid author ": " by ")
@@@ -215,26 -220,33 +221,33 @@@ const std::string compiler_info() 
  
    compiler += "\nCompilation settings include: ";
    compiler += (Is64Bit ? " 64bit" : " 32bit");
+   #if defined(USE_VNNI)
+     compiler += " VNNI";
+   #endif
    #if defined(USE_AVX512)
      compiler += " AVX512";
    #endif
+   compiler += (HasPext ? " BMI2" : "");
    #if defined(USE_AVX2)
      compiler += " AVX2";
    #endif
-   #if defined(USE_SSE42)
-     compiler += " SSE42";
-   #endif
    #if defined(USE_SSE41)
      compiler += " SSE41";
    #endif
    #if defined(USE_SSSE3)
      compiler += " SSSE3";
    #endif
-   #if defined(USE_SSE3)
-     compiler += " SSE3";
+   #if defined(USE_SSE2)
+     compiler += " SSE2";
+   #endif
+   compiler += (HasPopCnt ? " POPCNT" : "");
+   #if defined(USE_MMX)
+     compiler += " MMX";
+   #endif
+   #if defined(USE_NEON)
+     compiler += " NEON";
    #endif
-     compiler += (HasPext ? " BMI2" : "");
-     compiler += (HasPopCnt ? " POPCNT" : "");
    #if !defined(NDEBUG)
      compiler += " DEBUG";
    #endif
@@@ -317,13 -329,16 +330,16 @@@ void prefetch(void* addr) 
  
  #endif
  
- /// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
- /// Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
- ///
+ /// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
+ /// does not guarantee the availability of aligned_alloc(). Memory allocated with
+ /// std_aligned_alloc() must be freed with std_aligned_free().
  
  void* std_aligned_alloc(size_t alignment, size_t size) {
- #if defined(__APPLE__)
-   return aligned_alloc(alignment, size);
+ #if defined(POSIXALIGNEDALLOC)
+   void *mem;
+   return posix_memalign(&mem, alignment, size) ? nullptr : mem;
  #elif defined(_WIN32)
    return _mm_malloc(size, alignment);
  #else
  }
  
  void std_aligned_free(void* ptr) {
- #if defined(__APPLE__)
+ #if defined(POSIXALIGNEDALLOC)
    free(ptr);
  #elif defined(_WIN32)
    _mm_free(ptr);
  #endif
  }
  
- /// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
+ /// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
  /// The returned pointer is the aligned one, while the mem argument is the one that needs
  /// to be passed to free. With c++17 some of this functionality could be simplified.
  
@@@ -353,7 -369,9 +370,9 @@@ void* aligned_ttmem_alloc(size_t allocS
    size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
    if (posix_memalign(&mem, alignment, size))
       mem = nullptr;
+ #if defined(MADV_HUGEPAGE)
    madvise(mem, allocSize, MADV_HUGEPAGE);
+ #endif
    return mem;
  }
  
@@@ -572,3 -590,61 +591,61 @@@ void bindThisThread(size_t idx) 
  #endif
  
  } // namespace WinProcGroup
+ #ifdef _WIN32
+ #include <direct.h>
+ #define GETCWD _getcwd
+ #else
+ #include <unistd.h>
+ #define GETCWD getcwd
+ #endif
+ /// CommandLine records how the engine was started: the program path taken
+ /// from argv[0], the directory containing the binary and the working directory.
+ namespace CommandLine {
+ string argv0;            // path+name of the executable binary, as given by argv[0]
+ string binaryDirectory;  // path of the executable directory
+ string workingDirectory; // path of the working directory
+ string pathSeparator;    // Separator for our current OS
+ /// init() fills the variables above from the arguments of main(). A missing
+ /// argv[0] is treated as an empty program name.
+ void init(int argc, char* argv[]) {
+     // extract the path+name of the executable binary; guard against an
+     // empty argument vector, where argv[0] is a null pointer
+     argv0 = (argc > 0 && argv[0] != nullptr) ? argv[0] : "";
+ #ifdef _WIN32
+     pathSeparator = "\\";
+   #ifdef _MSC_VER
+     // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
+     // issues in some windows 10 versions, so check returned values carefully.
+     char* pgmptr = nullptr;
+     if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
+         argv0 = pgmptr;
+   #endif
+ #else
+     pathSeparator = "/";
+ #endif
+     // extract the working directory; it stays empty if the lookup fails
+     workingDirectory = "";
+     char buff[40000];
+     char* cwd = GETCWD(buff, sizeof(buff));
+     if (cwd)
+         workingDirectory = cwd;
+     // extract the binary directory path from argv0: keep everything up to and
+     // including the last separator, or fall back to "." + separator
+     binaryDirectory = argv0;
+     size_t pos = binaryDirectory.find_last_of("\\/");
+     if (pos == std::string::npos)
+         binaryDirectory = "." + pathSeparator;
+     else
+         binaryDirectory.resize(pos + 1);
+     // pattern replacement: "./" at the start of path is replaced by the working directory
+     // (only the leading "." is substituted, the separator after it is kept)
+     if (binaryDirectory.find("." + pathSeparator) == 0)
+         binaryDirectory.replace(0, 1, workingDirectory);
+ }
+ } // namespace CommandLine
diff --combined src/position.cpp
index 7b01172f1cd0baf3bef24e93b74f9adc7a18b8bb,e6a760d2c7ac34fb1e3fc381ac7416ca8c25cc3a..2658c71a6e624eb2d431c357ed54b39623599abb
@@@ -198,9 -198,6 +198,6 @@@ Position& Position::set(const string& f
    std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
    st = si;
  
-   // Each piece on board gets a unique ID used to track the piece later
-   PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
    ss >> std::noskipws;
  
    // 1. Piece placement
        else if (token == '/')
            sq += 2 * SOUTH;
  
-       else if ((idx = PieceToChar.find(token)) != string::npos)
-       {
-           auto pc = Piece(idx);
-           put_piece(pc, sq);
-           if (Eval::useNNUE)
-           {
-               // Kings get a fixed ID, other pieces get ID in order of placement
-               piece_id =
-                 (idx == W_KING) ? PIECE_ID_WKING :
-                 (idx == B_KING) ? PIECE_ID_BKING :
-                 next_piece_id++;
-               evalList.put_piece(piece_id, sq, pc);
-           }
+       else if ((idx = PieceToChar.find(token)) != string::npos) {
+           put_piece(Piece(idx), sq);
            ++sq;
        }
    }
    thisThread = th;
    set_state(st);
  
 -  assert(pos_is_ok());
 -
    return *this;
  }
  
@@@ -718,9 -704,6 +702,6 @@@ void Position::do_move(Move m, StateInf
  
    // Used by NNUE
    st->accumulator.computed_accumulation = false;
-   st->accumulator.computed_score = false;
-   PieceId dp0 = PIECE_ID_NONE;
-   PieceId dp1 = PIECE_ID_NONE;
    auto& dp = st->dirtyPiece;
    dp.dirty_num = 1;
  
  
        if (Eval::useNNUE)
        {
-           dp.dirty_num = 2; // 2 pieces moved
-           dp1 = piece_id_on(capsq);
-           dp.pieceId[1] = dp1;
-           dp.old_piece[1] = evalList.piece_with_id(dp1);
-           evalList.put_piece(dp1, capsq, NO_PIECE);
-           dp.new_piece[1] = evalList.piece_with_id(dp1);
+           dp.dirty_num = 2;  // 1 piece moved, 1 piece captured
+           dp.piece[1] = captured;
+           dp.from[1] = capsq;
+           dp.to[1] = SQ_NONE;
        }
  
        // Update board and piece lists
    {
        if (Eval::useNNUE)
        {
-           dp0 = piece_id_on(from);
-           dp.pieceId[0] = dp0;
-           dp.old_piece[0] = evalList.piece_with_id(dp0);
-           evalList.put_piece(dp0, to, pc);
-           dp.new_piece[0] = evalList.piece_with_id(dp0);
+           dp.piece[0] = pc;
+           dp.from[0] = from;
+           dp.to[0] = to;
        }
  
        move_piece(from, to);
  
            if (Eval::useNNUE)
            {
-               dp0 = piece_id_on(to);
-               evalList.put_piece(dp0, to, promotion);
-               dp.new_piece[0] = evalList.piece_with_id(dp0);
+               // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
+               dp.to[0] = SQ_NONE;
+               dp.piece[dp.dirty_num] = promotion;
+               dp.from[dp.dirty_num] = SQ_NONE;
+               dp.to[dp.dirty_num] = to;
+               dp.dirty_num++;
            }
  
            // Update hash keys
@@@ -948,12 -930,6 +928,6 @@@ void Position::undo_move(Move m) 
    {
        move_piece(to, from); // Put the piece back at the source square
  
-       if (Eval::useNNUE)
-       {
-           PieceId dp0 = st->dirtyPiece.pieceId[0];
-           evalList.put_piece(dp0, from, pc);
-       }
        if (st->capturedPiece)
        {
            Square capsq = to;
            }
  
            put_piece(st->capturedPiece, capsq); // Restore the captured piece
-           if (Eval::useNNUE)
-           {
-               PieceId dp1 = st->dirtyPiece.pieceId[1];
-               assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
-               assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
-               evalList.put_piece(dp1, capsq, st->capturedPiece);
-           }
        }
    }
  
@@@ -999,32 -967,16 +965,16 @@@ void Position::do_castling(Color us, Sq
    rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
    to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
  
-   if (Eval::useNNUE)
+   if (Do && Eval::useNNUE)
    {
-       PieceId dp0, dp1;
        auto& dp = st->dirtyPiece;
-       dp.dirty_num = 2; // 2 pieces moved
-       if (Do)
-       {
-           dp0 = piece_id_on(from);
-           dp1 = piece_id_on(rfrom);
-           dp.pieceId[0] = dp0;
-           dp.old_piece[0] = evalList.piece_with_id(dp0);
-           evalList.put_piece(dp0, to, make_piece(us, KING));
-           dp.new_piece[0] = evalList.piece_with_id(dp0);
-           dp.pieceId[1] = dp1;
-           dp.old_piece[1] = evalList.piece_with_id(dp1);
-           evalList.put_piece(dp1, rto, make_piece(us, ROOK));
-           dp.new_piece[1] = evalList.piece_with_id(dp1);
-       }
-       else
-       {
-           dp0 = piece_id_on(to);
-           dp1 = piece_id_on(rto);
-           evalList.put_piece(dp0, from, make_piece(us, KING));
-           evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
-       }
+       dp.piece[0] = make_piece(us, KING);
+       dp.from[0] = from;
+       dp.to[0] = to;
+       dp.piece[1] = make_piece(us, ROOK);
+       dp.from[1] = rfrom;
+       dp.to[1] = rto;
+       dp.dirty_num = 2;
    }
  
    // Remove both pieces first since squares could overlap in Chess960
@@@ -1047,7 -999,6 +997,6 @@@ void Position::do_null_move(StateInfo& 
    if (Eval::useNNUE)
    {
        std::memcpy(&newSt, st, sizeof(StateInfo));
-       st->accumulator.computed_score = false;
    }
    else
        std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
@@@ -1143,8 -1094,8 +1092,8 @@@ bool Position::see_ge(Move m, Value thr
  
        // Don't allow pinned pieces to attack (except the king) as long as
        // there are pinners on their original square.
-       if (st->pinners[~stm] & occupied)
-           stmAttackers &= ~st->blockersForKing[stm];
+       if (pinners(~stm) & occupied)
+           stmAttackers &= ~blockers_for_king(stm);
  
        if (!stmAttackers)
            break;
diff --combined src/syzygy/tbprobe.cpp
index 61ba8121959ae61b0b8de296b1022730f424386d,4d682f1a90bc2a38f0e0c444a8e45b833afcb781..28b70a4a10714e825be6c23f893dc570daf508da
@@@ -74,7 -74,7 +74,7 @@@ int MapB1H1H7[SQUARE_NB]
  int MapA1D1D4[SQUARE_NB];
  int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB]
  
 -int Binomial[6][SQUARE_NB];    // [k][n] k elements from a set of n elements
 +int Binomial[7][SQUARE_NB];    // [k][n] k elements from a set of n elements
  int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB]
  int LeadPawnsSize[6][4];       // [leadPawnsCnt][FILE_A..FILE_D]
  
@@@ -223,7 -223,9 +223,9 @@@ public
  
          *mapping = statbuf.st_size;
          *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ #if defined(MADV_RANDOM)
          madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
+ #endif
          ::close(fd);
  
          if (*baseAddress == MAP_FAILED)
@@@ -758,7 -760,7 +760,7 @@@ Ret do_probe_table(const Position& pos
      if (entry->hasPawns) {
          idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
  
-         std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
+         std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
  
          for (int i = 1; i < leadPawnsCnt; ++i)
              idx += Binomial[i][MapPawns[squares[i]]];
@@@ -859,7 -861,7 +861,7 @@@ encode_remaining
  
      while (d->groupLen[++next])
      {
-         std::sort(groupSq, groupSq + d->groupLen[next]);
+         std::stable_sort(groupSq, groupSq + d->groupLen[next]);
          uint64_t n = 0;
  
          // Map down a square if "comes later" than a square in the previous
@@@ -1319,7 -1321,7 +1321,7 @@@ void Tablebases::init(const std::string
      Binomial[0][0] = 1;
  
      for (int n = 1; n < 64; n++) // Squares
 -        for (int k = 0; k < 6 && k <= n; ++k) // Pieces
 +        for (int k = 0; k < 7 && k <= n; ++k) // Pieces
              Binomial[k][n] =  (k > 0 ? Binomial[k - 1][n - 1] : 0)
                              + (k < n ? Binomial[k    ][n - 1] : 0);
  
diff --combined src/ucioption.cpp
index 79a2385a1319b15c62824b2ca6175e1801f73676,5e747a7f13fdda58c953209c19c1b9be7470e8eb..df6283cbc00dfadb75ddccf0912c9161342705f7
  #include <ostream>
  #include <sstream>
  
+ #include "evaluate.h"
  #include "misc.h"
  #include "search.h"
  #include "thread.h"
  #include "tt.h"
  #include "uci.h"
 +#include "hashprobe.h"
  #include "syzygy/tbprobe.h"
  
  using std::string;
  
  UCI::OptionsMap Options; // Global object
 +std::unique_ptr<HashProbeThread> hash_probe_thread;
  
  namespace UCI {
  
@@@ -44,13 -43,6 +45,13 @@@ void on_threads(const Option& o) { Thre
  void on_tb_path(const Option& o) { Tablebases::init(o); }
  void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
  void on_eval_file(const Option& ) { Eval::init_NNUE(); }
 +void on_rpc_server_address(const Option& o) {
 +      if (hash_probe_thread) {
 +              hash_probe_thread->Shutdown();
 +      }
 +      std::string addr = o;
 +      hash_probe_thread.reset(new HashProbeThread(addr));
 +}
  
  /// Our case insensitive less() function as required by UCI protocol
  bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@@@ -87,9 -79,8 +88,9 @@@ void init(OptionsMap& o) 
    o["SyzygyProbeDepth"]      << Option(1, 1, 100);
    o["Syzygy50MoveRule"]      << Option(true);
    o["SyzygyProbeLimit"]      << Option(7, 0, 7);
-   o["Use NNUE"]              << Option(false, on_use_NNUE);
-   o["EvalFile"]              << Option("nn-9931db908a9b.nnue", on_eval_file);
+   o["Use NNUE"]              << Option(true, on_use_NNUE);
+   o["EvalFile"]              << Option(EvalFileDefaultName, on_eval_file);
 +  o["RPCServerAddress"]      << Option("<empty>", on_rpc_server_address);
  }