SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
- nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
+ nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp \
+ hashprobe.grpc.pb.cc hashprobe.pb.cc
+CLISRCS = client.cpp hashprobe.grpc.pb.cc hashprobe.pb.cc uci.cpp
OBJS = $(notdir $(SRCS:.cpp=.o))
+CLIOBJS = $(notdir $(CLISRCS:.cpp=.o))
VPATH = syzygy:nnue:nnue/features
# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
+# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# at the end of the line for flag values.
#
# Example of use for these flags:
-# make build ARCH=x86-64-avx512 debug=on sanitize="address undefined"
+# make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined"
### 2.1. General and architecture defaults
# explicitly check for the list of supported architectures (as listed with make help),
# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
ifeq ($(ARCH), $(filter $(ARCH), \
- x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
- x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
+ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \
+ x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
SUPPORTED_ARCH=true
ssse3 = no
sse41 = no
avx2 = no
+avxvnni = no
avx512 = no
vnni256 = no
vnni512 = no
neon = no
+arm_version = 0
STRIP = strip
### 2.2 Architecture specific
avx2 = yes
endif
+ifeq ($(findstring -avxvnni,$(ARCH)),-avxvnni)
+ popcnt = yes
+ sse = yes
+ sse2 = yes
+ ssse3 = yes
+ sse41 = yes
+ avx2 = yes
+ avxvnni = yes
+ pext = yes
+endif
+
ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
popcnt = yes
sse = yes
arch = armv7
prefetch = yes
bits = 32
+ arm_version = 7
endif
ifeq ($(ARCH),armv7-neon)
popcnt = yes
neon = yes
bits = 32
+ arm_version = 7
endif
ifeq ($(ARCH),armv8)
prefetch = yes
popcnt = yes
neon = yes
+ arm_version = 8
endif
ifeq ($(ARCH),apple-silicon)
prefetch = yes
popcnt = yes
neon = yes
+ arm_version = 8
endif
ifeq ($(ARCH),ppc-32)
ifeq ($(COMP),gcc)
comp=gcc
CXX=g++
- CXXFLAGS += -pedantic -Wextra -Wshadow
+ CXXFLAGS += -pedantic -Wextra
ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android)
CXX=g++
endif
- CXXFLAGS += -Wextra -Wshadow
+ CXXFLAGS += -pedantic -Wextra -Wshadow
LDFLAGS += -static
endif
endif
ifeq ($(KERNEL),Darwin)
- CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
- LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+ CXXFLAGS += -mmacosx-version-min=10.14
+ LDFLAGS += -mmacosx-version-min=10.14
+ ifneq ($(arch),any)
+ CXXFLAGS += -arch $(arch)
+ LDFLAGS += -arch $(arch)
+ endif
XCRUN = xcrun
endif
else
profile_make = gcc-profile-make
profile_use = gcc-profile-use
+ ifeq ($(KERNEL),Darwin)
+ EXTRAPROFILEFLAGS = -fvisibility=hidden
+ endif
endif
### Travis CI script uses COMPILER to overwrite CXX
### 3.3 Optimization
ifeq ($(optimize),yes)
- CXXFLAGS += -O3
+ CXXFLAGS += -O3 -g
ifeq ($(comp),gcc)
ifeq ($(OS), Android)
endif
endif
- ifeq ($(comp),$(filter $(comp),gcc clang icc))
- ifeq ($(KERNEL),Darwin)
- CXXFLAGS += -mdynamic-no-pic
- endif
- endif
+ ifeq ($(KERNEL),Darwin)
+ ifeq ($(comp),$(filter $(comp),clang icc))
+ CXXFLAGS += -mdynamic-no-pic
+ endif
+
+ ifeq ($(comp),gcc)
+ ifneq ($(arch),arm64)
+ CXXFLAGS += -mdynamic-no-pic
+ endif
+ endif
+ endif
ifeq ($(comp),clang)
CXXFLAGS += -fexperimental-new-pass-manager
CXXFLAGS += -DIS_64BIT
endif
-### 3.5 prefetch
+### 3.5 prefetch and popcount
ifeq ($(prefetch),yes)
ifeq ($(sse),yes)
CXXFLAGS += -msse
CXXFLAGS += -DNO_PREFETCH
endif
-### 3.6 popcnt
ifeq ($(popcnt),yes)
ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT
endif
endif
+### 3.6 SIMD architectures
ifeq ($(avx2),yes)
CXXFLAGS += -DUSE_AVX2
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
endif
endif
+ifeq ($(avxvnni),yes)
+ CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI
+ ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+ CXXFLAGS += -mavxvnni
+ endif
+endif
+
ifeq ($(avx512),yes)
CXXFLAGS += -DUSE_AVX512
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
endif
ifeq ($(neon),yes)
- CXXFLAGS += -DUSE_NEON
+ CXXFLAGS += -DUSE_NEON=$(arm_version)
ifeq ($(KERNEL),Linux)
ifneq ($(COMP),ndk)
ifneq ($(arch),armv8)
@echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide"
@echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide"
@echo "x86-64-avx512 > x86 64-bit with avx512 support"
+ @echo "x86-64-avxvnni > x86 64-bit with avxvnni support"
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support"
@echo "x86-64-avx2 > x86 64-bit with avx2 support"
@echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support"
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@echo ""
@echo "Step 2/4. Running benchmark for pgo-build ..."
- $(PGOBENCH) > /dev/null
+ $(PGOBENCH) 2>&1 | tail -n 4
@echo ""
@echo "Step 3/4. Building optimized executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
### Section 5. Private Targets
### ==========================================================================
-all: $(EXE) .depend
+all: $(EXE) client .depend
config-sanity: net
@echo ""
@echo "ssse3: '$(ssse3)'"
@echo "sse41: '$(sse41)'"
@echo "avx2: '$(avx2)'"
+ @echo "avxvnni: '$(avxvnni)'"
@echo "avx512: '$(avx512)'"
@echo "vnni256: '$(vnni256)'"
@echo "vnni512: '$(vnni512)'"
@echo "neon: '$(neon)'"
+ @echo "arm_version: '$(arm_version)'"
@echo ""
@echo "Flags:"
@echo "CXX: $(CXX)"
@mkdir -p profdir
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-generate=profdir' \
+ EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
EXTRALDFLAGS='-lgcov' \
all
gcc-profile-use:
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-use=profdir -fno-peel-loops -fno-tracer' \
+ EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
EXTRALDFLAGS='-lgcov' \
all
EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
all
-.depend:
+### GRPC
+
+PROTOS_PATH = .
+PROTOC = protoc
+GRPC_CPP_PLUGIN = grpc_cpp_plugin
+GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)`
+
+%.grpc.pb.h %.grpc.pb.cc: %.proto
+ $(PROTOC) -I $(PROTOS_PATH) --grpc_out=. --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $<
+
+# oh my
+%.cpp: %.cc
+ cp $< $@
+
+%.pb.h %.pb.cc: %.proto
+ $(PROTOC) -I $(PROTOS_PATH) --cpp_out=. $<
+
+#LDFLAGS += -Wl,-Bstatic -Wl,-\( -lprotobuf -lgrpc++_unsecure -lgrpc_unsecure -lgrpc -lz -Wl,-\) -Wl,-Bdynamic -ldl
+LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/x86_64-linux-gnu/libgrpc++_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a /usr/lib/x86_64-linux-gnu/libgpr.a /usr/lib/x86_64-linux-gnu/libabsl_str_format_internal.a /usr/lib/x86_64-linux-gnu/libabsl_strings.a /usr/lib/x86_64-linux-gnu/libabsl_flags_marshalling.a /usr/lib/x86_64-linux-gnu/libabsl_throw_delegate.a /usr/lib/x86_64-linux-gnu/libabsl_raw_logging_internal.a /usr/lib/x86_64-linux-gnu/libabsl_base.a /usr/lib/x86_64-linux-gnu/libabsl_int128.a /usr/lib/x86_64-linux-gnu/libabsl_bad_optional_access.a -ldl -lz
+#LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/libgrpc++_unsecure.a /usr/lib/libgrpc_unsecure.a /usr/lib/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a -ldl -lz
+
+client: $(CLIOBJS)
+ $(CXX) -o $@ $(CLIOBJS) $(LDFLAGS)
+
+# Other stuff
+
+.depend: $(SRCS)
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
-include .depend