From: Steinar H. Gunderson Date: Thu, 2 Dec 2021 19:01:42 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=17f8b7e18673fc7b542c6e40a896cd1fe1a25248;hp=-c Merge remote-tracking branch 'upstream/master' --- 17f8b7e18673fc7b542c6e40a896cd1fe1a25248 diff --combined src/Makefile index cf4f4ecf,5c52661b..505fb2cf --- a/src/Makefile +++ b/src/Makefile @@@ -41,12 -41,9 +41,12 @@@ endi SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp \ + hashprobe.grpc.pb.cc hashprobe.pb.cc +CLISRCS = client.cpp hashprobe.grpc.pb.cc hashprobe.pb.cc uci.cpp OBJS = $(notdir $(SRCS:.cpp=.o)) +CLIOBJS = $(notdir $(CLISRCS:.cpp=.o)) VPATH = syzygy:nnue:nnue/features @@@ -91,7 -88,7 +91,7 @@@ endi # at the end of the line for flag values. # # Example of use for these flags: - # make build ARCH=x86-64-avx512 debug=on sanitize="address undefined" + # make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined" ### 2.1. General and architecture defaults @@@ -329,7 -326,7 +329,7 @@@ endi ifeq ($(COMP),gcc) comp=gcc CXX=g++ - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) @@@ -496,7 -493,7 +496,7 @@@ endi ### 3.3 Optimization ifeq ($(optimize),yes) - CXXFLAGS += -O3 + CXXFLAGS += -O3 -g ifeq ($(comp),gcc) ifeq ($(OS), Android) @@@ -520,7 -517,7 +520,7 @@@ ifeq ($(bits),64 CXXFLAGS += -DIS_64BIT endif - ### 3.5 prefetch + ### 3.5 prefetch and popcount ifeq ($(prefetch),yes) ifeq ($(sse),yes) CXXFLAGS += -msse @@@ -529,7 -526,6 +529,6 @@@ els CXXFLAGS += -DNO_PREFETCH endif - ### 3.6 popcnt ifeq ($(popcnt),yes) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT @@@ -540,6 -536,7 +539,7 @@@ endif endif + ### 3.6 SIMD architectures ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@@ -754,7 -751,7 +754,7 @@@ profile-build: net config-sanity objcle $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." - $(PGOBENCH) > /dev/null + $(PGOBENCH) 2>&1 | tail -n 4 @echo "" @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean @@@ -819,7 -816,7 +819,7 @@@ default ### Section 5. Private Targets ### ========================================================================== -all: $(EXE) .depend +all: $(EXE) client .depend config-sanity: net @echo "" @@@ -915,32 -912,6 +915,32 @@@ icc-profile-use EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \ all +### GRPC + +PROTOS_PATH = . +PROTOC = protoc +GRPC_CPP_PLUGIN = grpc_cpp_plugin +GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` + +%.grpc.pb.h %.grpc.pb.cc: %.proto + $(PROTOC) -I $(PROTOS_PATH) --grpc_out=. --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $< + +# oh my +%.cpp: %.cc + cp $< $@ + +%.pb.h %.pb.cc: %.proto + $(PROTOC) -I $(PROTOS_PATH) --cpp_out=. $< + +#LDFLAGS += -Wl,-Bstatic -Wl,-\( -lprotobuf -lgrpc++_unsecure -lgrpc_unsecure -lgrpc -lz -Wl,-\) -Wl,-Bdynamic -ldl +LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/x86_64-linux-gnu/libgrpc++_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc_unsecure.a /usr/lib/x86_64-linux-gnu/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a /usr/lib/x86_64-linux-gnu/libgpr.a /usr/lib/x86_64-linux-gnu/libabsl_str_format_internal.a /usr/lib/x86_64-linux-gnu/libabsl_strings.a /usr/lib/x86_64-linux-gnu/libabsl_flags_marshalling.a /usr/lib/x86_64-linux-gnu/libabsl_throw_delegate.a /usr/lib/x86_64-linux-gnu/libabsl_raw_logging_internal.a /usr/lib/x86_64-linux-gnu/libabsl_base.a /usr/lib/x86_64-linux-gnu/libabsl_int128.a /usr/lib/x86_64-linux-gnu/libabsl_bad_optional_access.a -ldl -lz +#LDFLAGS += /usr/lib/x86_64-linux-gnu/libprotobuf.a /usr/lib/libgrpc++_unsecure.a /usr/lib/libgrpc_unsecure.a /usr/lib/libgrpc.a /usr/lib/x86_64-linux-gnu/libcares.a -ldl -lz + +client: $(CLIOBJS) + $(CXX) -o $@ $(CLIOBJS) $(LDFLAGS) + +# Other stuff + .depend: $(SRCS) -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null diff --combined src/misc.cpp index 4cac7e98,294b7c8f..769761d9 --- a/src/misc.cpp +++ b/src/misc.cpp @@@ -36,6 -36,8 +36,8 @@@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); + typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); + typedef WORD(*fun5_t)(); } #endif @@@ -153,7 -155,6 +155,7 @@@ string engine_info(bool to_uci) { date >> month >> day >> year; ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2); + ss << "-asn"; } ss << (to_uci ? "\nid author ": " by ") @@@ -496,11 -497,11 +498,11 @@@ void bindThisThread(size_t) { #else - /// best_group() retrieves logical processor information using Windows specific - /// API and returns the best group id for the thread with index idx. Original + /// best_node() retrieves logical processor information using Windows specific + /// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Österlund. - int best_group(size_t idx) { + int best_node(size_t idx) { int threads = 0; int nodes = 0; @@@ -514,7 -515,8 +516,8 @@@ if (!fun1) return -1; - // First call to get returnLength. We expect it to fail due to null buffer + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. if (fun1(RelationAll, nullptr, &returnLength)) return -1; @@@ -522,7 -524,7 +525,7 @@@ SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); - // Second call, now we expect to succeed + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed if (!fun1(RelationAll, buffer, &returnLength)) { free(buffer); @@@ -572,22 -574,38 +575,38 @@@ void bindThisThread(size_t idx) { // Use only local variables to be thread-safe - int group = best_group(idx); + int node = best_node(idx); - if (group == -1) + if (node == -1) return; // Early exit if the needed API are not available at runtime HMODULE k32 = GetModuleHandle("Kernel32.dll"); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); + auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); + auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount"); if (!fun2 || !fun3) return; - GROUP_AFFINITY affinity; - if (fun2(group, &affinity)) - fun3(GetCurrentThread(), &affinity, nullptr); + if (!fun4 || !fun5) + { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } + else + { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity + free(affinity); + } } #endif diff --combined src/position.cpp index 97581e12,ae1da017..a4dab37d --- a/src/position.cpp +++ b/src/position.cpp @@@ -283,6 -283,8 +283,6 @@@ Position& Position::set(const string& f thisThread = th; set_state(st); - assert(pos_is_ok()); - return *this; } @@@ -1011,9 -1013,9 +1011,9 @@@ void Position::do_null_move(StateInfo& } st->key ^= Zobrist::side; + ++st->rule50; prefetch(TT.first_entry(key())); - ++st->rule50; st->pliesFromNull = 0; sideToMove = ~sideToMove;