- cd src
script:
+ # Download net
+ - make net
+
# Obtain bench reference from git log
- git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
- export benchref=$(cat git_sig)
- echo "Reference bench:" $benchref
- #
# Compiler version string
- $COMPILER -v
- #
+ # test help target
+ - make help
+
# Verify bench number against various builds
- export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
- - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref
+ - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref
+ - export CXXFLAGS="-Werror"
+ - make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref
+ - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref
+ - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref
+ - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
+ # workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu
+ - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
+
+ # compile only for some more advanced architectures (might not run in travis)
+ - make clean && make -j2 ARCH=x86-64-avx2 build
+ - make clean && make -j2 ARCH=x86-64-bmi2 build
+ - make clean && make -j2 ARCH=x86-64-avx512 build
+ - make clean && make -j2 ARCH=x86-64-vnni512 build
+ - make clean && make -j2 ARCH=x86-64-vnni256 build
#
# Check perft and reproducible search
- - export CXXFLAGS="-Werror"
- - make clean && make -j2 ARCH=x86-64 build
+ - make clean && make -j2 ARCH=x86-64-modern build
- ../tests/perft.sh
- ../tests/reprosearch.sh
# Valgrind
#
- export CXXFLAGS="-O1 -fno-inline"
- - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
+ - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
- if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
#
# Sanitizer
#
- - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
- - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
candirufish
Chess13234
Chris Cain (ceebo)
+Dale Weiler (graphitemaster)
Dan Schmidt (dfannius)
Daniel Axtens (daxtens)
Daniel Dugovic (ddugovic)
-Dariusz Orzechowski
+Dariusz Orzechowski (dorzechowski)
David Zar
Daylen Yang (daylen)
DiscanX
Linmiao Xu (linrock)
Fabian Beuke (madnight)
Fabian Fichter (ianfab)
+Fanael Linithien (Fanael)
fanon
Fauzi Akram Dabat (FauziAkram)
Felix Wittmann
gamander
Gary Heckman (gheckman)
+George Sobala (gsobala)
gguliash
Gian-Carlo Pascutto (gcp)
Gontran Lemaire (gonlem)
Jean-Francois Romang (jromang)
Jekaa
Jerry Donald Watson (jerrydonaldwatson)
+jjoshua2
Jonathan Calovski (Mysseno)
Jonathan Dumale (SFisGOD)
Joost VandeVondele (vondele)
Nikolay Kostov (NikolayIT)
Nguyen Pham (nguyenpham)
Norman Schmidt (FireFather)
+notruck
Ondrej Mosnáček (WOnder93)
Oskar Werkelin Ahlin
Pablo Vazquez
[![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master)
[Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine
-derived from Glaurung 2.1. It features two evaluation functions, the classical
-evaluation based on handcrafted terms, and the NNUE evaluation based on
-efficiently updateable neural networks. The classical evaluation runs efficiently
-on most 64bit CPU architectures, while the NNUE evaluation benefits strongly from the
-vector intrinsics available on modern CPUs (avx2 or similar).
+derived from Glaurung 2.1. Stockfish is not a complete chess program and requires a
+UCI-compatible graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid,
+Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order
+to be used comfortably. Read the documentation for your GUI of choice for information
+about how to use Stockfish with it.
-Stockfish is not a complete chess program and requires a
-UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena,
-Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably.
-Read the documentation for your GUI of choice for information about how to use
-Stockfish with it.
+The Stockfish engine features two evaluation functions for chess, the classical
+evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently
+updateable neural networks. The classical evaluation runs efficiently on almost all
+CPU architectures, while the NNUE evaluation benefits from the vector
+intrinsics available on most CPUs (sse2, avx2, neon, or similar).
## Files
* src, a subdirectory containing the full source code, including a Makefile
that can be used to compile Stockfish on Unix-like systems.
-To use the NNUE evaluation an additional data file with neural network parameters
-needs to be downloaded. The filename for the default set can be found as the default
-value of the `EvalFile` UCI option, with the format
-`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from
+ * a file with the .nnue extension, storing the neural network for the NNUE
+ evaluation. Binary distributions will have this file embedded.
+
+Note: to use the NNUE evaluation, the additional data file with neural network parameters
+needs to be available. Normally, this file is already embedded in the binary or it can be downloaded.
+The filename for the default (recommended) net can be found as the default
+value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue`
+(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from
```
https://tests.stockfishchess.org/api/nn/[filename]
```
* #### Use NNUE
Toggle between the NNUE and classical evaluation functions. If set to "true",
- the network parameters must be availabe to load from file (see also EvalFile).
+ the network parameters must be available to load from file (see also EvalFile),
+ if they are not embedded in the binary.
* #### EvalFile
The name of the file of the NNUE evaluation parameters. Depending on the GUI, the
- filename should include the full path to the folder/directory that contains the file.
-
- * #### Contempt
- A positive value for contempt favors middle game positions and avoids draws,
- effective for the classical evaluation only.
-
- * #### Analysis Contempt
- By default, contempt is set to prefer the side to move. Set this option to "White"
- or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
+ filename might have to include the full path to the folder/directory that contains the file.
+ Other locations, such as the directory that contains the binary and the working directory,
+ are also searched.
* #### UCI_AnalyseMode
An option handled by your GUI.
Limit Syzygy tablebase probing to positions with at most this many pieces left
(including kings and pawns).
+ * #### Contempt
+ A positive value for contempt favors middle game positions and avoids draws,
+ effective for the classical evaluation only.
+
+ * #### Analysis Contempt
+ By default, contempt is set to prefer the side to move. Set this option to "White"
+ or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
+
* #### Move Overhead
Assume a time delay of x ms due to network and GUI overheads. This is useful to
avoid losses on time in those cases.
* #### Debug Log File
Write all communication to and from the engine into a text file.
-## classical and NNUE evaluation
+## A note on classical and NNUE evaluation
Both approaches assign a value to a position that is used in alpha-beta (PVS) search
to find the best move. The classical evaluation computes this value as a function
cd src
make help
make build ARCH=x86-64-modern
+ make net
```
When not using the Makefile to compile (for instance with Microsoft MSVC) you
be found by typing the following commands in a console:
```
- ./stockfish
- compiler
+ ./stockfish compiler
```
## Understanding the code base and participating in the project
-Contributors with >10,000 CPU hours as of January 7, 2020
+Contributors with >10,000 CPU hours as of September 2, 2020
Thank you!
Username CPU Hours Games played
--------------------------------------------------
-noobpwnftw 9305707 695548021
-mlang 780050 61648867
-dew 621626 43921547
-mibere 524702 42238645
-crunchy 354587 27344275
-cw 354495 27274181
-fastgm 332801 22804359
-JojoM 295750 20437451
-CSU_Dynasty 262015 21828122
-Fisherman 232181 18939229
-ctoks 218866 17622052
-glinscott 201989 13780820
-tvijlbrief 201204 15337115
-velislav 188630 14348485
-gvreuls 187164 15149976
-bking_US 180289 11876016
-nordlandia 172076 13467830
-leszek 157152 11443978
-Thanar 148021 12365359
-spams 141975 10319326
-drabel 138073 11121749
-vdv 137850 9394330
-mgrabiak 133578 10454324
-TueRens 132485 10878471
-bcross 129683 11557084
-marrco 126078 9356740
-sqrt2 125830 9724586
-robal 122873 9593418
-vdbergh 120766 8926915
-malala 115926 8002293
-CoffeeOne 114241 5004100
-dsmith 113189 7570238
-BrunoBanani 104644 7436849
-Data 92328 8220352
-mhoram 89333 6695109
-davar 87924 7009424
-xoto 81094 6869316
-ElbertoOne 80899 7023771
-grandphish2 78067 6160199
-brabos 77212 6186135
-psk 75733 5984901
-BRAVONE 73875 5054681
-sunu 70771 5597972
-sterni1971 70605 5590573
-MaZePallas 66886 5188978
-Vizvezdenec 63708 4967313
-nssy 63462 5259388
-jromang 61634 4940891
-teddybaer 61231 5407666
-Pking_cda 60099 5293873
-solarlight 57469 5028306
-dv8silencer 56913 3883992
-tinker 54936 4086118
-renouve 49732 3501516
-Freja 49543 3733019
-robnjr 46972 4053117
-rap 46563 3219146
-Bobo1239 46036 3817196
-ttruscott 45304 3649765
-racerschmacer 44881 3975413
-finfish 44764 3370515
-eva42 41783 3599691
-biffhero 40263 3111352
-bigpen0r 39817 3291647
-mhunt 38871 2691355
-ronaldjerum 38820 3240695
-Antihistamine 38785 2761312
-pb00067 38038 3086320
-speedycpu 37591 3003273
-rkl 37207 3289580
-VoyagerOne 37050 3441673
-jbwiebe 35320 2805433
-cuistot 34191 2146279
-homyur 33927 2850481
-manap 32873 2327384
-gri 32538 2515779
-oryx 31267 2899051
-EthanOConnor 30959 2090311
-SC 30832 2730764
-csnodgrass 29505 2688994
-jmdana 29458 2205261
-strelock 28219 2067805
-jkiiski 27832 1904470
-Pyafue 27533 1902349
-Garf 27515 2747562
-eastorwest 27421 2317535
-slakovv 26903 2021889
-Prcuvu 24835 2170122
-anst 24714 2190091
-hyperbolic.tom 24319 2017394
-Patrick_G 23687 1801617
-Sharaf_DG 22896 1786697
-nabildanial 22195 1519409
-chriswk 21931 1868317
-achambord 21665 1767323
-Zirie 20887 1472937
-team-oh 20217 1636708
-Isidor 20096 1680691
-ncfish1 19931 1520927
-nesoneg 19875 1463031
-Spprtr 19853 1548165
-JanErik 19849 1703875
-agg177 19478 1395014
-SFTUser 19231 1567999
-xor12 19017 1680165
-sg4032 18431 1641865
-rstoesser 18118 1293588
-MazeOfGalious 17917 1629593
-j3corre 17743 941444
-cisco2015 17725 1690126
-ianh2105 17706 1632562
-dex 17678 1467203
-jundery 17194 1115855
-iisiraider 17019 1101015
-horst.prack 17012 1465656
-Adrian.Schmidt123 16563 1281436
-purplefishies 16342 1092533
-wei 16274 1745989
-ville 16144 1384026
-eudhan 15712 1283717
-OuaisBla 15581 972000
-DragonLord 15559 1162790
-dju 14716 875569
-chris 14479 1487385
-0xB00B1ES 14079 1001120
-OssumOpossum 13776 1007129
-enedene 13460 905279
-bpfliegel 13346 884523
-Ente 13198 1156722
-IgorLeMasson 13087 1147232
-jpulman 13000 870599
-ako027ako 12775 1173203
-Nikolay.IT 12352 1068349
-Andrew Grant 12327 895539
-joster 12008 950160
-AdrianSA 11996 804972
-Nesa92 11455 1111993
-fatmurphy 11345 853210
-Dark_wizzie 11108 1007152
-modolief 10869 896470
-mschmidt 10757 803401
-infinity 10594 727027
-mabichito 10524 749391
-Thomas A. Anderson 10474 732094
-thijsk 10431 719357
-Flopzee 10339 894821
-crocogoat 10104 1013854
-SapphireBrand 10104 969604
-stocky 10017 699440
+noobpwnftw 19352969 1231459677
+mlang 957168 61657446
+dew 949885 56893432
+mibere 703817 46865007
+crunchy 427035 27344275
+cw 416006 27521077
+JojoM 415904 24479564
+fastgm 404873 23953472
+CSU_Dynasty 335774 22850550
+tvijlbrief 335199 21871270
+Fisherman 325053 21786603
+gvreuls 311480 20751516
+ctoks 275877 18710423
+velislav 241267 15596372
+glinscott 217799 13780820
+nordlandia 211692 13484886
+bcross 206213 14934233
+bking_US 198894 11876016
+leszek 189170 11446821
+mgrabiak 183896 11778092
+drabel 181408 12489478
+TueRens 181349 12192000
+Thanar 179852 12365359
+vdv 175171 9881246
+robal 166948 10702862
+spams 157128 10319326
+marrco 149947 9376421
+sqrt2 147963 9724586
+vdbergh 137041 8926915
+CoffeeOne 136294 5004100
+malala 136182 8002293
+mhoram 128934 8177193
+davar 122092 7960001
+dsmith 122059 7570238
+xoto 119696 8222144
+grandphish2 116481 7582197
+Data 113305 8220352
+BrunoBanani 112960 7436849
+ElbertoOne 99028 7023771
+MaZePallas 98571 6362619
+brabos 92118 6186135
+psk 89957 5984901
+sunu 88463 6007033
+sterni1971 86948 5613788
+Vizvezdenec 83752 5343724
+BRAVONE 81239 5054681
+nssy 76497 5259388
+teddybaer 75125 5407666
+Pking_cda 73776 5293873
+jromang 70695 4940891
+solarlight 70517 5028306
+dv8silencer 70287 3883992
+Bobo1239 68515 4652287
+racerschmacer 67468 4935996
+manap 66273 4121774
+tinker 63458 4213726
+linrock 59082 4516053
+robnjr 57262 4053117
+Freja 56938 3733019
+ttruscott 56005 3679485
+renouve 53811 3501516
+cuistot 52532 3014920
+finfish 51360 3370515
+eva42 51272 3599691
+rkl 50759 3840947
+rap 49985 3219146
+pb00067 49727 3298270
+ronaldjerum 47654 3240695
+bigpen0r 47278 3291647
+biffhero 46564 3111352
+VoyagerOne 45386 3445881
+speedycpu 43842 3003273
+jbwiebe 43305 2805433
+Antihistamine 41788 2761312
+mhunt 41735 2691355
+eastorwest 40387 2812173
+homyur 39893 2850481
+gri 39871 2515779
+oryx 38228 2941656
+0x3C33 37773 2529097
+SC 37290 2731014
+csnodgrass 36207 2688994
+jmdana 36108 2205261
+strelock 34716 2074055
+Garf 33800 2747562
+EthanOConnor 33370 2090311
+slakovv 32915 2021889
+Spprtr 32591 2139601
+Prcuvu 30377 2170122
+anst 30301 2190091
+jkiiski 30136 1904470
+hyperbolic.tom 29840 2017394
+Pyafue 29650 1902349
+OuaisBla 27629 1578000
+chriswk 26902 1868317
+achambord 26582 1767323
+Patrick_G 26276 1801617
+yorkman 26193 1992080
+SFTUser 25182 1675689
+nabildanial 24942 1519409
+Sharaf_DG 24765 1786697
+ncfish1 24411 1520927
+agg177 23890 1395014
+JanErik 23408 1703875
+Isidor 23388 1680691
+Norabor 22976 1587862
+cisco2015 22880 1759669
+Zirie 22542 1472937
+team-oh 22272 1636708
+MazeOfGalious 21978 1629593
+sg4032 21945 1643065
+ianh2105 21725 1632562
+xor12 21628 1680365
+dex 21612 1467203
+nesoneg 21494 1463031
+horst.prack 20878 1465656
+0xB00B1ES 20590 1208666
+j3corre 20405 941444
+Adrian.Schmidt123 20316 1281436
+wei 19973 1745989
+rstoesser 19569 1293588
+eudhan 19274 1283717
+Ente 19070 1373058
+jundery 18445 1115855
+iisiraider 18247 1101015
+ville 17883 1384026
+chris 17698 1487385
+purplefishies 17595 1092533
+DragonLord 17014 1162790
+dju 16515 929427
+IgorLeMasson 16064 1147232
+ako027ako 15671 1173203
+Nikolay.IT 15154 1068349
+Andrew Grant 15114 895539
+yurikvelo 15027 1165616
+OssumOpossum 14857 1007129
+enedene 14476 905279
+bpfliegel 14298 884523
+jpulman 13982 870599
+joster 13794 950160
+Nesa92 13786 1114691
+Dark_wizzie 13422 1007152
+Hjax 13350 900887
+Fifis 13313 965473
+mabichito 12903 749391
+thijsk 12886 722107
+crocogoat 12876 1048802
+AdrianSA 12860 804972
+Flopzee 12698 894821
+fatmurphy 12547 853210
+SapphireBrand 12416 969604
+modolief 12386 896470
+scuzzi 12362 833465
+pgontarz 12151 848794
+stocky 11954 699440
+mschmidt 11941 803401
+infinity 11470 727027
+torbjo 11387 728873
+Thomas A. Anderson 11372 732094
+snicolet 11106 869170
+amicic 10779 733593
+rpngn 10712 688203
+d64 10680 771144
+basepi 10637 744851
+jjoshua2 10559 670905
+dzjp 10343 732529
+ols 10259 570669
+lbraesch 10252 647825
build_script:
- cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
+ - ps: |
+ # Download default NNUE net from fishtest
+ # The net name is read from the EvalFileDefaultName line of src\evaluate.h.
+ # The dot before "nnue" is escaped so that it only matches a literal dot.
+ $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}\.nnue"
+ $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}\.nnue)"
+ $nnuenet = $Matches.nnuenet
+ # Stop here rather than building a download URL and output path with an empty file name.
+ if ([string]::IsNullOrEmpty($nnuenet)) {
+ throw "Could not find the default net name (EvalFileDefaultName) in src\evaluate.h"
+ }
+ Write-Host "Default net:" $nnuenet
+ $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet"
+ $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet"
+ # Skip the download when the net is already next to the built binary.
+ if (Test-Path -Path $nnuefilepath) {
+ Write-Host "Already available."
+ } else {
+ Write-Host "Downloading $nnuedownloadurl to $nnuefilepath"
+ Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath
+ }
before_test:
- cd src/%CONFIGURATION%
# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction
+# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
-# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3
+# mmx = yes/no --- -mmmx --- Use Intel MMX instructions
+# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2
# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
-# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
-# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
+# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256
+# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
#
# Note that Makefile is space sensitive, so when adding new architectures
# at the end of the line for flag values.
### 2.1. General and architecture defaults
+
+# ARCH is empty at this point: fall back to x86-64-modern and set
+# help_skip_sanity. The help target only prints the configuration summary when
+# $(SUPPORTED_ARCH)$(help_skip_sanity) is exactly "true", so with the fallback
+# it prints the "Specify a supported architecture" hint instead.
+ifeq ($(ARCH),)
+ ARCH = x86-64-modern
+ help_skip_sanity = yes
+endif
+# explicitly check for the list of supported architectures (as listed with make help),
+# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
+# ($(filter ...) gives back $(ARCH) only when it is one of the names listed below)
+ifeq ($(ARCH), $(filter $(ARCH), \
+ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
+ x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
+ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
+ armv7 armv7-neon armv8 apple-silicon general-64 general-32))
+ SUPPORTED_ARCH=true
+else
+ SUPPORTED_ARCH=false
+endif
+
optimize = yes
debug = no
sanitize = no
bits = 64
prefetch = no
popcnt = no
+pext = no
sse = no
-sse3 = no
+mmx = no
+sse2 = no
ssse3 = no
sse41 = no
-sse42 = no
avx2 = no
-pext = no
avx512 = no
+vnni256 = no
+vnni512 = no
neon = no
-ARCH = x86-64-modern
+STRIP = strip
### 2.2 Architecture specific
-ifeq ($(ARCH),general-32)
- arch = any
- bits = 32
-endif
-ifeq ($(ARCH),x86-32-old)
+ifeq ($(findstring x86,$(ARCH)),x86)
+
+# x86-32/64
+
+ifeq ($(findstring x86-32,$(ARCH)),x86-32)
arch = i386
bits = 32
+ sse = yes
+ mmx = yes
+else
+ arch = x86_64
+ sse = yes
+ sse2 = yes
endif
-ifeq ($(ARCH),x86-32)
- arch = i386
- bits = 32
- prefetch = yes
+ifeq ($(findstring -sse,$(ARCH)),-sse)
sse = yes
endif
-ifeq ($(ARCH),general-64)
- arch = any
+ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
+ popcnt = yes
endif
-ifeq ($(ARCH),x86-64)
- arch = x86_64
- prefetch = yes
- sse = yes
+ifeq ($(findstring -mmx,$(ARCH)),-mmx)
+ mmx = yes
endif
-ifeq ($(ARCH),x86-64-sse3)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -sse2,$(ARCH)),-sse2)
sse = yes
- sse3 = yes
+ sse2 = yes
endif
-ifeq ($(ARCH),x86-64-sse3-popcnt)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
sse = yes
- sse3 = yes
- popcnt = yes
+ sse2 = yes
+ ssse3 = yes
endif
-ifeq ($(ARCH),x86-64-ssse3)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -sse41,$(ARCH)),-sse41)
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
+ sse41 = yes
endif
-ifeq ($(ARCH),x86-64-sse41)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -modern,$(ARCH)),-modern)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
endif
-ifeq ($(ARCH),x86-64-modern)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -avx2,$(ARCH)),-avx2)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
+ avx2 = yes
endif
-ifeq ($(ARCH),x86-64-sse42)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
- sse42 = yes
+ avx2 = yes
+ pext = yes
endif
-ifeq ($(ARCH),x86-64-avx2)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -avx512,$(ARCH)),-avx512)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
- sse42 = yes
avx2 = yes
+ pext = yes
+ avx512 = yes
endif
-ifeq ($(ARCH),x86-64-bmi2)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -vnni256,$(ARCH)),-vnni256)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
- sse42 = yes
avx2 = yes
pext = yes
+ vnni256 = yes
endif
-ifeq ($(ARCH),x86-64-avx512)
- arch = x86_64
- prefetch = yes
+ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
popcnt = yes
sse = yes
- sse3 = yes
+ sse2 = yes
ssse3 = yes
sse41 = yes
- sse42 = yes
avx2 = yes
pext = yes
avx512 = yes
+ vnni512 = yes
+endif
+
+ifeq ($(sse),yes)
+ prefetch = yes
+endif
+
+# 64-bit pext is not available on x86-32
+ifeq ($(bits),32)
+ pext = no
+endif
+
+else
+
+# all other architectures
+
+ifeq ($(ARCH),general-32)
+ arch = any
+ bits = 32
+endif
+
+ifeq ($(ARCH),general-64)
+ arch = any
endif
ifeq ($(ARCH),armv7)
bits = 32
endif
+ifeq ($(ARCH),armv7-neon)
+ arch = armv7
+ prefetch = yes
+ popcnt = yes
+ neon = yes
+ bits = 32
+endif
+
ifeq ($(ARCH),armv8)
- arch = armv8-a
+ arch = armv8
prefetch = yes
popcnt = yes
neon = yes
prefetch = yes
endif
+endif
+
### ==========================================================================
### Section 3. Low-level Configuration
### ==========================================================================
CXX=g++
CXXFLAGS += -pedantic -Wextra
- ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+ ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android)
CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits)
LDFLAGS += -m$(bits)
endif
+ ifeq ($(arch),$(filter $(arch),armv7))
+ LDFLAGS += -latomic
+ endif
+
ifneq ($(KERNEL),Darwin)
LDFLAGS += -Wl,--no-as-needed
endif
endif
endif
- ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+ ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android)
CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits)
endif
ifeq ($(KERNEL),Darwin)
- CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.15
- LDFLAGS += -arch $(arch) -mmacosx-version-min=10.15
+ CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+ LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+ XCRUN = xcrun
+endif
+
+# To cross-compile for Android, NDK version r21 or later is recommended.
+# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
+# We don't know how to make PGO builds with the NDK yet.
+ifeq ($(COMP),ndk)
+ CXXFLAGS += -stdlib=libc++ -fPIE
+ ifeq ($(arch),armv7)
+ comp=armv7a-linux-androideabi16-clang
+ CXX=armv7a-linux-androideabi16-clang++
+ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+ STRIP=arm-linux-androideabi-strip
+ endif
+ ifeq ($(arch),armv8)
+ comp=aarch64-linux-android21-clang
+ CXX=aarch64-linux-android21-clang++
+ STRIP=aarch64-linux-android-strip
+ endif
+ LDFLAGS += -static-libstdc++ -pie -lm -latomic
endif
### Travis CI script uses COMPILER to overwrite CXX
CXX=$(COMPCXX)
endif
+### Sometimes gcc is really clang
+### (detected by looking for "clang" in the output of `$(CXX) --version`).
+### In that case profile_make/profile_use are pointed at the clang-profile-*
+### targets, and gccisclang is left non-empty for later checks.
+ifeq ($(COMP),gcc)
+# Simply-expanded (:=) on purpose: the compiler is queried once, here, instead
+# of being spawned again every time gccversion or gccisclang is expanded.
+ gccversion := $(shell $(CXX) --version)
+ gccisclang := $(findstring clang,$(gccversion))
+ ifneq ($(gccisclang),)
+ profile_make = clang-profile-make
+ profile_use = clang-profile-use
+ endif
+endif
+
### On mingw use Windows threads, otherwise POSIX
ifneq ($(comp),mingw)
+ CXXFLAGS += -DUSE_PTHREADS
# On Android Bionic's C library comes with its own pthread implementation bundled in
ifneq ($(OS),Android)
# Haiku has pthreads in its libroot, so only link it in on other platforms
ifneq ($(KERNEL),Haiku)
- LDFLAGS += -lpthread
+ ifneq ($(COMP),ndk)
+ LDFLAGS += -lpthread
+ endif
endif
endif
endif
ifeq ($(prefetch),yes)
ifeq ($(sse),yes)
CXXFLAGS += -msse
- DEPENDFLAGS += -msse
endif
else
CXXFLAGS += -DNO_PREFETCH
### 3.6 popcnt
ifeq ($(popcnt),yes)
- ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64))
+ ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT
else ifeq ($(comp),icc)
CXXFLAGS += -msse3 -DUSE_POPCNT
endif
endif
+
ifeq ($(avx2),yes)
CXXFLAGS += -DUSE_AVX2
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
ifeq ($(avx512),yes)
CXXFLAGS += -DUSE_AVX512
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
- CXXFLAGS += -mavx512bw
+ CXXFLAGS += -mavx512f -mavx512bw
+ endif
+endif
+
+ifeq ($(vnni256),yes)
+ CXXFLAGS += -DUSE_VNNI
+ ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+ CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
endif
endif
-ifeq ($(sse42),yes)
- CXXFLAGS += -DUSE_SSE42
+ifeq ($(vnni512),yes)
+ CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
- CXXFLAGS += -msse4.2
+ CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
endif
endif
endif
endif
-ifeq ($(sse3),yes)
- CXXFLAGS += -DUSE_SSE3
+ifeq ($(sse2),yes)
+ CXXFLAGS += -DUSE_SSE2
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
- CXXFLAGS += -msse3
+ CXXFLAGS += -msse2
endif
endif
-ifeq ($(neon),yes)
- CXXFLAGS += -DUSE_NEON
+ifeq ($(mmx),yes)
+ CXXFLAGS += -DUSE_MMX
+ ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+ CXXFLAGS += -mmmx
+ endif
endif
-ifeq ($(arch),x86_64)
- CXXFLAGS += -DUSE_SSE2
+ifeq ($(neon),yes)
+ CXXFLAGS += -DUSE_NEON
+ ifeq ($(KERNEL),Linux)
+ ifneq ($(COMP),ndk)
+ ifneq ($(arch),armv8)
+ CXXFLAGS += -mfpu=neon
+ endif
+ endif
+ endif
endif
### 3.7 pext
### needs access to the optimization flags.
ifeq ($(optimize),yes)
ifeq ($(debug), no)
- ifeq ($(comp),$(filter $(comp),gcc clang))
+ ifeq ($(COMP),ndk)
+ CXXFLAGS += -flto=thin
+ LDFLAGS += $(CXXFLAGS)
+ else ifeq ($(comp),clang)
+ CXXFLAGS += -flto=thin
+ ifneq ($(findstring MINGW,$(KERNEL)),)
+ CXXFLAGS += -fuse-ld=lld
+ else ifneq ($(findstring MSYS,$(KERNEL)),)
+ CXXFLAGS += -fuse-ld=lld
+ endif
+ LDFLAGS += $(CXXFLAGS)
+
+# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
+# GCC on some systems.
+ else ifeq ($(comp),gcc)
+ ifeq ($(gccisclang),)
CXXFLAGS += -flto
+ LDFLAGS += $(CXXFLAGS) -flto=jobserver
+ ifneq ($(findstring MINGW,$(KERNEL)),)
+ LDFLAGS += -save-temps
+ else ifneq ($(findstring MSYS,$(KERNEL)),)
+ LDFLAGS += -save-temps
+ endif
+ else
+ CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS)
endif
# To use LTO and static linking on windows, the tool chain requires a recent gcc:
-# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not.
+# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
# So, only enable it for a cross from Linux by default.
- ifeq ($(comp),mingw)
+ else ifeq ($(comp),mingw)
ifeq ($(KERNEL),Linux)
+ ifneq ($(arch),i386)
CXXFLAGS += -flto
- LDFLAGS += $(CXXFLAGS)
+ LDFLAGS += $(CXXFLAGS) -flto=jobserver
+ endif
endif
endif
endif
### Section 4. Public Targets
### ==========================================================================
+
help:
@echo ""
@echo "To compile stockfish, type: "
@echo ""
@echo "Supported targets:"
@echo ""
+ @echo "help > Display architecture details"
@echo "build > Standard build"
- @echo "profile-build > Standard build with PGO"
+ @echo "net > Download the default nnue net"
+ @echo "profile-build > Faster build (with profile-guided optimization)"
@echo "strip > Strip executable"
@echo "install > Install executable"
@echo "clean > Clean up"
- @echo "net > Download the default nnue net"
@echo ""
@echo "Supported archs:"
@echo ""
+ @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide"
+ @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide"
@echo "x86-64-avx512 > x86 64-bit with avx512 support"
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support"
@echo "x86-64-avx2 > x86 64-bit with avx2 support"
- @echo "x86-64-sse42 > x86 64-bit with sse42 support"
- @echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)"
- @echo "x86-64-sse41 > x86 64-bit with sse41 support"
+ @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support"
+ @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt"
@echo "x86-64-ssse3 > x86 64-bit with ssse3 support"
@echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support"
- @echo "x86-64-sse3 > x86 64-bit with sse3 support"
- @echo "x86-64 > x86 64-bit generic"
- @echo "x86-32 > x86 32-bit (also enables SSE)"
- @echo "x86-32-old > x86 32-bit fall back for old hardware"
+ @echo "x86-64 > x86 64-bit generic (with sse2 support)"
+ @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support"
+ @echo "x86-32-sse2 > x86 32-bit with sse2 support"
+ @echo "x86-32 > x86 32-bit generic (with mmx and sse support)"
@echo "ppc-64 > PPC 64-bit"
@echo "ppc-32 > PPC 32-bit"
@echo "armv7 > ARMv7 32-bit"
- @echo "armv8 > ARMv8 64-bit"
+ @echo "armv7-neon > ARMv7 32-bit with popcnt and neon"
+ @echo "armv8 > ARMv8 64-bit with popcnt and neon"
@echo "apple-silicon > Apple silicon ARM64"
@echo "general-64 > unspecified 64-bit"
@echo "general-32 > unspecified 32-bit"
@echo "mingw > Gnu compiler with MinGW under Windows"
@echo "clang > LLVM Clang compiler"
@echo "icc > Intel compiler"
+ @echo "ndk > Google NDK to cross-compile for Android"
@echo ""
@echo "Simple examples. If you don't know what to do, you likely want to run: "
@echo ""
- @echo "make -j build ARCH=x86-64 (This is for 64-bit systems)"
- @echo "make -j build ARCH=x86-32 (This is for 32-bit systems)"
- @echo ""
- @echo "Advanced examples, for experienced users: "
+ @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)"
+ @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)"
@echo ""
- @echo "make -j build ARCH=x86-64-modern COMP=clang"
- @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
+ @echo "Advanced examples, for experienced users looking for performance: "
@echo ""
- @echo "The selected architecture $(ARCH) enables the following configuration: "
+ @echo "make help ARCH=x86-64-bmi2"
+ @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0"
+ @echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
@echo ""
+ @echo "-------------------------------"
+ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
+ @echo "The selected architecture $(ARCH) will enable the following configuration: "
@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
+else
+ @echo "Specify a supported architecture with the ARCH option for more details"
+ @echo ""
+endif
.PHONY: help build profile-build strip install clean net objclean profileclean \
config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
clang-profile-use clang-profile-make
-build: config-sanity
+build: config-sanity net
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
-profile-build: config-sanity objclean profileclean
+profile-build: net config-sanity objclean profileclean
@echo ""
@echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
strip:
- strip $(EXE)
+ $(STRIP) $(EXE)
install:
-mkdir -p -m 755 $(BINDIR)
-cp $(EXE) $(BINDIR)
-strip $(BINDIR)/$(EXE)
-#clean all
+# clean all
clean: objclean profileclean
@rm -f .depend *~ core
+# evaluation network (nnue)
# net: reads the default network file name (nn-<12 chars>.nnue) out of the sources,
# downloads it with curl or wget when no file of that name exists in the current
# directory, and then compares the first 12 characters of the file's sha256 digest
# with the name. Validation is skipped when neither shasum nor sha256sum is found.
# NOTE(review): "curl -k" disables TLS certificate verification, so the checksum
# comparison is the only integrity check on the download - confirm this is intended.
net:
- $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+ $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
@echo "Default net: $(nnuenet)"
$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
- $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
- @if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi
+ $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
+ @if test -f "$(nnuenet)"; then \
+ echo "Already available."; \
+ else \
+ if [ "x$(curl_or_wget)" = "x" ]; then \
+ echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \
+ else \
+ echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\
+ fi; \
+ fi;
+ $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
+ @if [ "x$(shasum_command)" != "x" ]; then \
+ if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+ echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \
+ fi \
+ else \
+ echo "shasum / sha256sum not found, skipping net validation"; \
+ fi
# clean binaries and objects
objclean:
# clean auxiliary profiling files
profileclean:
@rm -rf profdir
- @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda
+ @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s
@rm -f stockfish.profdata *.profraw
default:
@echo "os: '$(OS)'"
@echo "prefetch: '$(prefetch)'"
@echo "popcnt: '$(popcnt)'"
+ @echo "pext: '$(pext)'"
@echo "sse: '$(sse)'"
- @echo "sse3: '$(sse3)'"
+ @echo "mmx: '$(mmx)'"
+ @echo "sse2: '$(sse2)'"
@echo "ssse3: '$(ssse3)'"
@echo "sse41: '$(sse41)'"
- @echo "sse42: '$(sse42)'"
@echo "avx2: '$(avx2)'"
- @echo "pext: '$(pext)'"
@echo "avx512: '$(avx512)'"
+ @echo "vnni256: '$(vnni256)'"
+ @echo "vnni512: '$(vnni512)'"
@echo "neon: '$(neon)'"
@echo ""
@echo "Flags:"
@test "$(debug)" = "yes" || test "$(debug)" = "no"
@test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
+ @test "$(SUPPORTED_ARCH)" = "true"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
- test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
+ test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
@test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
+ @test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(sse)" = "yes" || test "$(sse)" = "no"
- @test "$(sse3)" = "yes" || test "$(sse3)" = "no"
+ @test "$(mmx)" = "yes" || test "$(mmx)" = "no"
+ @test "$(sse2)" = "yes" || test "$(sse2)" = "no"
@test "$(ssse3)" = "yes" || test "$(ssse3)" = "no"
@test "$(sse41)" = "yes" || test "$(sse41)" = "no"
- @test "$(sse42)" = "yes" || test "$(sse42)" = "no"
@test "$(avx2)" = "yes" || test "$(avx2)" = "no"
- @test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(avx512)" = "yes" || test "$(avx512)" = "no"
+ @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no"
+ @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no"
@test "$(neon)" = "yes" || test "$(neon)" = "no"
- @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
+ @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
+ || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang"
$(EXE): $(OBJS)
- $(CXX) -o $@ $(OBJS) $(LDFLAGS)
+ +$(CXX) -o $@ $(OBJS) $(LDFLAGS)
clang-profile-make:
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
all
clang-profile-use:
- llvm-profdata merge -output=stockfish.profdata *.profraw
+ $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
EXTRALDFLAGS='-fprofile-use ' \
/// setup_bench() builds a list of UCI commands to be run by bench. There
/// are six parameters: TT size in MB, number of search threads that
/// should be used, the limit value spent for each position, a file name
-/// where to look for positions in FEN format and the type of the limit:
-/// depth, perft, nodes and movetime (in millisecs).
+/// where to look for positions in FEN format, the type of the limit:
+/// depth, perft, nodes and movetime (in millisecs), and evaluation type
+/// mixed (default), classical, NNUE.
///
/// bench -> search default positions up to depth 13
/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
string limit = (is >> token) ? token : "13";
string fenFile = (is >> token) ? token : "default";
string limitType = (is >> token) ? token : "depth";
+ string evalType = (is >> token) ? token : "mixed";
go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
list.emplace_back("setoption name Hash value " + ttSize);
list.emplace_back("ucinewgame");
+ size_t posCounter = 0;
+
for (const string& fen : fens)
if (fen.find("setoption") != string::npos)
list.emplace_back(fen);
else
{
+ if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0))
+ list.emplace_back("setoption name Use NNUE value false");
+ else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0))
+ list.emplace_back("setoption name Use NNUE value true");
list.emplace_back("position fen " + fen);
list.emplace_back(go);
+ ++posCounter;
}
+ list.emplace_back("setoption name Use NNUE value true");
+
return list;
}
Bitboard BishopTable[0x1480]; // To store bishop attacks
void init_magics(PieceType pt, Bitboard table[], Magic magics[]);
+
+}
+
+
+/// safe_destination() returns the bitboard of target square for the given step
+/// from the given square. If the step is off the board, returns empty bitboard.
+
+inline Bitboard safe_destination(Square s, int step) {
+ Square to = Square(s + step); // candidate target square; may lie outside the board
+ return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); // distance limit presumably rejects steps that wrap around a board edge - TODO confirm
}
Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST};
Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST};
- for(Direction d : (pt == ROOK ? RookDirections : BishopDirections))
+ for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
{
Square s = sq;
while(safe_destination(s, d) && !(occupied & s))
inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); }
-/// safe_destination() returns the bitboard of target square for the given step
-/// from the given square. If the step is off the board, returns empty bitboard.
-
-inline Bitboard safe_destination(Square s, int step)
-{
- Square to = Square(s + step);
- return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
-}
-
-
/// attacks_bb(Square) returns the pseudo attacks of the given piece type
/// assuming an empty board.
#include <cassert>
#include <cstdlib>
#include <cstring> // For std::memset
+#include <fstream>
#include <iomanip>
#include <sstream>
#include <iostream>
+#include <streambuf>
+#include <vector>
#include "bitboard.h"
#include "evaluate.h"
#include "material.h"
+#include "misc.h"
#include "pawns.h"
#include "thread.h"
#include "uci.h"
+#include "incbin/incbin.h"
+
+
+// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler).
+// This macro invocation will declare the following three variables
+// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data
+// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end
+// const unsigned int gEmbeddedNNUESize; // the size of the embedded file
+// Note that this does not work in Microsoft Visual Studio.
+#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
+ INCBIN(EmbeddedNNUE, EvalFileDefaultName);
+#else
+ const unsigned char gEmbeddedNNUEData[1] = {0x0};
+ const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1];
+ const unsigned int gEmbeddedNNUESize = 1;
+#endif
+
+
+using namespace std;
+using namespace Eval::NNUE;
namespace Eval {
bool useNNUE;
- std::string eval_file_loaded="None";
+ string eval_file_loaded = "None";
+
+ /// init_NNUE() tries to load a nnue network at startup time, or when the engine
+ /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
+ /// The name of the nnue network is always retrieved from the EvalFile option.
+ /// We search the given network in three locations: internally (the default
+ /// network may be embedded in the binary), in the active working directory and
+ /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
+ /// variable to have the engine search in a special directory in their distro.
void init_NNUE() {
useNNUE = Options["Use NNUE"]; // cache the current value of the "Use NNUE" UCI option
- std::string eval_file = std::string(Options["EvalFile"]);
- if (useNNUE && eval_file_loaded != eval_file)
- if (Eval::NNUE::load_eval_file(eval_file))
- eval_file_loaded = eval_file;
+ if (!useNNUE)
+ return; // nothing to load when NNUE is switched off
+
+ string eval_file = string(Options["EvalFile"]); // network name requested through the EvalFile option
+
+ #if defined(DEFAULT_NNUE_DIRECTORY)
+ #define stringify2(x) #x
+ #define stringify(x) stringify2(x)
+ vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; // the packager-defined directory is tried last
+ #else
+ vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory }; // "<internal>" = embedded data, "" = name used as given, then the binary's directory
+ #endif
+
+ for (string directory : dirs)
+ if (eval_file_loaded != eval_file) // false once a location succeeded, so the remaining locations are skipped
+ {
+ if (directory != "<internal>")
+ {
+ ifstream stream(directory + eval_file, ios::binary); // open <directory><eval_file> as a binary file stream
+ if (load_eval(eval_file, stream))
+ eval_file_loaded = eval_file; // remember which net is now active
+ }
+
+ if (directory == "<internal>" && eval_file == EvalFileDefaultName) // embedded data is only tried for the default net name
+ {
+ // C++ way to prepare a buffer for a memory stream
+ class MemoryBuffer : public basic_streambuf<char> {
+ public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } // get and put areas both cover the n bytes starting at p
+ };
+
+ MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
+ size_t(gEmbeddedNNUESize));
+
+ istream stream(&buffer); // expose the embedded bytes through the istream interface load_eval() takes
+ if (load_eval(eval_file, stream))
+ eval_file_loaded = eval_file;
+ }
+ }
}
+ /// verify_NNUE() verifies that the last net used was loaded successfully
void verify_NNUE() {
- std::string eval_file = std::string(Options["EvalFile"]);
+ string eval_file = string(Options["EvalFile"]);
+
if (useNNUE && eval_file_loaded != eval_file) // NNUE is requested but the requested net is not the one recorded as loaded
{
- std::cerr << "Use of NNUE evaluation, but the file " << eval_file << " was not loaded successfully. "
- << "These network evaluation parameters must be available, compatible with this version of the code. "
- << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << std::endl;
- std::exit(EXIT_FAILURE);
+ UCI::OptionsMap defaults;
+ UCI::init(defaults); // presumably fills the map with default option values; only "EvalFile" is read from it below
+
+ string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
+ string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
+ string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
+ string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
+ string msg5 = "The engine will be terminated now.";
+
+ sync_cout << "info string ERROR: " << msg1 << sync_endl; // each message goes out as its own "info string" line
+ sync_cout << "info string ERROR: " << msg2 << sync_endl;
+ sync_cout << "info string ERROR: " << msg3 << sync_endl;
+ sync_cout << "info string ERROR: " << msg4 << sync_endl;
+ sync_cout << "info string ERROR: " << msg5 << sync_endl;
+
+ exit(EXIT_FAILURE); // a missing net is treated as fatal
}
if (useNNUE)
- sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
+ sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
- sync_cout << "info string classical evaluation enabled." << sync_endl;
+ sync_cout << "info string classical evaluation enabled" << sync_endl;
}
}
constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222);
- constexpr Value NNUEThreshold = Value(520);
+ constexpr Value NNUEThreshold1 = Value(550);
+ constexpr Value NNUEThreshold2 = Value(150);
// KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
// Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
// pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
- constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
+ constexpr Score Outpost[] = { S(56, 34), S(31, 23) };
// PassedRank[Rank] contains a bonus according to the rank of a passed pawn
constexpr Score PassedRank[RANK_NB] = {
- S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
+ S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260)
};
// RookOnFile[semiopen/open] contains bonuses for each rook when there is
// no (friendly) pawn on the rook file.
- constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
+ constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) };
// ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
// which piece type attacks which one. Attacks on lesser pieces which are
// pawn-defended are not considered.
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
- S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161)
+ S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
};
constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
- S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
+ S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
};
// Assorted bonuses and penalties
attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]);
// Init our king safety tables
- Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G),
- Utility::clamp(rank_of(ksq), RANK_2, RANK_7));
+ Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G),
+ std::clamp(rank_of(ksq), RANK_2, RANK_7));
kingRing[Us] = attacks_bb<KING>(s) | s;
kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them));
Square blockSq = s + Up;
// Adjust bonus based on the king's proximity
- bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4
- - king_proximity(Us, blockSq) * 2) * w);
+ bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4
+ - king_proximity(Us, blockSq) * 2) * w);
// If blockSq is not the queening square then consider also a second push
if (r != RANK_7)
// Evaluation::space() computes a space evaluation for a given side, aiming to improve game
- // play in the opening. It is based on the number of safe squares on the 4 central files
+ // play in the opening. It is based on the number of safe squares on the four central files
// on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice.
// Finally, the space bonus is multiplied by a weight which decreases according to occupancy.
// Now apply the bonus: note that we find the attacking side by extracting the
// sign of the midgame or endgame values, and that we carefully cap the bonus
// so that the midgame and endgame scores do not change sign after the bonus.
- int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0);
+ int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0);
int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg));
mg += u;
// Side to move point of view
v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
- // Damp down the evaluation linearly when shuffling
- v = v * (100 - pos.rule50_count()) / 100;
-
return v;
}
Value Eval::evaluate(const Position& pos) {
- if (Eval::useNNUE)
- {
- Value v = eg_value(pos.psq_score());
- // Take NNUE eval only on balanced positions
- if (abs(v) < NNUEThreshold)
- return NNUE::evaluate(pos) + Tempo;
- }
- return Evaluation<NO_TRACE>(pos).value();
+ // Use classical eval if there is a large imbalance
+ // If there is a moderate imbalance, use classical eval with probability (1/8),
+ // as derived from the node counter.
+ bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); // threshold is NNUEThreshold1 scaled by (16 + rule50) / 16
+ bool classical = !Eval::useNNUE
+ || useClassical
+ || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); // 0xB tests three bits of the node counter: all clear in 1 of 8 values
+ Value v = classical ? Evaluation<NO_TRACE>(pos).value()
+ : NNUE::evaluate(pos) * 5 / 4 + Tempo; // NNUE output is scaled by 5/4 before Tempo is added
+
+ if ( useClassical
+ && Eval::useNNUE
+ && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) // classical score came out small despite the large imbalance
+ v = NNUE::evaluate(pos) * 5 / 4 + Tempo; // replace it with the NNUE score
+
+ // Damp down the evaluation linearly when shuffling
+ v = v * (100 - pos.rule50_count()) / 100;
+
+ // Guarantee evaluation does not hit the tablebase range
+ v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
+
+ return v;
}
/// trace() is like evaluate(), but instead of returning a value, it returns
Value v;
+ std::memset(scores, 0, sizeof(scores));
+
+ pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
+
+ v = Evaluation<TRACE>(pos).value();
+
+ ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
+ << " Term | White | Black | Total \n"
+ << " | MG EG | MG EG | MG EG \n"
+ << " ------------+-------------+-------------+------------\n"
+ << " Material | " << Term(MATERIAL)
+ << " Imbalance | " << Term(IMBALANCE)
+ << " Pawns | " << Term(PAWN)
+ << " Knights | " << Term(KNIGHT)
+ << " Bishops | " << Term(BISHOP)
+ << " Rooks | " << Term(ROOK)
+ << " Queens | " << Term(QUEEN)
+ << " Mobility | " << Term(MOBILITY)
+ << " King safety | " << Term(KING)
+ << " Threats | " << Term(THREAT)
+ << " Passed | " << Term(PASSED)
+ << " Space | " << Term(SPACE)
+ << " Winnable | " << Term(WINNABLE)
+ << " ------------+-------------+-------------+------------\n"
+ << " Total | " << Term(TOTAL);
+
+ v = pos.side_to_move() == WHITE ? v : -v;
+
+ ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
+
if (Eval::useNNUE)
{
v = NNUE::evaluate(pos);
- }
- else
- {
- std::memset(scores, 0, sizeof(scores));
-
- pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
-
- v = Evaluation<TRACE>(pos).value();
-
- ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
- << " Term | White | Black | Total \n"
- << " | MG EG | MG EG | MG EG \n"
- << " ------------+-------------+-------------+------------\n"
- << " Material | " << Term(MATERIAL)
- << " Imbalance | " << Term(IMBALANCE)
- << " Pawns | " << Term(PAWN)
- << " Knights | " << Term(KNIGHT)
- << " Bishops | " << Term(BISHOP)
- << " Rooks | " << Term(ROOK)
- << " Queens | " << Term(QUEEN)
- << " Mobility | " << Term(MOBILITY)
- << " King safety | " << Term(KING)
- << " Threats | " << Term(THREAT)
- << " Passed | " << Term(PASSED)
- << " Space | " << Term(SPACE)
- << " Winnable | " << Term(WINNABLE)
- << " ------------+-------------+-------------+------------\n"
- << " Total | " << Term(TOTAL);
+ v = pos.side_to_move() == WHITE ? v : -v;
+ ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n";
}
+ v = evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
-
- ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
+ ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
return ss.str();
}
void init_NNUE();
void verify_NNUE();
+ // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
+ // for the build process (profile-build and fishtest) to work. Do not change the
+ // name of the macro, as it is used in the Makefile.
+ #define EvalFileDefaultName "nn-308d71810dff.nnue"
+
namespace NNUE {
Value evaluate(const Position& pos);
Value compute_eval(const Position& pos);
void update_eval(const Position& pos);
- bool load_eval_file(const std::string& evalFile);
+ bool load_eval(std::string streamName, std::istream& stream);
} // namespace NNUE
--- /dev/null
+The file "incbin.h" is free and unencumbered software released into
+the public domain by Dale Weiler, see:
+ <https://github.com/graphitemaster/incbin>
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
--- /dev/null
+/**
+ * @file incbin.h
+ * @author Dale Weiler
+ * @brief Utility for including binary files
+ *
+ * Facilities for including binary files into the current translation unit and
+ * making use of them externally in other translation units.
+ */
+#ifndef INCBIN_HDR
+#define INCBIN_HDR
+#include <limits.h>
+#if defined(__AVX512BW__) || \
+ defined(__AVX512CD__) || \
+ defined(__AVX512DQ__) || \
+ defined(__AVX512ER__) || \
+ defined(__AVX512PF__) || \
+ defined(__AVX512VL__) || \
+ defined(__AVX512F__)
+# define INCBIN_ALIGNMENT_INDEX 6
+#elif defined(__AVX__) || \
+ defined(__AVX2__)
+# define INCBIN_ALIGNMENT_INDEX 5
+#elif defined(__SSE__) || \
+ defined(__SSE2__) || \
+ defined(__SSE3__) || \
+ defined(__SSSE3__) || \
+ defined(__SSE4_1__) || \
+ defined(__SSE4_2__) || \
+ defined(__neon__)
+# define INCBIN_ALIGNMENT_INDEX 4
+#elif ULONG_MAX != 0xffffffffu
+# define INCBIN_ALIGNMENT_INDEX 3
+# else
+# define INCBIN_ALIGNMENT_INDEX 2
+#endif
+
+/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
+#define INCBIN_ALIGN_SHIFT_0 1
+#define INCBIN_ALIGN_SHIFT_1 2
+#define INCBIN_ALIGN_SHIFT_2 4
+#define INCBIN_ALIGN_SHIFT_3 8
+#define INCBIN_ALIGN_SHIFT_4 16
+#define INCBIN_ALIGN_SHIFT_5 32
+#define INCBIN_ALIGN_SHIFT_6 64
+
+/* Actual alignment value */
+#define INCBIN_ALIGNMENT \
+ INCBIN_CONCATENATE( \
+ INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
+ INCBIN_ALIGNMENT_INDEX)
+
+/* Stringize */
+#define INCBIN_STR(X) \
+ #X
+#define INCBIN_STRINGIZE(X) \
+ INCBIN_STR(X)
+/* Concatenate */
+#define INCBIN_CAT(X, Y) \
+ X ## Y
+#define INCBIN_CONCATENATE(X, Y) \
+ INCBIN_CAT(X, Y)
+/* Deferred macro expansion */
+#define INCBIN_EVAL(X) \
+ X
+#define INCBIN_INVOKE(N, ...) \
+ INCBIN_EVAL(N(__VA_ARGS__))
+
+/* Green Hills uses a different directive for including binary data */
+#if defined(__ghs__)
+# if (__ghs_asm == 2)
+# define INCBIN_MACRO ".file"
+/* Or consider the ".myrawdata" entry in the ld file */
+# else
+# define INCBIN_MACRO "\tINCBIN"
+# endif
+#else
+# define INCBIN_MACRO ".incbin"
+#endif
+
+#ifndef _MSC_VER
+# define INCBIN_ALIGN \
+ __attribute__((aligned(INCBIN_ALIGNMENT)))
+#else
+# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
+#endif
+
+#if defined(__arm__) || /* GNU C and RealView */ \
+ defined(__arm) || /* Diab */ \
+ defined(_ARM) /* ImageCraft */
+# define INCBIN_ARM
+#endif
+
+#ifdef __GNUC__
+/* Utilize .balign where supported */
+# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+# define INCBIN_ALIGN_BYTE ".balign 1\n"
+#elif defined(INCBIN_ARM)
+/*
+ * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
+ * the shift count. This is the value passed to `.align'
+ */
+# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
+# define INCBIN_ALIGN_BYTE ".align 0\n"
+#else
+/* We assume other inline assemblers treat `.align' as `.balign' */
+# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+# define INCBIN_ALIGN_BYTE ".align 1\n"
+#endif
+
+/* INCBIN_CONST is used by incbin.c generated files */
+#if defined(__cplusplus)
+# define INCBIN_EXTERNAL extern "C"
+# define INCBIN_CONST extern const
+#else
+# define INCBIN_EXTERNAL extern
+# define INCBIN_CONST const
+#endif
+
+/**
+ * @brief Optionally override the linker section into which data is emitted.
+ *
+ * @warning If you use this facility, you'll have to deal with platform-specific linker output
+ * section naming on your own
+ *
+ * Overriding the default linker output section, e.g for esp8266/Arduino:
+ * @code
+ * #define INCBIN_OUTPUT_SECTION ".irom.text"
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ * // Data is emitted into program memory that never gets copied to RAM
+ * @endcode
+ */
+#if !defined(INCBIN_OUTPUT_SECTION)
+# if defined(__APPLE__)
+# define INCBIN_OUTPUT_SECTION ".const_data"
+# else
+# define INCBIN_OUTPUT_SECTION ".rodata"
+# endif
+#endif
+
+#if defined(__APPLE__)
+/* The directives are different for Apple branded compilers */
+# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
+# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+# define INCBIN_INT ".long "
+# define INCBIN_MANGLE "_"
+# define INCBIN_BYTE ".byte "
+# define INCBIN_TYPE(...)
+#else
+# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
+# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+# if defined(__ghs__)
+# define INCBIN_INT ".word "
+# else
+# define INCBIN_INT ".int "
+# endif
+# if defined(__USER_LABEL_PREFIX__)
+# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
+# else
+# define INCBIN_MANGLE ""
+# endif
+# if defined(INCBIN_ARM)
+/* On arm assemblers, `@' is used as a line comment token */
+# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
+# elif defined(__MINGW32__) || defined(__MINGW64__)
+/* Mingw doesn't support this directive either */
+# define INCBIN_TYPE(NAME)
+# else
+/* It's safe to use `@' on other architectures */
+# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
+# endif
+# define INCBIN_BYTE ".byte "
+#endif
+
+/* List of style types used for symbol names */
+#define INCBIN_STYLE_CAMEL 0
+#define INCBIN_STYLE_SNAKE 1
+
+/**
+ * @brief Specify the prefix to use for symbol names.
+ *
+ * By default this is `g', producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char gFooData[];
+ * // const unsigned char *const gFooEnd;
+ * // const unsigned int gFooSize;
+ * @endcode
+ *
+ * If however you specify a prefix before including: e.g:
+ * @code
+ * #define INCBIN_PREFIX incbin
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols instead:
+ * // const unsigned char incbinFooData[];
+ * // const unsigned char *const incbinFooEnd;
+ * // const unsigned int incbinFooSize;
+ * @endcode
+ */
+#if !defined(INCBIN_PREFIX)
+# define INCBIN_PREFIX g
+#endif
+
+/**
+ * @brief Specify the style used for symbol names.
+ *
+ * Possible options are
+ * - INCBIN_STYLE_CAMEL "CamelCase"
+ * - INCBIN_STYLE_SNAKE "snake_case"
+ *
+ * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>FooData[];
+ * // const unsigned char *const <prefix>FooEnd;
+ * // const unsigned int <prefix>FooSize;
+ * @endcode
+ *
+ * If however you specify a style before including: e.g:
+ * @code
+ * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
+ * #include "incbin.h"
+ * INCBIN(foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>foo_data[];
+ * // const unsigned char *const <prefix>foo_end;
+ * // const unsigned int <prefix>foo_size;
+ * @endcode
+ */
+#if !defined(INCBIN_STYLE)
+# define INCBIN_STYLE INCBIN_STYLE_CAMEL
+#endif
+
+/* Style lookup tables */
+#define INCBIN_STYLE_0_DATA Data
+#define INCBIN_STYLE_0_END End
+#define INCBIN_STYLE_0_SIZE Size
+#define INCBIN_STYLE_1_DATA _data
+#define INCBIN_STYLE_1_END _end
+#define INCBIN_STYLE_1_SIZE _size
+
+/* Style lookup: returning identifier */
+#define INCBIN_STYLE_IDENT(TYPE) \
+ INCBIN_CONCATENATE( \
+ INCBIN_STYLE_, \
+ INCBIN_CONCATENATE( \
+ INCBIN_EVAL(INCBIN_STYLE), \
+ INCBIN_CONCATENATE(_, TYPE)))
+
+/* Style lookup: returning string literal */
+#define INCBIN_STYLE_STRING(TYPE) \
+ INCBIN_STRINGIZE( \
+ INCBIN_STYLE_IDENT(TYPE)) \
+
+/* Generate the global labels by indirectly invoking the macro with our style
+ * type and concatenating the name against them. */
+#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
+ INCBIN_INVOKE( \
+ INCBIN_GLOBAL, \
+ INCBIN_CONCATENATE( \
+ NAME, \
+ INCBIN_INVOKE( \
+ INCBIN_STYLE_IDENT, \
+ TYPE))) \
+ INCBIN_INVOKE( \
+ INCBIN_TYPE, \
+ INCBIN_CONCATENATE( \
+ NAME, \
+ INCBIN_INVOKE( \
+ INCBIN_STYLE_IDENT, \
+ TYPE)))
+
+/**
+ * @brief Externally reference binary data included in another translation unit.
+ *
+ * Produces three external symbols that reference the binary data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name given for the binary data
+ *
+ * @code
+ * INCBIN_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const unsigned char <prefix>FooData[];
+ * // extern const unsigned char *const <prefix>FooEnd;
+ * // extern const unsigned int <prefix>FooSize;
+ * @endcode
+ */
+#define INCBIN_EXTERN(NAME) \
+ INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
+ INCBIN_CONCATENATE( \
+ INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+ INCBIN_STYLE_IDENT(DATA))[]; \
+ INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
+ INCBIN_CONCATENATE( \
+ INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+ INCBIN_STYLE_IDENT(END)); \
+ INCBIN_EXTERNAL const unsigned int \
+ INCBIN_CONCATENATE( \
+ INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+ INCBIN_STYLE_IDENT(SIZE))
+
+/**
+ * @brief Include a binary file into the current translation unit.
+ *
+ * Includes a binary file into the current translation unit, producing three symbols
+ * for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name to associate with this binary data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCBIN(Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>IconData[];
+ * // const unsigned char *const <prefix>IconEnd;
+ * // const unsigned int <prefix>IconSize;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCBIN_EXTERN.
+ */
+#ifdef _MSC_VER
+#define INCBIN(NAME, FILENAME) \
+ INCBIN_EXTERN(NAME)
+#else
+#define INCBIN(NAME, FILENAME) \
+ __asm__(INCBIN_SECTION \
+ INCBIN_GLOBAL_LABELS(NAME, DATA) \
+ INCBIN_ALIGN_HOST \
+ INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
+ INCBIN_MACRO " \"" FILENAME "\"\n" \
+ INCBIN_GLOBAL_LABELS(NAME, END) \
+ INCBIN_ALIGN_BYTE \
+ INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
+ INCBIN_BYTE "1\n" \
+ INCBIN_GLOBAL_LABELS(NAME, SIZE) \
+ INCBIN_ALIGN_HOST \
+ INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
+ INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
+ INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
+ INCBIN_ALIGN_HOST \
+ ".text\n" \
+ ); \
+ INCBIN_EXTERN(NAME)
+
+#endif
+#endif
std::cout << engine_info() << std::endl;
+ CommandLine::init(argc, argv);
UCI::init(Options);
Tune::init();
PSQT::init();
Value npm_w = pos.non_pawn_material(WHITE);
Value npm_b = pos.non_pawn_material(BLACK);
- Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
+ Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
// Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME]
e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit));
#include <sys/mman.h>
#endif
+#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+#define POSIXALIGNEDALLOC
+#include <stdlib.h>
+#endif
+
#include "misc.h"
#include "thread.h"
} // namespace
+
/// engine_info() returns the full name of the current Stockfish version. This
/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
/// the program was compiled) or "Stockfish <Version>", depending on whether
compiler += "\nCompilation settings include: ";
compiler += (Is64Bit ? " 64bit" : " 32bit");
+ #if defined(USE_VNNI)
+ compiler += " VNNI";
+ #endif
#if defined(USE_AVX512)
compiler += " AVX512";
#endif
+ compiler += (HasPext ? " BMI2" : "");
#if defined(USE_AVX2)
compiler += " AVX2";
#endif
- #if defined(USE_SSE42)
- compiler += " SSE42";
- #endif
#if defined(USE_SSE41)
compiler += " SSE41";
#endif
#if defined(USE_SSSE3)
compiler += " SSSE3";
#endif
- #if defined(USE_SSE3)
- compiler += " SSE3";
+ #if defined(USE_SSE2)
+ compiler += " SSE2";
+ #endif
+ compiler += (HasPopCnt ? " POPCNT" : "");
+ #if defined(USE_MMX)
+ compiler += " MMX";
+ #endif
+ #if defined(USE_NEON)
+ compiler += " NEON";
#endif
- compiler += (HasPext ? " BMI2" : "");
- compiler += (HasPopCnt ? " POPCNT" : "");
+
#if !defined(NDEBUG)
compiler += " DEBUG";
#endif
#endif
-/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
-/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
-///
+
+/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
+/// does not guarantee the availability of aligned_alloc(). Memory allocated with
+/// std_aligned_alloc() must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
-#if defined(__APPLE__)
- return aligned_alloc(alignment, size);
+
+#if defined(POSIXALIGNEDALLOC)
+ void *mem;
+ return posix_memalign(&mem, alignment, size) ? nullptr : mem;
#elif defined(_WIN32)
return _mm_malloc(size, alignment);
#else
}
void std_aligned_free(void* ptr) {
-#if defined(__APPLE__)
+
+#if defined(POSIXALIGNEDALLOC)
free(ptr);
#elif defined(_WIN32)
_mm_free(ptr);
#endif
}
-/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
+/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
/// The returned pointer is the aligned one, while the mem argument is the one that needs
/// to be passed to free. With c++17 some of this functionality could be simplified.
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
if (posix_memalign(&mem, alignment, size))
mem = nullptr;
+#if defined(MADV_HUGEPAGE)
madvise(mem, allocSize, MADV_HUGEPAGE);
+#endif
return mem;
}
#endif
} // namespace WinProcGroup
+
+#ifdef _WIN32
+#include <direct.h>
+#define GETCWD _getcwd
+#else
+#include <unistd.h>
+#define GETCWD getcwd
+#endif
+
+namespace CommandLine {
+
+string argv0;            // path+name of the executable binary, as given by argv[0]
+string binaryDirectory;  // path of the executable directory
+string workingDirectory; // path of the working directory
+string pathSeparator;    // Separator for our current OS
+
+/// init() records how the engine was started: the executable path (argv0),
+/// the current working directory and the directory that holds the binary.
+/// It tolerates an empty argument vector: argv0 is then left empty and the
+/// binary directory falls back to the working directory.
+void init(int argc, char* argv[]) {
+
+    // extract the path+name of the executable binary. argv[0] is not guaranteed
+    // to exist, and assigning a null pointer to a std::string is undefined.
+    argv0 = (argc > 0 && argv[0]) ? argv[0] : "";
+
+#ifdef _WIN32
+    pathSeparator = "\\";
+  #ifdef _MSC_VER
+    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
+    // issues in some windows 10 versions, so check returned values carefully.
+    char* pgmptr = nullptr;
+    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
+        argv0 = pgmptr;
+  #endif
+#else
+    pathSeparator = "/";
+#endif
+
+    // extract the working directory; it stays empty if the lookup fails
+    workingDirectory = "";
+    char buff[40000];
+    char* cwd = GETCWD(buff, 40000);
+    if (cwd)
+        workingDirectory = cwd;
+
+    // extract the binary directory path from argv0: keep everything up to and
+    // including the last separator, or use "." when argv0 has no directory part
+    binaryDirectory = argv0;
+    size_t pos = binaryDirectory.find_last_of("\\/");
+    if (pos == std::string::npos)
+        binaryDirectory = "." + pathSeparator;
+    else
+        binaryDirectory.resize(pos + 1);
+
+    // pattern replacement: "./" at the start of path is replaced by the working directory
+    // (only the leading dot is substituted, the separator after it is kept)
+    if (binaryDirectory.find("." + pathSeparator) == 0)
+        binaryDirectory.replace(0, 1, workingDirectory);
+}
+
+
+} // namespace CommandLine
void dbg_print();
typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
-
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
-
inline TimePoint now() {
return std::chrono::duration_cast<std::chrono::milliseconds>
(std::chrono::steady_clock::now().time_since_epoch()).count();
#define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK
-namespace Utility {
-
-/// Clamp a value between lo and hi. Available in c++17.
-template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
- return v < lo ? lo : v > hi ? hi : v;
-}
-
-}
/// xorshift64star Pseudo-Random Number Generator
/// This class is based on original code written and dedicated
void bindThisThread(size_t idx);
}
+/// CommandLine exposes the start-up paths of the engine. The strings declared
+/// here are filled in by init().
+namespace CommandLine {
+ // Derives the paths below from argv[0] and the current working directory
+ void init(int argc, char* argv[]);
+
+ extern std::string binaryDirectory; // path of the executable directory
+ extern std::string workingDirectory; // path of the working directory
+}
+
#endif // #ifndef MISC_H_INCLUDED
*moveList++ = make_move(ksq, pop_lsb(&b));
if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
- for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
+ for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
*moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
}
--endMoves;
++stage;
- /* fallthrough */
+ [[fallthrough]];
case REFUTATION:
if (select<Next>([&](){ return *cur != MOVE_NONE
&& pos.pseudo_legal(*cur); }))
return *(cur - 1);
++stage;
- /* fallthrough */
+ [[fallthrough]];
case QUIET_INIT:
if (!skipQuiets)
}
++stage;
- /* fallthrough */
+ [[fallthrough]];
case QUIET:
if ( !skipQuiets
endMoves = endBadCaptures;
++stage;
- /* fallthrough */
+ [[fallthrough]];
case BAD_CAPTURE:
return select<Next>([](){ return true; });
score<EVASIONS>();
++stage;
- /* fallthrough */
+ [[fallthrough]];
case EVASION:
return select<Best>([](){ return true; });
return MOVE_NONE;
++stage;
- /* fallthrough */
+ [[fallthrough]];
case QCHECK_INIT:
cur = moves;
endMoves = generate<QUIET_CHECKS>(pos, cur);
++stage;
- /* fallthrough */
+ [[fallthrough]];
case QCHECK:
return select<Next>([](){ return true; });
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
-/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet
-/// moves which are/were in the PV (ttPv)
-/// It is cleared with each new search and filled during iterative deepening
+/// At higher depths LowPlyHistory records successful quiet moves near the root
+/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
+/// search and filled during iterative deepening.
constexpr int MAX_LPH = 4;
typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
// Code for calculating NNUE evaluation function
-#include <fstream>
#include <iostream>
#include <set>
#include "evaluate_nnue.h"
-ExtPieceSquare kpp_board_index[PIECE_NB] = {
- // convention: W - us, B - them
- // viewed from other side, W and B are reversed
- { PS_NONE, PS_NONE },
- { PS_W_PAWN, PS_B_PAWN },
- { PS_W_KNIGHT, PS_B_KNIGHT },
- { PS_W_BISHOP, PS_B_BISHOP },
- { PS_W_ROOK, PS_B_ROOK },
- { PS_W_QUEEN, PS_B_QUEEN },
- { PS_W_KING, PS_B_KING },
- { PS_NONE, PS_NONE },
- { PS_NONE, PS_NONE },
- { PS_B_PAWN, PS_W_PAWN },
- { PS_B_KNIGHT, PS_W_KNIGHT },
- { PS_B_BISHOP, PS_W_BISHOP },
- { PS_B_ROOK, PS_W_ROOK },
- { PS_B_QUEEN, PS_W_QUEEN },
- { PS_B_KING, PS_W_KING },
- { PS_NONE, PS_NONE }
-};
-
-
namespace Eval::NNUE {
+ // Lookup table read as kpp_board_index[piece][perspective]: it gives the PS_*
+ // base value of a piece as seen from the given perspective.
+ // NOTE(review): the rows filled with PS_NONE presumably correspond to piece
+ // codes that are not real pieces - confirm against the Piece enum.
+ uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
+ // convention: W - us, B - them
+ // viewed from other side, W and B are reversed
+ { PS_NONE, PS_NONE },
+ { PS_W_PAWN, PS_B_PAWN },
+ { PS_W_KNIGHT, PS_B_KNIGHT },
+ { PS_W_BISHOP, PS_B_BISHOP },
+ { PS_W_ROOK, PS_B_ROOK },
+ { PS_W_QUEEN, PS_B_QUEEN },
+ { PS_W_KING, PS_B_KING },
+ { PS_NONE, PS_NONE },
+ { PS_NONE, PS_NONE },
+ { PS_B_PAWN, PS_W_PAWN },
+ { PS_B_KNIGHT, PS_W_KNIGHT },
+ { PS_B_BISHOP, PS_W_BISHOP },
+ { PS_B_ROOK, PS_W_ROOK },
+ { PS_B_QUEEN, PS_W_QUEEN },
+ { PS_B_KING, PS_W_KING },
+ { PS_NONE, PS_NONE }
+ };
+
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header;
- stream.read(reinterpret_cast<char*>(&header), sizeof(header));
+ header = read_little_endian<std::uint32_t>(stream);
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
}
}
// Read network header
- bool ReadHeader(std::istream& stream,
- std::uint32_t* hash_value, std::string* architecture) {
-
+ bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
+ {
std::uint32_t version, size;
- stream.read(reinterpret_cast<char*>(&version), sizeof(version));
- stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
- stream.read(reinterpret_cast<char*>(&size), sizeof(size));
+
+ version = read_little_endian<std::uint32_t>(stream);
+ *hash_value = read_little_endian<std::uint32_t>(stream);
+ size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return stream && stream.peek() == std::ios::traits_type::eof();
}
- // Proceed with the difference calculation if possible
- static void UpdateAccumulatorIfPossible(const Position& pos) {
-
- feature_transformer->UpdateAccumulatorIfPossible(pos);
- }
-
- // Calculate the evaluation value
- static Value ComputeScore(const Position& pos, bool refresh) {
-
- auto& accumulator = pos.state()->accumulator;
- if (!refresh && accumulator.computed_score) {
- return accumulator.score;
- }
+ // Evaluation function. Perform differential calculation.
+ Value evaluate(const Position& pos) {
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
- feature_transformer->Transform(pos, transformed_features, refresh);
+ feature_transformer->Transform(pos, transformed_features);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
- auto score = static_cast<Value>(output[0] / FV_SCALE);
-
- accumulator.score = score;
- accumulator.computed_score = true;
- return accumulator.score;
+ return static_cast<Value>(output[0] / FV_SCALE);
}
- // Load the evaluation function file
- bool load_eval_file(const std::string& evalFile) {
+ // Load eval, from a file stream or a memory stream
+ bool load_eval(std::string streamName, std::istream& stream) {
Initialize();
- fileName = evalFile;
-
- std::ifstream stream(evalFile, std::ios::binary);
-
- const bool result = ReadParameters(stream);
-
- return result;
- }
-
- // Evaluation function. Perform differential calculation.
- Value evaluate(const Position& pos) {
- Value v = ComputeScore(pos, false);
- v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
-
- return v;
- }
-
- // Evaluation function. Perform full calculation.
- Value compute_eval(const Position& pos) {
- return ComputeScore(pos, true);
- }
-
- // Proceed with the difference calculation if possible
- void update_eval(const Position& pos) {
- UpdateAccumulatorIfPossible(pos);
+ fileName = streamName;
+ return ReadParameters(stream);
}
} // namespace Eval::NNUE
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kFriendKingMoved:
- reset[perspective] =
- dp.pieceId[0] == PIECE_ID_KING + perspective;
+ reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
default:
assert(false);
namespace Eval::NNUE::Features {
- // Find the index of the feature quantity from the king position and PieceSquare
- template <Side AssociatedKing>
- inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
- return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
+ // Orient a square according to perspective (rotates by 180 for black)
+ // The square number is XOR-ed with 63 when bool(perspective) is true and
+ // with 0 (i.e. left unchanged) otherwise.
+ inline Square orient(Color perspective, Square s) {
+ return Square(int(s) ^ (bool(perspective) * 63));
}
- // Get pieces information
+ // Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing>
- inline void HalfKP<AssociatedKing>::GetPieces(
- const Position& pos, Color perspective,
- PieceSquare** pieces, Square* sq_target_k) {
+ inline IndexType HalfKP<AssociatedKing>::MakeIndex(
+ Color perspective, Square s, Piece pc, Square ksq) {
- *pieces = (perspective == BLACK) ?
- pos.eval_list()->piece_list_fb() :
- pos.eval_list()->piece_list_fw();
- const PieceId target = (AssociatedKing == Side::kFriend) ?
- static_cast<PieceId>(PIECE_ID_KING + perspective) :
- static_cast<PieceId>(PIECE_ID_KING + ~perspective);
- *sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
+ return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
}
// Get a list of indices for active features
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
- // Do nothing if array size is small to avoid compiler warning
- if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-
- PieceSquare* pieces;
- Square sq_target_k;
- GetPieces(pos, perspective, &pieces, &sq_target_k);
- for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
- if (pieces[i] != PS_NONE) {
- active->push_back(MakeIndex(sq_target_k, pieces[i]));
- }
+ Square ksq = orient(perspective, pos.square<KING>(perspective));
+ Bitboard bb = pos.pieces() & ~pos.pieces(KING);
+ while (bb) {
+ Square s = pop_lsb(&bb);
+ active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
}
}
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
- PieceSquare* pieces;
- Square sq_target_k;
- GetPieces(pos, perspective, &pieces, &sq_target_k);
+ Square ksq = orient(perspective, pos.square<KING>(perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
- if (dp.pieceId[i] >= PIECE_ID_KING) continue;
- const auto old_p = static_cast<PieceSquare>(
- dp.old_piece[i].from[perspective]);
- if (old_p != PS_NONE) {
- removed->push_back(MakeIndex(sq_target_k, old_p));
- }
- const auto new_p = static_cast<PieceSquare>(
- dp.new_piece[i].from[perspective]);
- if (new_p != PS_NONE) {
- added->push_back(MakeIndex(sq_target_k, new_p));
- }
+ Piece pc = dp.piece[i];
+ if (type_of(pc) == KING) continue;
+ if (dp.from[i] != SQ_NONE)
+ removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
+ if (dp.to[i] != SQ_NONE)
+ added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
}
}
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of simultaneously active features
- static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
+ static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
- // Index of a feature for a given king position and another piece on some square
- static IndexType MakeIndex(Square sq_k, PieceSquare p);
-
private:
- // Get pieces information
- static void GetPieces(const Position& pos, Color perspective,
- PieceSquare** pieces, Square* sq_target_k);
+ // Index of a feature for a given king position and another piece on some square
+ static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
};
} // namespace Eval::NNUE::Features
// Read network parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
- stream.read(reinterpret_cast<char*>(biases_),
- kOutputDimensions * sizeof(BiasType));
- stream.read(reinterpret_cast<char*>(weights_),
- kOutputDimensions * kPaddedInputDimensions *
- sizeof(WeightType));
+ for (std::size_t i = 0; i < kOutputDimensions; ++i)
+ biases_[i] = read_little_endian<BiasType>(stream);
+ for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
+ weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
- const __m512i kOnes = _mm512_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m512i*>(input);
+ #if !defined(USE_VNNI)
+ const __m512i kOnes = _mm512_set1_epi16(1);
+ #endif
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
- const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
+ #if !defined(USE_VNNI)
+ const __m256i kOnes = _mm256_set1_epi16(1);
+ #endif
- #elif defined(USE_SSSE3)
+ #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+ #ifndef USE_SSSE3
+ const __m128i kZeros = _mm_setzero_si128();
+ #else
const __m128i kOnes = _mm_set1_epi16(1);
+ #endif
const auto input_vector = reinterpret_cast<const __m128i*>(input);
+ #elif defined(USE_MMX)
+ constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+ const __m64 kZeros = _mm_setzero_si64();
+ const auto input_vector = reinterpret_cast<const __m64*>(input);
+
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
__m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+ #if defined(USE_VNNI)
+ sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
- __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
- #endif
-
+ __m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product);
+ #endif
}
- output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{
- const auto iv_256 = reinterpret_cast<const __m256i*>(input);
- const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
- int j = kNumChunks * 2;
-
- #if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2.
- __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+ const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
+ const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
+ #if defined(USE_VNNI)
+ __m256i product256 = _mm256_dpbusd_epi32(
+ _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
+ sum = _mm512_inserti32x8(sum, product256, 0);
#else
- __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+ __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
+ sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
#endif
-
- sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
- sum256 = _mm256_hadd_epi32(sum256, sum256);
- sum256 = _mm256_hadd_epi32(sum256, sum256);
- const __m128i lo = _mm256_extracti128_si256(sum256, 0);
- const __m128i hi = _mm256_extracti128_si256(sum256, 1);
- output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
}
+ output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
#elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
- __m256i product = _mm256_maddubs_epi16(
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
- // compiled with g++ in MSYS2 crashes here because the output memory is not aligned
- // even though alignas is specified.
- _mm256_loadu_si256
+ #if defined(USE_VNNI)
+ sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
#else
- _mm256_load_si256
- #endif
-
- (&input_vector[j]), _mm256_load_si256(&row[j]));
+ __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
+ #endif
}
- sum = _mm256_hadd_epi32(sum, sum);
- sum = _mm256_hadd_epi32(sum, sum);
- const __m128i lo = _mm256_extracti128_si256(sum, 0);
- const __m128i hi = _mm256_extracti128_si256(sum, 1);
- output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
+ __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
+ sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
+ sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
+ output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
#elif defined(USE_SSSE3)
- __m128i sum = _mm_cvtsi32_si128(biases_[i]);
+ __m128i sum = _mm_setzero_si128();
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
- __m128i product = _mm_maddubs_epi16(
- _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+ for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
+ __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+ product0 = _mm_madd_epi16(product0, kOnes);
+ sum = _mm_add_epi32(sum, product0);
+ __m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
+ product1 = _mm_madd_epi16(product1, kOnes);
+ sum = _mm_add_epi32(sum, product1);
+ }
+ if (kNumChunks & 0x1) {
+ __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
- sum = _mm_hadd_epi32(sum, sum);
- sum = _mm_hadd_epi32(sum, sum);
+ sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
+ sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
+ output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
+
+ #elif defined(USE_SSE2)
+ __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
+ __m128i sum_hi = kZeros;
+ const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+ for (IndexType j = 0; j < kNumChunks; ++j) {
+ __m128i row_j = _mm_load_si128(&row[j]);
+ __m128i input_j = _mm_load_si128(&input_vector[j]);
+ __m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j);
+ __m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs);
+ __m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs);
+ __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
+ __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
+ __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
+ __m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
+ sum_lo = _mm_add_epi32(sum_lo, product_lo);
+ sum_hi = _mm_add_epi32(sum_hi, product_hi);
+ }
+ __m128i sum = _mm_add_epi32(sum_lo, sum_hi);
+ __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
+ sum = _mm_add_epi32(sum, sum_high_64);
+ __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
+ sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum);
+ #elif defined(USE_MMX)
+ __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
+ __m64 sum_hi = kZeros;
+ const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
+ for (IndexType j = 0; j < kNumChunks; ++j) {
+ __m64 row_j = row[j];
+ __m64 input_j = input_vector[j];
+ __m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j);
+ __m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs);
+ __m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs);
+ __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
+ __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
+ __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
+ __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
+ sum_lo = _mm_add_pi32(sum_lo, product_lo);
+ sum_hi = _mm_add_pi32(sum_hi, product_hi);
+ }
+ __m64 sum = _mm_add_pi32(sum_lo, sum_hi);
+ sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
+ output[i] = _mm_cvtsi64_si32(sum);
+
#elif defined(USE_NEON)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
#endif
}
+ #if defined(USE_MMX)
+ _mm_empty();
+ #endif
return output;
}
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
- // compiled with g++ in MSYS2 crashes here because the output memory is not aligned
- // even though alignas is specified.
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&in[i * 4 + 0]),
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&in[i * 4 + 1])), kWeightScaleBits);
+ _mm256_loadA_si256(&in[i * 4 + 0]),
+ _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&in[i * 4 + 2]),
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&in[i * 4 + 3])), kWeightScaleBits);
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_storeu_si256
- #else
- _mm256_store_si256
- #endif
-
- (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
+ _mm256_loadA_si256(&in[i * 4 + 2]),
+ _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
+ _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
- #elif defined(USE_SSSE3)
+ #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
#ifdef USE_SSE41
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
+ #elif defined(USE_MMX)
+ constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+ const __m64 k0x80s = _mm_set1_pi8(-128);
+ const auto in = reinterpret_cast<const __m64*>(input);
+ const auto out = reinterpret_cast<__m64*>(output);
+ for (IndexType i = 0; i < kNumChunks; ++i) {
+ const __m64 words0 = _mm_srai_pi16(
+ _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
+ kWeightScaleBits);
+ const __m64 words1 = _mm_srai_pi16(
+ _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
+ kWeightScaleBits);
+ const __m64 packedbytes = _mm_packs_pi16(words0, words1);
+ out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
+ }
+ _mm_empty();
+ constexpr IndexType kStart = kNumChunks * kSimdWidth;
+
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features
- struct alignas(32) Accumulator {
+ struct alignas(kCacheLineSize) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
- Value score;
bool computed_accumulation;
- bool computed_score;
};
} // namespace Eval::NNUE
#ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED
+#include <cstring>
+#include <iostream>
+
#if defined(USE_AVX2)
#include <immintrin.h>
#elif defined(USE_SSE2)
#include <emmintrin.h>
+#elif defined(USE_MMX)
+#include <mmintrin.h>
+
#elif defined(USE_NEON)
#include <arm_neon.h>
#endif
+// HACK: the "A" load/store aliases below map to the unaligned intrinsics when
+// building with g++ older than version 9 on Windows (and not clang), because
+// such binaries crash on the aligned forms: the memory is not aligned even
+// though alignas is specified. Everywhere else they map to the aligned forms.
+#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
+
+  #if defined(USE_AVX2)
+    #define _mm256_loadA_si256  _mm256_loadu_si256
+    #define _mm256_storeA_si256 _mm256_storeu_si256
+  #endif
+
+  #if defined(USE_AVX512)
+    #define _mm512_loadA_si512  _mm512_loadu_si512
+    #define _mm512_storeA_si512 _mm512_storeu_si512
+  #endif
+
+#else
+
+  #if defined(USE_AVX2)
+    #define _mm256_loadA_si256  _mm256_load_si256
+    #define _mm256_storeA_si256 _mm256_store_si256
+  #endif
+
+  #if defined(USE_AVX512)
+    #define _mm512_loadA_si512  _mm512_load_si512
+    #define _mm512_storeA_si512 _mm512_store_si512
+  #endif
+
+#endif
+
+
namespace Eval::NNUE {
// Version of the evaluation file
#elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16;
+ #elif defined(USE_MMX)
+ constexpr std::size_t kSimdWidth = 8;
+
#elif defined(USE_NEON)
constexpr std::size_t kSimdWidth = 16;
#endif
constexpr std::size_t kMaxSimdWidth = 32;
+ // unique number for each piece type on each square
+ // Every colour/piece-type pair owns a run of SQUARE_NB consecutive values;
+ // the runs start at 1, which leaves 0 free for PS_NONE.
+ enum {
+ PS_NONE = 0,
+ PS_W_PAWN = 1,
+ PS_B_PAWN = 1 * SQUARE_NB + 1,
+ PS_W_KNIGHT = 2 * SQUARE_NB + 1,
+ PS_B_KNIGHT = 3 * SQUARE_NB + 1,
+ PS_W_BISHOP = 4 * SQUARE_NB + 1,
+ PS_B_BISHOP = 5 * SQUARE_NB + 1,
+ PS_W_ROOK = 6 * SQUARE_NB + 1,
+ PS_B_ROOK = 7 * SQUARE_NB + 1,
+ PS_W_QUEEN = 8 * SQUARE_NB + 1,
+ PS_B_QUEEN = 9 * SQUARE_NB + 1,
+ PS_W_KING = 10 * SQUARE_NB + 1,
+ // PS_END coincides with the first king value; HalfKP's MakeIndex multiplies
+ // it by the king square to separate the per-king-square ranges.
+ PS_END = PS_W_KING, // pieces without kings (pawns included)
+ PS_B_KING = 11 * SQUARE_NB + 1,
+ PS_END2 = 12 * SQUARE_NB + 1
+ };
+
+ extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
+
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
using IndexType = std::uint32_t;
// Round n up to be a multiple of base
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
- return (n + base - 1) / base * base;
+ return (n + base - 1) / base * base;
+ }
+
+  // read_little_endian() is our utility to read an integer (signed or unsigned, any size)
+  // from a stream in little-endian order. The value is assembled arithmetically from the
+  // individual bytes, so the result does not depend on the byte order of the host machine.
+  // If the stream cannot deliver sizeof(IntType) bytes, the missing bytes count as zero
+  // and the error is reported through the stream state, which the caller has to check.
+  template <typename IntType>
+  inline IntType read_little_endian(std::istream& stream) {
+
+      IntType result;
+      // Zero-initialized: a short or failed read must never feed indeterminate
+      // bytes into the arithmetic below.
+      std::uint8_t u[sizeof(IntType)] = {};
+      typename std::make_unsigned<IntType>::type v = 0;
+
+      stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
+      // u[0] is the least significant byte, so fold from the last byte down to the first
+      for (std::size_t i = 0; i < sizeof(IntType); ++i)
+          v = (v << 8) | u[sizeof(IntType) - i - 1];
+
+      // Copy the bit pattern instead of casting, so signed types work as well
+      std::memcpy(&result, &v, sizeof(IntType));
+      return result;
}
} // namespace Eval::NNUE
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
+ // Combines RawFeatures::kHashValue with kOutputDimensions by XOR
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
- stream.read(reinterpret_cast<char*>(biases_),
- kHalfDimensions * sizeof(BiasType));
- stream.read(reinterpret_cast<char*>(weights_),
- kHalfDimensions * kInputDimensions * sizeof(WeightType));
+
+ for (std::size_t i = 0; i < kHalfDimensions; ++i) // the kHalfDimensions biases come first in the stream
+ biases_[i] = read_little_endian<BiasType>(stream); // one value at a time, in little-endian order
+ for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) // followed by kHalfDimensions * kInputDimensions weights
+ weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
+
const auto now = pos.state();
- if (now->accumulator.computed_accumulation) {
+ if (now->accumulator.computed_accumulation) // the current state's accumulator is already computed: nothing to update
return true;
- }
+
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
+
return false;
}
// Convert input features
- void Transform(const Position& pos, OutputType* output, bool refresh) const {
- if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+ void Transform(const Position& pos, OutputType* output) const {
+
+ if (!UpdateAccumulatorIfPossible(pos))
RefreshAccumulator(pos);
- }
+
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
- #elif defined(USE_SSSE3)
+ #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
+ #elif defined(USE_MMX)
+ constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+ const __m64 k0x80s = _mm_set1_pi8(-128);
+
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
- __m256i sum0 =
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
- // compiled with g++ in MSYS2 crashes here because the output memory is not aligned
- // even though alignas is specified.
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&reinterpret_cast<const __m256i*>(
- accumulation[perspectives[p]][0])[j * 2 + 0]);
- __m256i sum1 =
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_loadu_si256
- #else
- _mm256_load_si256
- #endif
-
- (&reinterpret_cast<const __m256i*>(
- accumulation[perspectives[p]][0])[j * 2 + 1]);
-
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_storeu_si256
- #else
- _mm256_store_si256
- #endif
-
- (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
+ __m256i sum0 = _mm256_loadA_si256(
+ &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
+ __m256i sum1 = _mm256_loadA_si256(
+ &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+ _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
- #elif defined(USE_SSSE3)
+ #elif defined(USE_SSE2)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
);
}
+ #elif defined(USE_MMX)
+ auto out = reinterpret_cast<__m64*>(&output[offset]);
+ for (IndexType j = 0; j < kNumChunks; ++j) {
+ __m64 sum0 = *(&reinterpret_cast<const __m64*>(
+ accumulation[perspectives[p]][0])[j * 2 + 0]);
+ __m64 sum1 = *(&reinterpret_cast<const __m64*>(
+ accumulation[perspectives[p]][0])[j * 2 + 1]);
+ const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
+ out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
+ }
+
#elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
#endif
}
+ #if defined(USE_MMX)
+ _mm_empty();
+ #endif
}
private:
// Calculate cumulative value without using difference calculation
void RefreshAccumulator(const Position& pos) const {
+
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList active_indices[2];
kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
+ #if defined(USE_AVX512)
+ auto accumulation = reinterpret_cast<__m512i*>(
+ &accumulator.accumulation[perspective][i][0]);
+ auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
+ constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+ for (IndexType j = 0; j < kNumChunks; ++j)
+ _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
- #if defined(USE_AVX2)
+ #elif defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j) {
- #if defined(__MINGW32__) || defined(__MINGW64__)
- _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
- #else
- accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
- #endif
- }
+ for (IndexType j = 0; j < kNumChunks; ++j)
+ _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
- }
+
+ #elif defined(USE_MMX)
+ auto accumulation = reinterpret_cast<__m64*>(
+ &accumulator.accumulation[perspective][i][0]);
+ auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+ constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+ for (IndexType j = 0; j < kNumChunks; ++j)
+ accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
- }
#else
- for (IndexType j = 0; j < kHalfDimensions; ++j) {
+ for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
- }
#endif
}
}
+ #if defined(USE_MMX)
+ _mm_empty();
+ #endif
accumulator.computed_accumulation = true;
- accumulator.computed_score = false;
}
// Calculate cumulative value using difference calculation
void UpdateAccumulator(const Position& pos) const {
+
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
+ #elif defined(USE_MMX)
+ constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+ auto accumulation = reinterpret_cast<__m64*>(
+ &accumulator.accumulation[perspective][i][0]);
+
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>(
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
- }
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
- }
+
+ #elif defined(USE_MMX)
+ auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+ for (IndexType j = 0; j < kNumChunks; ++j)
+ accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
- }
#else
- for (IndexType j = 0; j < kHalfDimensions; ++j) {
- accumulator.accumulation[perspective][i][j] -=
- weights_[offset + j];
- }
+ for (IndexType j = 0; j < kHalfDimensions; ++j)
+ accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
#endif
}
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
- }
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
- }
+
+ #elif defined(USE_MMX)
+ auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+ for (IndexType j = 0; j < kNumChunks; ++j)
+ accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
- for (IndexType j = 0; j < kNumChunks; ++j) {
+ for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
- }
#else
- for (IndexType j = 0; j < kHalfDimensions; ++j) {
- accumulator.accumulation[perspective][i][j] +=
- weights_[offset + j];
- }
+ for (IndexType j = 0; j < kHalfDimensions; ++j)
+ accumulator.accumulation[perspective][i][j] += weights_[offset + j];
#endif
}
}
}
+ #if defined(USE_MMX)
+ _mm_empty();
+ #endif
accumulator.computed_accumulation = true;
- accumulator.computed_score = false;
}
using BiasType = std::int16_t;
Score bonus = make_score(5, 5);
- File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G);
+ File center = std::clamp(file_of(ksq), FILE_B, FILE_G);
for (File f = File(center - 1); f <= File(center + 1); ++f)
{
b = ourPawns & file_bb(f);
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si;
- // Each piece on board gets a unique ID used to track the piece later
- PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
-
ss >> std::noskipws;
// 1. Piece placement
else if (token == '/')
sq += 2 * SOUTH;
- else if ((idx = PieceToChar.find(token)) != string::npos)
- {
- auto pc = Piece(idx);
- put_piece(pc, sq);
-
- if (Eval::useNNUE)
- {
- // Kings get a fixed ID, other pieces get ID in order of placement
- piece_id =
- (idx == W_KING) ? PIECE_ID_WKING :
- (idx == B_KING) ? PIECE_ID_BKING :
- next_piece_id++;
- evalList.put_piece(piece_id, sq, pc);
- }
-
+ else if ((idx = PieceToChar.find(token)) != string::npos) {
+ put_piece(Piece(idx), sq);
++sq;
}
}
// Used by NNUE
st->accumulator.computed_accumulation = false;
- st->accumulator.computed_score = false;
- PieceId dp0 = PIECE_ID_NONE;
- PieceId dp1 = PIECE_ID_NONE;
auto& dp = st->dirtyPiece;
dp.dirty_num = 1;
if (Eval::useNNUE)
{
- dp.dirty_num = 2; // 2 pieces moved
- dp1 = piece_id_on(capsq);
- dp.pieceId[1] = dp1;
- dp.old_piece[1] = evalList.piece_with_id(dp1);
- evalList.put_piece(dp1, capsq, NO_PIECE);
- dp.new_piece[1] = evalList.piece_with_id(dp1);
+ dp.dirty_num = 2; // 1 piece moved, 1 piece captured
+ dp.piece[1] = captured;
+ dp.from[1] = capsq;
+ dp.to[1] = SQ_NONE;
}
// Update board and piece lists
{
if (Eval::useNNUE)
{
- dp0 = piece_id_on(from);
- dp.pieceId[0] = dp0;
- dp.old_piece[0] = evalList.piece_with_id(dp0);
- evalList.put_piece(dp0, to, pc);
- dp.new_piece[0] = evalList.piece_with_id(dp0);
+ dp.piece[0] = pc;
+ dp.from[0] = from;
+ dp.to[0] = to;
}
move_piece(from, to);
if (Eval::useNNUE)
{
- dp0 = piece_id_on(to);
- evalList.put_piece(dp0, to, promotion);
- dp.new_piece[0] = evalList.piece_with_id(dp0);
+ // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
+ dp.to[0] = SQ_NONE;
+ dp.piece[dp.dirty_num] = promotion;
+ dp.from[dp.dirty_num] = SQ_NONE;
+ dp.to[dp.dirty_num] = to;
+ dp.dirty_num++;
}
// Update hash keys
{
move_piece(to, from); // Put the piece back at the source square
- if (Eval::useNNUE)
- {
- PieceId dp0 = st->dirtyPiece.pieceId[0];
- evalList.put_piece(dp0, from, pc);
- }
-
if (st->capturedPiece)
{
Square capsq = to;
}
put_piece(st->capturedPiece, capsq); // Restore the captured piece
-
- if (Eval::useNNUE)
- {
- PieceId dp1 = st->dirtyPiece.pieceId[1];
- assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
- assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
- evalList.put_piece(dp1, capsq, st->capturedPiece);
- }
}
}
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
- if (Eval::useNNUE)
+ if (Do && Eval::useNNUE)
{
- PieceId dp0, dp1;
auto& dp = st->dirtyPiece;
- dp.dirty_num = 2; // 2 pieces moved
-
- if (Do)
- {
- dp0 = piece_id_on(from);
- dp1 = piece_id_on(rfrom);
- dp.pieceId[0] = dp0;
- dp.old_piece[0] = evalList.piece_with_id(dp0);
- evalList.put_piece(dp0, to, make_piece(us, KING));
- dp.new_piece[0] = evalList.piece_with_id(dp0);
- dp.pieceId[1] = dp1;
- dp.old_piece[1] = evalList.piece_with_id(dp1);
- evalList.put_piece(dp1, rto, make_piece(us, ROOK));
- dp.new_piece[1] = evalList.piece_with_id(dp1);
- }
- else
- {
- dp0 = piece_id_on(to);
- dp1 = piece_id_on(rto);
- evalList.put_piece(dp0, from, make_piece(us, KING));
- evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
- }
+ dp.piece[0] = make_piece(us, KING);
+ dp.from[0] = from;
+ dp.to[0] = to;
+ dp.piece[1] = make_piece(us, ROOK);
+ dp.from[1] = rfrom;
+ dp.to[1] = rto;
+ dp.dirty_num = 2;
}
// Remove both pieces first since squares could overlap in Chess960
if (Eval::useNNUE)
{
std::memcpy(&newSt, st, sizeof(StateInfo));
- st->accumulator.computed_score = false;
}
else
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
// Don't allow pinned pieces to attack (except the king) as long as
// there are pinners on their original square.
- if (st->pinners[~stm] & occupied)
- stmAttackers &= ~st->blockersForKing[stm];
+ if (pinners(~stm) & occupied)
+ stmAttackers &= ~blockers_for_king(stm);
if (!stmAttackers)
break;
Bitboard checkers() const;
Bitboard blockers_for_king(Color c) const;
Bitboard check_squares(PieceType pt) const;
+ Bitboard pinners(Color c) const;
bool is_discovery_check_on_king(Color c, Move m) const;
// Attacks to/from a given square
// Used by NNUE
StateInfo* state() const;
- const EvalList* eval_list() const;
private:
// Initialization helpers (used while setting up a position)
template<bool Do>
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
- // ID of a piece on a given square
- PieceId piece_id_on(Square sq) const;
-
// Data members
Piece board[SQUARE_NB];
Bitboard byTypeBB[PIECE_TYPE_NB];
Thread* thisThread;
StateInfo* st;
bool chess960;
-
- // List of pieces used in NNUE evaluation function
- EvalList evalList;
};
namespace PSQT {
return st->blockersForKing[c];
}
+inline Bitboard Position::pinners(Color c) const {
+ return st->pinners[c]; // per-color pinners bitboard held by the current StateInfo (st)
+}
+
inline Bitboard Position::check_squares(PieceType pt) const {
return st->checkSquares[pt];
}
return st;
}
-inline const EvalList* Position::eval_list() const {
-
- return &evalList;
-}
-
-inline PieceId Position::piece_id_on(Square sq) const
-{
-
- assert(piece_on(sq) != NO_PIECE);
-
- PieceId pid = evalList.piece_id_list[sq];
- assert(is_ok(pid));
-
- return pid;
-}
-
#endif // #ifndef POSITION_H_INCLUDED
constexpr uint64_t TtHitAverageResolution = 1024;
// Razor and futility margins
- constexpr int RazorMargin = 527;
+ constexpr int RazorMargin = 510;
Value futility_margin(Depth d, bool improving) {
- return Value(227 * (d - improving));
+ return Value(223 * (d - improving)); // linear in depth; improving (0 or 1) takes one ply off before scaling
}
// Reductions lookup table, initialized at startup
Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn];
- return (r + 570) / 1024 + (!i && r > 1018);
+ return (r + 509) / 1024 + (!i && r > 894); // table product offset by 509 and divided by 1024, plus one when i is false and r exceeds 894
}
constexpr int futility_move_count(bool improving, Depth depth) {
// History and stats update bonus, based on depth
int stat_bonus(Depth d) {
- return d > 15 ? 27 : 17 * d * d + 133 * d - 134;
+ return d > 13 ? 29 : 17 * d * d + 134 * d - 134; // quadratic in d up to depth 13, flat 29 beyond that
}
// Add a small random component to draw evaluations to avoid 3fold-blindness
void Search::init() {
for (int i = 1; i < MAX_MOVES; ++i)
- Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i));
+ Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i)); // log(i) scaled by a factor that grows with the log of the thread count, truncated to int
}
// for match (TC 60+0.6) results spanning a wide range of k values.
PRNG rng(now());
double floatLevel = Options["UCI_LimitStrength"] ?
- Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
+ std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
double(Options["Skill Level"]);
int intLevel = int(floatLevel) +
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
if (rootDepth >= 4)
{
Value prev = rootMoves[pvIdx].previousScore;
- delta = Value(19);
+ delta = Value(17);
alpha = std::max(prev - delta,-VALUE_INFINITE);
beta = std::min(prev + delta, VALUE_INFINITE);
// Adjust contempt based on root move's previousScore (dynamic contempt)
- int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140);
+ int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
contempt = (us == WHITE ? make_score(dct, dct / 2)
: -make_score(dct, dct / 2));
&& !Threads.stop
&& !mainThread->stopOnPonderhit)
{
- double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue)
- + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0;
- fallingEval = Utility::clamp(fallingEval, 0.5, 1.5);
+ double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue)
+ + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0;
+ fallingEval = std::clamp(fallingEval, 0.5, 1.5);
// If the bestMove is stable over several iterations, reduce time accordingly
- timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95;
- double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction);
+ timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95;
+ double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction);
// Use part of the gained time from a previous stable move for the current move
for (Thread* th : Threads)
}
else if ( Threads.increaseDepth
&& !mainThread->ponder
- && Time.elapsed() > totalTime * 0.56)
+ && Time.elapsed() > totalTime * 0.58)
Threads.increaseDepth = false;
else
Threads.increaseDepth = true;
Key posKey;
Move ttMove, move, excludedMove, bestMove;
Depth extension, newDepth;
- Value bestValue, value, ttValue, eval, maxValue, probcutBeta;
- bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
+ Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
+ bool formerPv, givesCheck, improving, didLMR, priorCapture;
bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
ttCapture, singularQuietLMR;
Piece movedPiece;
assert(0 <= ss->ply && ss->ply < MAX_PLY);
(ss+1)->ply = ss->ply + 1;
+ (ss+1)->ttPv = false;
(ss+1)->excludedMove = bestMove = MOVE_NONE;
(ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
Square prevSq = to_sq((ss-1)->currentMove);
// position key in case of an excluded move.
excludedMove = ss->excludedMove;
posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
- tte = TT.probe(posKey, ttHit);
- ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+ tte = TT.probe(posKey, ss->ttHit);
+ ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
- : ttHit ? tte->move() : MOVE_NONE;
- ttPv = PvNode || (ttHit && tte->is_pv());
- formerPv = ttPv && !PvNode;
+ : ss->ttHit ? tte->move() : MOVE_NONE;
+ if (!excludedMove)
+ ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
+ formerPv = ss->ttPv && !PvNode;
- if ( ttPv
+ if ( ss->ttPv
&& depth > 12
&& ss->ply - 1 < MAX_LPH
&& !priorCapture
// thisThread->ttHitAverage can be used to approximate the running average of ttHit
thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow
- + TtHitAverageResolution * ttHit;
+ + TtHitAverageResolution * ss->ttHit;
// At non-PV nodes we check for an early TT cutoff
if ( !PvNode
- && ttHit
+ && ss->ttHit
&& tte->depth() >= depth
&& ttValue != VALUE_NONE // Possible in case of TT access race
&& (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
if ( b == BOUND_EXACT
|| (b == BOUND_LOWER ? value >= beta : value <= alpha))
{
- tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b,
+ tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b,
std::min(MAX_PLY - 1, depth + 6),
MOVE_NONE, VALUE_NONE);
improving = false;
goto moves_loop;
}
- else if (ttHit)
+ else if (ss->ttHit)
{
// Never assume anything about values stored in TT
ss->staticEval = eval = tte->eval();
else
{
if ((ss-1)->currentMove != MOVE_NULL)
- {
- int bonus = -(ss-1)->statScore / 512;
-
- ss->staticEval = eval = evaluate(pos) + bonus;
- }
+ ss->staticEval = eval = evaluate(pos);
else
ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
- tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
+ tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
}
// Step 7. Razoring (~1 Elo)
&& eval <= alpha - RazorMargin)
return qsearch<NT>(pos, ss, alpha, beta);
- improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval
- || (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval;
+ improving = (ss-2)->staticEval == VALUE_NONE
+ ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE
+ : ss->staticEval > (ss-2)->staticEval;
// Step 8. Futility pruning: child node (~50 Elo)
if ( !PvNode
// Step 9. Null move search with verification search (~40 Elo)
if ( !PvNode
&& (ss-1)->currentMove != MOVE_NULL
- && (ss-1)->statScore < 23824
+ && (ss-1)->statScore < 22977
&& eval >= beta
&& eval >= ss->staticEval
- && ss->staticEval >= beta - 33 * depth - 33 * improving + 112 * ttPv + 311
+ && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182
&& !excludedMove
&& pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
assert(eval - beta >= 0);
// Null move dynamic reduction based on depth and value
- Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3);
+ Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3);
ss->currentMove = MOVE_NULL;
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
}
}
- probcutBeta = beta + 176 - 49 * improving;
+ probCutBeta = beta + 176 - 49 * improving;
// Step 10. ProbCut (~10 Elo)
// If we have a good enough capture and a reduced search returns a value
if ( !PvNode
&& depth > 4
&& abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
- && !( ttHit
+ // If the value from the transposition table is lower than probCutBeta, don't attempt
+ // probCut there. In further interactions with the transposition table the cutoff depth
+ // is set to depth - 3, because the probCut search has depth set to depth - 4 but we also
+ // do a move before it, so the effective depth is equal to depth - 3.
+ && !( ss->ttHit
&& tte->depth() >= depth - 3
&& ttValue != VALUE_NONE
- && ttValue < probcutBeta))
+ && ttValue < probCutBeta))
{
- if ( ttHit
+ // If ttMove is a capture and the value from the transposition table is good enough, produce
+ // a probCut cutoff without digging into the actual probCut search
+ if ( ss->ttHit
&& tte->depth() >= depth - 3
&& ttValue != VALUE_NONE
- && ttValue >= probcutBeta
+ && ttValue >= probCutBeta
&& ttMove
&& pos.capture_or_promotion(ttMove))
- return probcutBeta;
+ return probCutBeta;
- assert(probcutBeta < VALUE_INFINITE);
- MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory);
+ assert(probCutBeta < VALUE_INFINITE);
+ MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
int probCutCount = 0;
+ bool ttPv = ss->ttPv;
+ ss->ttPv = false;
while ( (move = mp.next_move()) != MOVE_NONE
&& probCutCount < 2 + 2 * cutNode)
pos.do_move(move, st);
// Perform a preliminary qsearch to verify that the move holds
- value = -qsearch<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1);
+ value = -qsearch<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1);
// If the qsearch held, perform the regular search
- if (value >= probcutBeta)
- value = -search<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode);
+ if (value >= probCutBeta)
+ value = -search<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode);
pos.undo_move(move);
- if (value >= probcutBeta)
+ if (value >= probCutBeta)
{
- if ( !(ttHit
+ // If the transposition table doesn't hold info of equal or greater depth, write the probCut data into it
+ if ( !(ss->ttHit
&& tte->depth() >= depth - 3
&& ttValue != VALUE_NONE))
tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
return value;
}
}
+ ss->ttPv = ttPv;
}
- // Step 11. Internal iterative deepening (~1 Elo)
- if (depth >= 7 && !ttMove)
- {
- search<NT>(pos, ss, alpha, beta, depth - 7, cutNode);
-
- tte = TT.probe(posKey, ttHit);
- ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
- ttMove = ttHit ? tte->move() : MOVE_NONE;
- }
+ // Step 11. If the position is not in TT, decrease depth by 2
+ if ( PvNode
+ && depth >= 6
+ && !ttMove)
+ depth -= 2;
moves_loop: // When in check, search starts from here
continue;
// Futility pruning: parent node (~5 Elo)
- if ( lmrDepth < 8
+ if ( lmrDepth < 7
&& !ss->inCheck
- && ss->staticEval + 284 + 188 * lmrDepth <= alpha
+ && ss->staticEval + 283 + 170 * lmrDepth <= alpha
&& (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)]
- + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388)
+ + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376)
continue;
// Prune moves with negative SEE (~20 Elo)
- if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * lmrDepth * lmrDepth)))
+ if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
continue;
}
else
&& !(PvNode && abs(bestValue) < 2)
&& PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
&& !ss->inCheck
- && ss->staticEval + 267 + 391 * lmrDepth
+ && ss->staticEval + 169 + 244 * lmrDepth
+ PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
continue;
// See based pruning
- if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo)
+ if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
continue;
}
}
// then that move is singular and should be extended. To verify this we do
// a reduced search on all the other moves but the ttMove and if the
// result is lower than ttValue minus a margin, then we will extend the ttMove.
- if ( depth >= 6
+ if ( depth >= 7
&& move == ttMove
&& !rootNode
&& !excludedMove // Avoid recursive singular search
/* && ttValue != VALUE_NONE Already implicit in the next condition */
&& abs(ttValue) < VALUE_KNOWN_WIN
&& (tte->bound() & BOUND_LOWER)
- && tte->depth() >= depth - 3
- && pos.legal(move))
+ && tte->depth() >= depth - 3)
{
Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2;
Depth singularDepth = (depth - 1 + 3 * formerPv) / 2;
&& (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move)))
extension = 1;
- // Passed pawn extension
- else if ( move == ss->killers[0]
- && pos.advanced_pawn_push(move)
- && pos.pawn_passed(us, to_sq(move)))
- extension = 1;
-
// Last captures extension
else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg
&& pos.non_pawn_material() <= 2 * RookValueMg)
extension = 1;
// Castling extension
- if (type_of(move) == CASTLING)
+ if ( type_of(move) == CASTLING
+ && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
extension = 1;
// Late irreversible move extension
// Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
// re-searched at full depth.
if ( depth >= 3
- && moveCount > 1 + 2 * rootNode
- && (!rootNode || thisThread->best_move_count(move) == 0)
+ && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
&& ( !captureOrPromotion
|| moveCountPruning
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|| cutNode
- || thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024))
+ || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
{
Depth r = reduction(improving, depth, moveCount);
- // Decrease reduction at non-check cut nodes for second move at low depths
- if ( cutNode
- && depth <= 10
- && moveCount <= 2
- && !ss->inCheck)
- r--;
-
// Decrease reduction if the ttHit running average is large
- if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024)
+ if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
r--;
// Reduction if other threads are searching this position
r++;
// Decrease reduction if position is or has been on the PV (~10 Elo)
- if (ttPv)
+ if (ss->ttPv)
r -= 2;
if (moveCountPruning && !formerPv)
// Decrease reduction if ttMove has been singularly extended (~3 Elo)
if (singularQuietLMR)
- r -= 1 + formerPv;
+ r--;
if (!captureOrPromotion)
{
// hence break make_move(). (~2 Elo)
else if ( type_of(move) == NORMAL
&& !pos.see_ge(reverse_move(move)))
- r -= 2 + ttPv - (type_of(movedPiece) == PAWN);
+ r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN);
ss->statScore = thisThread->mainHistory[us][from_to(move)]
+ (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)]
- - 4826;
+ - 5287;
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
- if (ss->statScore >= -100 && (ss-1)->statScore < -112)
+ if (ss->statScore >= -106 && (ss-1)->statScore < -104)
r--;
- else if ((ss-1)->statScore >= -125 && ss->statScore < -138)
+ else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
r++;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
- r -= ss->statScore / 14615;
+ r -= ss->statScore / 14884;
}
else
{
// Unless giving check, this capture is likely bad
if ( !givesCheck
- && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha)
+ && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
r++;
}
- Depth d = Utility::clamp(newDepth - r, 1, newDepth);
+ Depth d = std::clamp(newDepth - r, 1, newDepth);
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true);
if (PvNode)
bestValue = std::min(bestValue, maxValue);
+ // If no good move is found and the previous position was ttPv, then the previous
+ // opponent move is probably good and the new position is added to the search tree.
+ if (bestValue <= alpha)
+ ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3);
+ // Otherwise, a counter move has been found and if the position is the last leaf
+ // in the search tree, remove the position from the search tree.
+ else if (depth > 3)
+ ss->ttPv = ss->ttPv && (ss+1)->ttPv;
+
if (!excludedMove && !(rootNode && thisThread->pvIdx))
- tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv,
+ tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
bestValue >= beta ? BOUND_LOWER :
PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER,
depth, bestMove, ss->staticEval);
Move ttMove, move, bestMove;
Depth ttDepth;
Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha;
- bool ttHit, pvHit, givesCheck, captureOrPromotion;
+ bool pvHit, givesCheck, captureOrPromotion;
int moveCount;
if (PvNode)
: DEPTH_QS_NO_CHECKS;
// Transposition table lookup
posKey = pos.key();
- tte = TT.probe(posKey, ttHit);
- ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
- ttMove = ttHit ? tte->move() : MOVE_NONE;
- pvHit = ttHit && tte->is_pv();
+ tte = TT.probe(posKey, ss->ttHit);
+ ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+ ttMove = ss->ttHit ? tte->move() : MOVE_NONE;
+ pvHit = ss->ttHit && tte->is_pv();
if ( !PvNode
- && ttHit
+ && ss->ttHit
&& tte->depth() >= ttDepth
&& ttValue != VALUE_NONE // Only in case of TT access race
&& (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
}
else
{
- if (ttHit)
+ if (ss->ttHit)
{
// Never assume anything about values stored in TT
if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE)
// Stand pat. Return immediately if static value is at least beta
if (bestValue >= beta)
{
- if (!ttHit)
+ if (!ss->ttHit)
tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
DEPTH_NONE, MOVE_NONE, ss->staticEval);
if (PvNode && bestValue > alpha)
alpha = bestValue;
- futilityBase = bestValue + 141;
+ futilityBase = bestValue + 145;
}
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
{
assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push
+ // moveCount pruning
+ if (moveCount > 2)
+ continue;
+
futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))];
if (futilityValue <= alpha)
}
// Do not search moves with negative SEE values
- if ( !ss->inCheck && !pos.see_ge(move))
+ if ( !ss->inCheck
+ && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
+ && !pos.see_ge(move))
continue;
// Speculative prefetch as early as possible
[pos.moved_piece(move)]
[to_sq(move)];
+ if ( !captureOrPromotion
+ && moveCount
+ && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
+ && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
+ continue;
+
// Make and search the move
pos.do_move(move, st, givesCheck);
value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1);
else
captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1;
- // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted
- if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0]))
+ // Extra penalty for a quiet early move that was not a TT move, or for the main killer move, in the previous ply when it gets refuted
+ if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0]))
&& !pos.captured_piece())
update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1);
}
if (depth > 11 && ss->ply < MAX_LPH)
- thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6);
+ thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
}
// When playing with strength handicap, choose best move among a set of RootMoves
{
bool updated = rootMoves[i].score != -VALUE_INFINITE;
- if (depth == 1 && !updated)
+ if (depth == 1 && !updated && i > 0)
continue;
- Depth d = updated ? depth : depth - 1;
+ Depth d = updated ? depth : std::max(1, depth - 1);
Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore;
+ if (v == -VALUE_INFINITE)
+ v = VALUE_ZERO;
+
bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
v = tb ? rootMoves[i].tbScore : v;
if (RootInTB)
{
// Sort moves according to TB rank
- std::sort(rootMoves.begin(), rootMoves.end(),
+ std::stable_sort(rootMoves.begin(), rootMoves.end(),
[](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } );
// Probe during search only if DTZ is not available and we are winning
int statScore;
int moveCount;
bool inCheck;
+ bool ttPv;
+ bool ttHit;
};
*mapping = statbuf.st_size;
*baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+#if defined(MADV_RANDOM)
madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
+#endif
::close(fd);
if (*baseAddress == MAP_FAILED)
if (entry->hasPawns) {
idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
- std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
+ std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
for (int i = 1; i < leadPawnsCnt; ++i)
idx += Binomial[i][MapPawns[squares[i]]];
while (d->groupLen[++next])
{
- std::sort(groupSq, groupSq + d->groupLen[next]);
+ std::stable_sort(groupSq, groupSq + d->groupLen[next]);
uint64_t n = 0;
// Map down a square if "comes later" than a square in the previous
}
-/// Thread::bestMoveCount(Move move) return best move counter for the given root move
-
-int Thread::best_move_count(Move move) const {
-
- auto rm = std::find(rootMoves.begin() + pvIdx,
- rootMoves.begin() + pvLast, move);
-
- return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
-}
-
-
/// Thread::clear() reset histories, usually before a new game
void Thread::clear() {
// We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
- // be deduced from a fen string, so set() clears them and to not lose the info
- // we need to backup and later restore setupStates->back(). Note that setupStates
- // is shared by threads but is accessed in read-only mode.
- StateInfo tmp = setupStates->back();
-
+ // be deduced from a fen string, so set() clears them and they are set from
+ // setupStates->back() later. The rootState is per thread; earlier states are shared
+ // since they are read-only.
for (Thread* th : *this)
{
th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
th->rootDepth = th->completedDepth = 0;
th->rootMoves = rootMoves;
- th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
+ th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
+ th->rootState = setupStates->back();
}
- setupStates->back() = tmp;
-
main()->start_searching();
}
void idle_loop();
void start_searching();
void wait_for_search_finished();
- int best_move_count(Move move) const;
Pawns::Table pawnsTable;
Material::Table materialTable;
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
Position rootPos;
+ StateInfo rootState;
Search::RootMoves rootMoves;
Depth rootDepth, completedDepth;
CounterMoveHistory counterMoves;
/// The implementation calls pthread_create() with the stack size parameter
/// equal to the linux 8MB default, on platforms that support it.
-#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)
+#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS)
#include <pthread.h>
TimePoint slowMover = TimePoint(Options["Slow Mover"]);
TimePoint npmsec = TimePoint(Options["nodestime"]);
- // opt_scale is a percentage of available time to use for the current move.
- // max_scale is a multiplier applied to optimumTime.
- double opt_scale, max_scale;
+ // optScale is a percentage of available time to use for the current move.
+ // maxScale is a multiplier applied to optimumTime.
+ double optScale, maxScale;
// If we have to play in 'nodes as time' mode, then convert from time
// to nodes, and use resulting values in time management formulas.
// game time for the current move, so also cap to 20% of available game time.
if (limits.movestogo == 0)
{
- opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0,
+ optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0,
0.2 * limits.time[us] / double(timeLeft));
- max_scale = std::min(7.0, 4.0 + ply / 12.0);
+ maxScale = std::min(7.0, 4.0 + ply / 12.0);
}
// x moves in y seconds (+ z increment)
else
{
- opt_scale = std::min((0.8 + ply / 128.0) / mtg,
+ optScale = std::min((0.8 + ply / 128.0) / mtg,
0.8 * limits.time[us] / double(timeLeft));
- max_scale = std::min(6.3, 1.5 + 0.11 * mtg);
+ maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
}
// Never use more than 80% of the available time for this move
- optimumTime = TimePoint(opt_scale * timeLeft);
- maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime));
+ optimumTime = TimePoint(optScale * timeLeft);
+ maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
if (Options["Ponder"])
optimumTime += optimumTime / 4;
if (m || (uint16_t)k != key16)
move16 = (uint16_t)m;
- // Overwrite less valuable entries
- if ((uint16_t)k != key16
- || d - DEPTH_OFFSET > depth8 - 4
- || b == BOUND_EXACT)
+ // Overwrite less valuable entries (cheapest checks first)
+ if (b == BOUND_EXACT
+ || (uint16_t)k != key16
+ || d - DEPTH_OFFSET > depth8 - 4)
{
- assert(d >= DEPTH_OFFSET);
+ assert(d > DEPTH_OFFSET);
+ assert(d < 256 + DEPTH_OFFSET);
key16 = (uint16_t)k;
+ depth8 = (uint8_t)(d - DEPTH_OFFSET);
+ genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
value16 = (int16_t)v;
eval16 = (int16_t)ev;
- genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
- depth8 = (uint8_t)(d - DEPTH_OFFSET);
}
}
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i)
- if (!tte[i].key16 || tte[i].key16 == key16)
+ if (tte[i].key16 == key16 || !tte[i].depth8)
{
tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh
- return found = (bool)tte[i].key16, &tte[i];
+ return found = (bool)tte[i].depth8, &tte[i];
}
// Find an entry to be replaced according to the replacement strategy
int cnt = 0;
for (int i = 0; i < 1000; ++i)
for (int j = 0; j < ClusterSize; ++j)
- cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8;
+ cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8;
return cnt / ClusterSize;
}
/// TTEntry struct is the 10 bytes transposition table entry, defined as below:
///
/// key 16 bit
-/// move 16 bit
-/// value 16 bit
-/// eval value 16 bit
+/// depth 8 bit
/// generation 5 bit
/// pv node 1 bit
/// bound type 2 bit
-/// depth 8 bit
+/// move 16 bit
+/// value 16 bit
+/// eval value 16 bit
struct TTEntry {
friend class TranspositionTable;
uint16_t key16;
+ uint8_t depth8;
+ uint8_t genBound8;
uint16_t move16;
int16_t value16;
int16_t eval16;
- uint8_t genBound8;
- uint8_t depth8;
};
PIECE_NB = 16
};
-// An ID used to track the pieces. Max. 32 pieces on board.
-enum PieceId {
- PIECE_ID_ZERO = 0,
- PIECE_ID_KING = 30,
- PIECE_ID_WKING = 30,
- PIECE_ID_BKING = 31,
- PIECE_ID_NONE = 32
-};
-
-inline PieceId operator++(PieceId& d, int) {
-
- PieceId x = d;
- d = PieceId(int(d) + 1);
- return x;
-}
-
constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
{ VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
DEPTH_QS_RECAPTURES = -5,
DEPTH_NONE = -6,
- DEPTH_OFFSET = DEPTH_NONE
+
+ DEPTH_OFFSET = -7 // value used only for TT entry occupancy check
};
enum Square : int {
RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
};
-// unique number for each piece type on each square
-enum PieceSquare : uint32_t {
- PS_NONE = 0,
- PS_W_PAWN = 1,
- PS_B_PAWN = 1 * SQUARE_NB + 1,
- PS_W_KNIGHT = 2 * SQUARE_NB + 1,
- PS_B_KNIGHT = 3 * SQUARE_NB + 1,
- PS_W_BISHOP = 4 * SQUARE_NB + 1,
- PS_B_BISHOP = 5 * SQUARE_NB + 1,
- PS_W_ROOK = 6 * SQUARE_NB + 1,
- PS_B_ROOK = 7 * SQUARE_NB + 1,
- PS_W_QUEEN = 8 * SQUARE_NB + 1,
- PS_B_QUEEN = 9 * SQUARE_NB + 1,
- PS_W_KING = 10 * SQUARE_NB + 1,
- PS_END = PS_W_KING, // pieces without kings (pawns included)
- PS_B_KING = 11 * SQUARE_NB + 1,
- PS_END2 = 12 * SQUARE_NB + 1
-};
-
-struct ExtPieceSquare {
- PieceSquare from[COLOR_NB];
-};
-
-// Array for finding the PieceSquare corresponding to the piece on the board
-extern ExtPieceSquare kpp_board_index[PIECE_NB];
-
-constexpr bool is_ok(PieceId pid);
-constexpr Square rotate180(Square sq);
-
-// Structure holding which tracked piece (PieceId) is where (PieceSquare)
-class EvalList {
-
-public:
- // Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2
- static const int MAX_LENGTH = 32;
-
- // Array that holds the piece id for the pieces on the board
- PieceId piece_id_list[SQUARE_NB];
-
- // List of pieces, separate from White and Black POV
- PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
- PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
-
- // Place the piece pc with piece_id on the square sq on the board
- void put_piece(PieceId piece_id, Square sq, Piece pc)
- {
- assert(is_ok(piece_id));
- if (pc != NO_PIECE)
- {
- pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq);
- pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq));
- piece_id_list[sq] = piece_id;
- }
- else
- {
- pieceListFw[piece_id] = PS_NONE;
- pieceListFb[piece_id] = PS_NONE;
- piece_id_list[sq] = piece_id;
- }
- }
-
- // Convert the specified piece_id piece to ExtPieceSquare type and return it
- ExtPieceSquare piece_with_id(PieceId piece_id) const
- {
- ExtPieceSquare eps;
- eps.from[WHITE] = pieceListFw[piece_id];
- eps.from[BLACK] = pieceListFb[piece_id];
- return eps;
- }
-
-private:
- PieceSquare pieceListFw[MAX_LENGTH];
- PieceSquare pieceListFb[MAX_LENGTH];
-};
-
-// For differential evaluation of pieces that changed since last turn
+// Keep track of what a move changes on the board (used by NNUE)
struct DirtyPiece {
// Number of changed pieces
int dirty_num;
- // The ids of changed pieces, max. 2 pieces can change in one move
- PieceId pieceId[2];
+ // Max 3 pieces can change in one move. A promotion with capture moves
+ // both the pawn and the captured piece to SQ_NONE, and moves the promoted
+ // piece from SQ_NONE to the capture square.
+ Piece piece[3];
- // What changed from the piece with that piece number
- ExtPieceSquare old_piece[2];
- ExtPieceSquare new_piece[2];
+ // From and to squares, which may be SQ_NONE
+ Square from[3];
+ Square to[3];
};
/// Score enum stores a middlegame and an endgame value in a single integer (enum).
ENABLE_FULL_OPERATORS_ON(Direction)
ENABLE_INCR_OPERATORS_ON(Piece)
-ENABLE_INCR_OPERATORS_ON(PieceSquare)
-ENABLE_INCR_OPERATORS_ON(PieceId)
ENABLE_INCR_OPERATORS_ON(PieceType)
ENABLE_INCR_OPERATORS_ON(Square)
ENABLE_INCR_OPERATORS_ON(File)
return Color(pc >> 3);
}
-constexpr bool is_ok(PieceId pid) {
- return pid < PIECE_ID_NONE;
-}
-
constexpr bool is_ok(Square s) {
return s >= SQ_A1 && s <= SQ_H8;
}
return Square(m & 0x3F);
}
-// Return relative square when turning the board 180 degrees
-constexpr Square rotate180(Square sq) {
- return (Square)(sq ^ 0x3F);
-}
-
constexpr int from_to(Move m) {
return m & 0xFFF;
}
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
// Transform eval to centipawns with limited range
- double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
+ double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
// Return win rate in per mille (rounded to nearest)
return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
#include <ostream>
#include <sstream>
+#include "evaluate.h"
#include "misc.h"
#include "search.h"
#include "thread.h"
o["SyzygyProbeDepth"] << Option(1, 1, 100);
o["Syzygy50MoveRule"] << Option(true);
o["SyzygyProbeLimit"] << Option(7, 0, 7);
- o["Use NNUE"] << Option(false, on_use_NNUE);
- o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file);
+ o["Use NNUE"] << Option(true, on_use_NNUE);
+ o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file);
o["RPCServerAddress"] << Option("<empty>", on_rpc_server_address);
}
"go depth 10" \
"go movetime 1000" \
"go wtime 8000 btime 8000 winc 500 binc 500" \
- "bench 128 $threads 10 default depth"
+ "bench 128 $threads 8 default depth"
do
echo "$prefix $exeprefix ./stockfish $args $postfix"
# more general testing, following an uci protocol exchange
cat << EOF > game.exp
- set timeout 10
+ set timeout 240
spawn $exeprefix ./stockfish
send "uci\n"
expect "bestmove"
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
- send "go depth 30\n"
+ send "go depth 20\n"
expect "bestmove"
send "quit\n"
send "uci\n"
send "setoption name SyzygyPath value ../tests/syzygy/\n"
expect "info string Found 35 tablebases" {} timeout {exit 1}
- send "bench 128 1 10 default depth\n"
+ send "bench 128 1 8 default depth\n"
send "quit\n"
expect eof