From ba15781be8f80746b6daed6588191297f38737fa Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 9 Jan 2017 10:30:57 +0100 Subject: [PATCH] New shell scripts for testing, used for travis CI (#957) Perform more complex verification and validation. - signature.sh : extract and optionally compare Bench/Signature/Node count. - perft.sh : verify perft counts for a number of positions. - instrumented.sh : run a few commands or uci sequences through valgrind/sanitizer instrumented binaries. - reprosearch.sh : verify reproducibility of search. These scripts can be used directly from the command line in the src directory. Update travis script to use these shell scripts. No functional change. --- .travis.yml | 43 ++++++++++++++--------- tests/instrumented.sh | 82 +++++++++++++++++++++++++++++++++++++++++++ tests/perft.sh | 32 +++++++++++++++++ tests/reprosearch.sh | 61 ++++++++++++++++++++++++++++++++ tests/signature.sh | 26 ++++++++++++++ 5 files changed, 227 insertions(+), 17 deletions(-) create mode 100755 tests/instrumented.sh create mode 100755 tests/perft.sh create mode 100755 tests/reprosearch.sh create mode 100755 tests/signature.sh diff --git a/.travis.yml b/.travis.yml index 64cc2b63..a0673b85 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,20 +44,29 @@ before_script: - cd src script: - - make clean && make build ARCH=x86-64 && ./stockfish bench 2>&1 >/dev/null | grep 'Nodes searched' | tee bench1 - - make clean && make build ARCH=x86-32 && ./stockfish bench 2>&1 >/dev/null | grep 'Nodes searched' | tee bench2 - - echo "Checking for same bench numbers..." - - diff bench1 bench2 > result - - test ! -s result - # verify perft numbers (positions from https://chessprogramming.wikispaces.com/Perft+Results) - - printf ' set timeout 10\n lassign $argv pos depth result\n spawn ./stockfish\n send "position $pos\\n perft $depth\\n"\n expect "Nodes searched ? 
$result" {} timeout {exit 1} \n send "quit\\n"\n expect eof\n' > perft.exp - - expect perft.exp startpos 5 4865609 > /dev/null - - expect perft.exp "fen r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -" 5 193690690 > /dev/null - - expect perft.exp "fen 8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -" 6 11030083 > /dev/null - - expect perft.exp "fen r3k2r/Pppp1ppp/1b3nbN/nP6/BBP1P3/q4N2/Pp1P2PP/R2Q1RK1 w kq - 0 1" 5 15833292 > /dev/null - - expect perft.exp "fen rnbq1k1r/pp1Pbppp/2p5/8/2B5/8/PPP1NnPP/RNBQK2R w KQ - 1 8" 5 89941194 > /dev/null - - expect perft.exp "fen r4rk1/1pp1qppp/p1np1n2/2b1p1B1/2B1P1b1/P1NP1N2/1PP1QPPP/R4RK1 w - - 0 10" 5 164075551 > /dev/null - # if valgrind is available check the build is without error, reduce depth to speedup testing, but not too shallow to catch more cases. - - if [ -x "$(command -v valgrind )" ] ; then make clean && make ARCH=x86-64 debug=yes build && valgrind --error-exitcode=42 ./stockfish bench 128 1 10 default depth 1>/dev/null ; fi - # use g++-6 as a proxy for having sanitizers ... might need revision as they become available for more recent versions of clang/gcc than trusty provides - - if [[ "$COMPILER" == "g++-6" ]]; then make clean && make ARCH=x86-64 sanitize=yes build && ! 
./stockfish bench 2>&1 | grep "runtime error:" ; fi + # + # checking bench for various build types + # + # obtain reference signature from an unoptimized debug build + - make clean && make ARCH=x86-64 optimize=no debug=yes build > /dev/null && export benchref=$(../tests/signature.sh) + - echo "Reference bench:" $benchref + # verify the 64-bit and 32-bit builds against the reference + - make clean && make ARCH=x86-64 build > /dev/null && ../tests/signature.sh $benchref + - make clean && make ARCH=x86-32 build > /dev/null && ../tests/signature.sh $benchref + # + # perft + # + - make clean && make ARCH=x86-64 build > /dev/null && ../tests/perft.sh + # + # reproducible search + # + - make clean && make ARCH=x86-64 build > /dev/null && ../tests/reprosearch.sh + # + # valgrind + # + - if [ -x "$(command -v valgrind )" ]; then make clean && make ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi + # + # sanitizer + # + # use g++-6 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc + - if [[ "$COMPILER" == "g++-6" ]]; then make clean && make ARCH=x86-64 sanitize=yes build > /dev/null && ../tests/instrumented.sh --sanitizer; fi diff --git a/tests/instrumented.sh b/tests/instrumented.sh new file mode 100755 index 00000000..a6950e16 --- /dev/null +++ b/tests/instrumented.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# check for errors under valgrind or sanitizers. + +error() +{ + echo "instrumented testing failed on line $1" + exit 1 +} +trap 'error ${LINENO}' ERR + +# define suitable prefixes and postfixes for the selected testing option +case $1 in + --valgrind) + echo "valgrind testing started" + prefix='' + exeprefix='valgrind --error-exitcode=42' + postfix='1>/dev/null' + ;; + --sanitizer) + echo "sanitizer testing started" + prefix='!' 
+ exeprefix='' + postfix='2>&1 | grep "runtime error:"' + ;; + *) + echo "unknown testing started" + prefix='' + exeprefix='' + postfix='' + ;; +esac + +# simple command line testing +for args in "eval" \ + "go nodes 1000" \ + "go depth 10" \ + "go movetime 1000" \ + "go wtime 8000 btime 8000 winc 500 binc 500" \ + "bench 128 1 10 default depth" +do + + echo "$prefix $exeprefix ./stockfish $args $postfix" + eval "$prefix $exeprefix ./stockfish $args $postfix" + +done + +# more general testing, following a UCI protocol exchange +cat << EOF > game.exp + set timeout 10 + spawn $exeprefix ./stockfish + + send "uci\n" + expect "uciok" + + send "ucinewgame\n" + send "position startpos\n" + send "go nodes 1000\n" + expect "bestmove" + + send "position startpos moves e2e4 e7e6\n" + send "go nodes 1000\n" + expect "bestmove" + + send "quit\n" + expect eof + + # return error code of the spawned program, useful for valgrind + lassign [wait] pid spawnid os_error_flag value + exit \$value +EOF + +for exps in game.exp +do + + echo "$prefix expect game.exp $postfix" + eval "$prefix expect game.exp $postfix" + +done + +rm game.exp + +echo "instrumented testing OK" diff --git a/tests/perft.sh b/tests/perft.sh new file mode 100755 index 00000000..f3d5b88b --- /dev/null +++ b/tests/perft.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# verify perft numbers (positions from https://www.chessprogramming.org/Perft_Results) + +error() +{ + echo "perft testing failed on line $1" + exit 1 +} +trap 'error ${LINENO}' ERR + +echo "perft testing started" + +cat << EOF > perft.exp + set timeout 10 + lassign \$argv pos depth result + spawn ./stockfish + send "position \$pos\\n perft \$depth\\n" + expect "Nodes searched ? 
\$result" {} timeout {exit 1} + send "quit\\n" + expect eof +EOF + +expect perft.exp startpos 5 4865609 > /dev/null +expect perft.exp "fen r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -" 5 193690690 > /dev/null +expect perft.exp "fen 8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -" 6 11030083 > /dev/null +expect perft.exp "fen r3k2r/Pppp1ppp/1b3nbN/nP6/BBP1P3/q4N2/Pp1P2PP/R2Q1RK1 w kq - 0 1" 5 15833292 > /dev/null +expect perft.exp "fen rnbq1k1r/pp1Pbppp/2p5/8/2B5/8/PPP1NnPP/RNBQK2R w KQ - 1 8" 5 89941194 > /dev/null +expect perft.exp "fen r4rk1/1pp1qppp/p1np1n2/2b1p1B1/2B1P1b1/P1NP1N2/1PP1QPPP/R4RK1 w - - 0 10" 5 164075551 > /dev/null + +rm perft.exp + +echo "perft testing OK" diff --git a/tests/reprosearch.sh b/tests/reprosearch.sh new file mode 100755 index 00000000..9fd847ff --- /dev/null +++ b/tests/reprosearch.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# verify reproducible search + +error() +{ + echo "reprosearch testing failed on line $1" + exit 1 +} +trap 'error ${LINENO}' ERR + +echo "reprosearch testing started" + +# run the same two short searches twice, separated by ucinewgame. +# with go nodes $nodes both runs should result in exactly +# the same node count for each iteration. 
+cat << EOF > repeat.exp + set timeout 10 + spawn ./stockfish + lassign \$argv nodes + + send "uci\n" + expect "uciok" + + send "ucinewgame\n" + send "position startpos\n" + send "go nodes \$nodes\n" + expect "bestmove" + + send "position startpos moves e2e4 e7e6\n" + send "go nodes \$nodes\n" + expect "bestmove" + + send "ucinewgame\n" + send "position startpos\n" + send "go nodes \$nodes\n" + expect "bestmove" + + send "position startpos moves e2e4 e7e6\n" + send "go nodes \$nodes\n" + expect "bestmove" + + send "quit\n" + expect eof +EOF + +# to increase the likelihood of finding a non-reproducible case, +# the allowed number of nodes is varied systematically +for i in `seq 1 20` +do + + nodes=$((100*3**i/2**i)) + echo "reprosearch testing with $nodes nodes" + + # each distinct node count should appear an even number of times + expect repeat.exp $nodes 2>&1 | grep -o "nodes [0-9]*" | sort | uniq -c | awk '{if ($1%2!=0) exit(1)}' + +done + +rm repeat.exp + +echo "reprosearch testing OK" diff --git a/tests/signature.sh b/tests/signature.sh new file mode 100755 index 00000000..2eb52c89 --- /dev/null +++ b/tests/signature.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# obtain and optionally verify Bench / signature +# if no reference is given, the output is deliberately limited to just the signature + +error() +{ + echo "running bench for signature failed on line $1" + exit 1 +} +trap 'error ${LINENO}' ERR + +# obtain + +signature=`./stockfish bench 2>&1 | grep "Nodes searched : " | awk '{print $4}'` + +if [ $# -gt 0 ]; then + # compare to given reference; a mismatch is a failure (exit status 1) + if [ "$1" != "$signature" ]; then + echo "signature mismatch: reference $1 obtained $signature" + exit 1 + fi + echo "signature OK: $signature" +else + # just report signature + echo $signature +fi -- 2.39.2