From ba15781be8f80746b6daed6588191297f38737fa Mon Sep 17 00:00:00 2001
From: Joost VandeVondele <Joost.VandeVondele@gmail.com>
Date: Mon, 9 Jan 2017 10:30:57 +0100
Subject: [PATCH] New shell scripts for testing, used for travis CI (#957)

Perform more complex verification and validation.

- signature.sh : extract and optionally compare Bench/Signature/Node count.
- perft.sh : verify perft counts for a number of positions.
- instrumented.sh : run a few commands or uci sequences through valgrind/sanitizer instrumented binaries.
- reprosearch.sh : verify reproducibility of search.

These scripts can be used directly from the command line in the src directory.

Update travis script to use these shell scripts.

No functional change.
---
 .travis.yml           | 43 ++++++++++++++---------
 tests/instrumented.sh | 82 +++++++++++++++++++++++++++++++++++++++++++
 tests/perft.sh        | 32 +++++++++++++++++
 tests/reprosearch.sh  | 61 ++++++++++++++++++++++++++++++++
 tests/signature.sh    | 26 ++++++++++++++
 5 files changed, 227 insertions(+), 17 deletions(-)
 create mode 100755 tests/instrumented.sh
 create mode 100755 tests/perft.sh
 create mode 100755 tests/reprosearch.sh
 create mode 100755 tests/signature.sh

diff --git a/.travis.yml b/.travis.yml
index 64cc2b63..a0673b85 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -44,20 +44,29 @@ before_script:
   - cd src
 
 script:
-  - make clean && make build ARCH=x86-64 && ./stockfish bench 2>&1 >/dev/null | grep 'Nodes searched' | tee bench1
-  - make clean && make build ARCH=x86-32 && ./stockfish bench 2>&1 >/dev/null | grep 'Nodes searched' | tee bench2
-  - echo "Checking for same bench numbers..."
-  - diff bench1 bench2 > result
-  - test ! -s result
-  # verify perft numbers (positions from https://chessprogramming.wikispaces.com/Perft+Results)
-  - printf ' set timeout 10\n lassign $argv pos depth result\n spawn ./stockfish\n send "position $pos\\n perft $depth\\n"\n expect "Nodes searched  ? $result" {} timeout {exit 1} \n send "quit\\n"\n expect eof\n' > perft.exp
-  - expect perft.exp startpos 5 4865609 > /dev/null
-  - expect perft.exp "fen r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -" 5 193690690 > /dev/null
-  - expect perft.exp "fen 8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -" 6 11030083 > /dev/null
-  - expect perft.exp "fen r3k2r/Pppp1ppp/1b3nbN/nP6/BBP1P3/q4N2/Pp1P2PP/R2Q1RK1 w kq - 0 1" 5 15833292 > /dev/null
-  - expect perft.exp "fen rnbq1k1r/pp1Pbppp/2p5/8/2B5/8/PPP1NnPP/RNBQK2R w KQ - 1 8" 5 89941194 > /dev/null
-  - expect perft.exp "fen r4rk1/1pp1qppp/p1np1n2/2b1p1B1/2B1P1b1/P1NP1N2/1PP1QPPP/R4RK1 w - - 0 10" 5 164075551 > /dev/null
-  # if valgrind is available check the build is without error, reduce depth to speedup testing, but not too shallow to catch more cases.
-  - if [ -x "$(command -v valgrind )" ] ; then make clean && make ARCH=x86-64 debug=yes build && valgrind --error-exitcode=42 ./stockfish bench 128 1 10 default depth 1>/dev/null ; fi
-  # use g++-6 as a proxy for having sanitizers ... might need revision as they become available for more recent versions of clang/gcc than trusty provides
-  - if [[ "$COMPILER" == "g++-6" ]]; then make clean && make ARCH=x86-64 sanitize=yes build && ! ./stockfish bench 2>&1 | grep "runtime error:" ; fi
+  #
+  # checking bench for various build types
+  #
+  # obtain reference (assign first, then export: "export var=$(cmd)" would hide a failing signature.sh)
+  - make clean && make ARCH=x86-64 optimize=no debug=yes build > /dev/null && benchref=$(../tests/signature.sh) && export benchref
+  - echo "Reference bench:" $benchref
+  # verify against reference (quoted, so an empty reference is still passed and compared instead of being dropped)
+  - make clean && make ARCH=x86-64 build > /dev/null && ../tests/signature.sh "$benchref"
+  - make clean && make ARCH=x86-32 build > /dev/null && ../tests/signature.sh "$benchref"
+  #
+  # perft: verify perft counts for a number of positions
+  #
+  - make clean && make ARCH=x86-64 build > /dev/null && ../tests/perft.sh
+  #
+  # reproducible search: repeated searches must report identical node counts
+  #
+  - make clean && make ARCH=x86-64 build > /dev/null && ../tests/reprosearch.sh
+  #
+  # valgrind: only when valgrind is installed, on a debug build without optimization
+  #
+  - if [ -x "$(command -v valgrind )" ]; then make clean && make ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
+  #
+  # sanitizer: run the instrumented tests on a sanitize=yes build
+  #
+  # use g++-6 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc
+  - if [[ "$COMPILER" == "g++-6" ]]; then make clean && make ARCH=x86-64 sanitize=yes build > /dev/null && ../tests/instrumented.sh --sanitizer; fi
diff --git a/tests/instrumented.sh b/tests/instrumented.sh
new file mode 100755
index 00000000..a6950e16
--- /dev/null
+++ b/tests/instrumented.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# run stockfish under valgrind or as a sanitizer build and fail on any reported error.
+
+# ERR-trap handler: report the script line that failed and abort with status 1.
+error() {
+  echo "instrumented testing failed on line $1"
+  exit 1
+}
+trap 'error ${LINENO}' ERR
+
+# pick the pieces wrapped around every tested command for the requested mode
+case "$1" in
+  --valgrind)
+    echo "valgrind testing started"
+    exeprefix="valgrind --error-exitcode=42"
+    postfix="1>/dev/null"
+    prefix=""
+  ;;
+  --sanitizer)
+    echo "sanitizer testing started"
+    exeprefix=""
+    postfix="2>&1 | grep \"runtime error:\""
+    prefix='!'
+  ;;
+  *)
+    echo "unknown testing started"
+    exeprefix=""
+    postfix=""
+    prefix=""
+  ;;
+esac
+
+# simple command line testing: each argument string is one stockfish invocation
+for args in "eval" \
+            "go nodes 1000" \
+            "go depth 10" \
+            "go movetime 1000" \
+            "go wtime 8000 btime 8000 winc 500 binc 500" \
+            "bench 128 1 10 default depth"
+do
+   cmd="$prefix $exeprefix ./stockfish $args $postfix"
+   echo "$cmd"
+   eval "$cmd"
+
+done
+
+# more general testing, following an uci protocol exchange; $exeprefix is expanded while writing game.exp
+cat << EOF > game.exp
+ set timeout 10
+ spawn $exeprefix ./stockfish
+
+ send "uci\n"
+ expect "uciok"
+
+ send "ucinewgame\n"
+ send "position startpos\n"
+ send "go nodes 1000\n"
+ expect "bestmove"
+
+ send "position startpos moves e2e4 e7e6\n"
+ send "go nodes 1000\n"
+ expect "bestmove"
+
+ send "quit\n"
+ expect eof
+
+ # return error code of the spawned program, useful for valgrind
+ lassign [wait] pid spawnid os_error_flag value
+ exit \$value
+EOF
+# run every listed expect script through the same prefix/postfix wrappers as the commands above
+for exps in game.exp
+do
+  # use the loop variable (not a fixed file name) so that scripts added to the list are really run
+  echo "$prefix expect $exps $postfix"
+  eval "$prefix expect $exps $postfix"
+
+done
+
+rm game.exp
+
+echo "instrumented testing OK"
diff --git a/tests/perft.sh b/tests/perft.sh
new file mode 100755
index 00000000..f3d5b88b
--- /dev/null
+++ b/tests/perft.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# verify perft numbers (positions from https://chessprogramming.wikispaces.com/Perft+Results)
+# any failing command triggers the ERR trap below, which reports the line and exits with status 1
+error()
+{
+  echo "perft testing failed on line $1"
+  exit 1
+}
+trap 'error ${LINENO}' ERR
+
+echo "perft testing started"
+# perft.exp takes <position> <depth> <expected node count> and exits with status 1 if that count is not seen before the expect timeout
+cat << EOF > perft.exp
+   set timeout 10
+   lassign \$argv pos depth result
+   spawn ./stockfish
+   send "position \$pos\\n perft \$depth\\n"
+   expect "Nodes searched  ? \$result" {} timeout {exit 1}
+   send "quit\\n"
+   expect eof
+EOF
+# each run must report the given node count for the given position and depth, otherwise the ERR trap aborts the script
+expect perft.exp startpos 5 4865609 > /dev/null
+expect perft.exp "fen r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -" 5 193690690 > /dev/null
+expect perft.exp "fen 8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -" 6 11030083 > /dev/null
+expect perft.exp "fen r3k2r/Pppp1ppp/1b3nbN/nP6/BBP1P3/q4N2/Pp1P2PP/R2Q1RK1 w kq - 0 1" 5 15833292 > /dev/null
+expect perft.exp "fen rnbq1k1r/pp1Pbppp/2p5/8/2B5/8/PPP1NnPP/RNBQK2R w KQ - 1 8" 5 89941194 > /dev/null
+expect perft.exp "fen r4rk1/1pp1qppp/p1np1n2/2b1p1B1/2B1P1b1/P1NP1N2/1PP1QPPP/R4RK1 w - - 0 10" 5 164075551 > /dev/null
+
+rm perft.exp
+
+echo "perft testing OK"
diff --git a/tests/reprosearch.sh b/tests/reprosearch.sh
new file mode 100755
index 00000000..9fd847ff
--- /dev/null
+++ b/tests/reprosearch.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# verify reproducible search
+# any failing command triggers the ERR trap below, which reports the line and exits with status 1
+error()
+{
+  echo "reprosearch testing failed on line $1"
+  exit 1
+}
+trap 'error ${LINENO}' ERR
+
+echo "reprosearch testing started"
+
+# repeat.exp runs the same pair of searches twice, with ucinewgame in between.
+# the node limit is passed as its only argument; with "go nodes <limit>" both
+# passes are expected to report exactly the same node counts.
+cat << EOF > repeat.exp
+ set timeout 10
+ spawn ./stockfish
+ lassign \$argv nodes
+
+ send "uci\n"
+ expect "uciok"
+
+ send "ucinewgame\n"
+ send "position startpos\n"
+ send "go nodes \$nodes\n"
+ expect "bestmove"
+
+ send "position startpos moves e2e4 e7e6\n"
+ send "go nodes \$nodes\n"
+ expect "bestmove"
+
+ send "ucinewgame\n"
+ send "position startpos\n"
+ send "go nodes \$nodes\n"
+ expect "bestmove"
+
+ send "position startpos moves e2e4 e7e6\n"
+ send "go nodes \$nodes\n"
+ expect "bestmove"
+
+ send "quit\n"
+ expect eof
+EOF
+
+# to increase the likelihood of finding a non-reproducible case,
+# the allowed number of nodes is varied systematically: 100*3^i/2^i (integer division) for i = 1..20
+for i in $(seq 1 20)
+do
+
+  nodes=$((100*3**i/2**i))
+  echo "reprosearch testing with $nodes nodes"
+
+  # every distinct "nodes N" must occur an even number of times; no match at all (nothing was searched) fails too
+  expect repeat.exp $nodes 2>&1 | grep -o "nodes [0-9]*" | sort | uniq -c | awk '$1%2!=0 {odd=1} END {exit (odd || NR==0)}'
+
+done
+
+rm repeat.exp
+
+echo "reprosearch testing OK"
diff --git a/tests/signature.sh b/tests/signature.sh
new file mode 100755
index 00000000..2eb52c89
--- /dev/null
+++ b/tests/signature.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# obtain and optionally verify Bench / signature
+# if no reference is given, the output is deliberately limited to just the signature
+# usage: signature.sh [reference]   (exit status is non-zero on bench failure or signature mismatch)
+error()
+{
+  echo "running bench for signature failed on line $1"
+  exit 1
+}
+trap 'error ${LINENO}' ERR
+
+# obtain the 4th field of the "Nodes searched" line; an empty result means bench printed no such line
+signature=`./stockfish bench 2>&1 | grep "Nodes searched  : " | awk '{print $4}'`
+[ -n "$signature" ] || error ${LINENO}
+
+if [ $# -gt 0 ]; then
+   # compare to given reference; a mismatch has to fail the calling step, not just print a message
+   if [ "$1" != "$signature" ]; then
+      echo "signature mismatch: reference $1 obtained $signature"
+      exit 1
+   fi
+   echo "signature OK: $signature"
+else
+   # just report signature
+   echo $signature
+fi
-- 
2.39.2