From e9b5b1e5b285542f44f0c52ee0b470b22ac7381b Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 13 Dec 2014 02:02:46 +0100 Subject: [PATCH] Store and merge the file number information. Still unused in the UI. --- binloader.cpp | 11 +++++++++-- binlookup.cpp | 3 ++- binmerger.cpp | 2 ++ build-book.sh | 7 +++++-- count.proto | 3 ++- opening-stats.pl | 2 +- parallel-parse-pgn.sh | 3 ++- www/opening-stats.pl | 2 +- 8 files changed, 24 insertions(+), 9 deletions(-) diff --git a/binloader.cpp b/binloader.cpp index 5bf2502..8660e8e 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -25,6 +25,7 @@ struct Element { Result result; int opening_num, white_elo, black_elo; time_t timestamp; + int file_num; long start_position; bool operator< (const Element& other) const { @@ -67,7 +68,7 @@ int main(int argc, char **argv) break; } - int opening_num, white_elo, black_elo; + int opening_num, white_elo, black_elo, file_num; time_t timestamp; long start_position; if (fread(&white_elo, sizeof(white_elo), 1, fp) != 1) { @@ -90,6 +91,11 @@ int main(int argc, char **argv) //exit(1); break; } + if (fread(&file_num, sizeof(file_num), 1, fp) != 1) { + perror("fread()"); + //exit(1); + break; + } if (fread(&start_position, sizeof(start_position), 1, fp) != 1) { perror("fread()"); //exit(1); @@ -110,7 +116,7 @@ int main(int argc, char **argv) } int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets); - elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, start_position}); + elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, file_num, start_position}); ++num_elems; } fclose(fp); @@ -153,6 +159,7 @@ int main(int argc, char **argv) c.set_first_timestamp(e.timestamp); } c.set_opening_num(e.opening_num); + c.set_pgn_file_num(e.file_num); c.set_pgn_start_position(e.start_position); } if (!moves.count(e.move)) { diff --git a/binlookup.cpp b/binlookup.cpp index 179515b..afc446c 100644 --- a/binlookup.cpp +++ b/binlookup.cpp @@ -61,11 +61,12 @@ int main(int argc, char **argv) while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) { Count c; c.ParseFromArray(val, len_val); - printf("%d %d %d %u %f %f %d %ld %ld", + printf("%d %d %d %u %f %f %d %ld %d %ld", c.white(), c.draw(), c.black(), c.opening_num(), double(c.sum_white_elo()) / c.num_elo(), double(c.sum_black_elo()) / c.num_elo(), c.num_elo(), c.first_timestamp(), + c.pgn_file_num(), c.pgn_start_position()); for (int j = 0; j < c.move_size(); ++j) { printf(" %s", c.move(j).c_str()); diff --git a/binmerger.cpp b/binmerger.cpp index 7e34159..94e4780 100644 --- a/binmerger.cpp +++ b/binmerger.cpp @@ -32,12 +32,14 @@ void merge_count(void* userdata, if (c0.has_first_timestamp()) { c.set_first_timestamp(c0.first_timestamp()); } + c.set_pgn_file_num(c0.pgn_file_num()); c.set_pgn_start_position(c0.pgn_start_position()); } else { c.set_opening_num(c1.opening_num()); if (c1.has_first_timestamp()) { c.set_first_timestamp(c1.first_timestamp()); } + c.set_pgn_file_num(c1.pgn_file_num()); c.set_pgn_start_position(c1.pgn_start_position()); } diff --git a/build-book.sh b/build-book.sh index 7d49663..e6e8c09 100755 --- a/build-book.sh +++ b/build-book.sh @@ -5,12 +5,15 @@ set -e export SHARDS=40 export PARALLEL_LOADS=20 # Reduce if you have problems with OOM -rm -f part-*.bin part-*.mtbl part-*.mtbl.part???? open.mtbl.new open.mtbl.part???? open.mtbl.part????.new 2>/dev/null +rm -f pgnnames.txt part-*.bin part-*.mtbl part-*.mtbl.part???? open.mtbl.new open.mtbl.part???? open.mtbl.part????.new 2>/dev/null +PGNNUM=0 for FILE in "$@"; do date | tr -d "\n" echo " $FILE" - ./parallel-parse-pgn.sh "$FILE" + ./parallel-parse-pgn.sh "$FILE" "$PGNNUM" + echo "$FILE" >> pgnnames.txt + PGNNUM=$(( PGNNUM + 1 )) done date diff --git a/count.proto b/count.proto index 9ee2859..39da3e0 100644 --- a/count.proto +++ b/count.proto @@ -15,8 +15,9 @@ message Count { // First timestamp this position/move was seen, and the byte offset // into the PGN file for that game. optional int64 first_timestamp = 8 [default=32503680000]; + optional int32 pgn_file_num = 9; optional int64 pgn_start_position = 10; // Moves seen from this position. - repeated string move = 9; + repeated string move = 11; }; diff --git a/opening-stats.pl b/opening-stats.pl index a90ca0b..27cdee0 100755 --- a/opening-stats.pl +++ b/opening-stats.pl @@ -21,7 +21,7 @@ my $line = <$chld_out>; print $line; chomp $line; -my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_start_position, @moves) = split / /, $line; +my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_file_number, $pgn_start_position, @moves) = split / /, $line; # Explore one move out. for my $move (@moves) { diff --git a/parallel-parse-pgn.sh b/parallel-parse-pgn.sh index cfb61bc..39d43d3 100755 --- a/parallel-parse-pgn.sh +++ b/parallel-parse-pgn.sh @@ -1,10 +1,11 @@ #! /bin/sh FILE=$1 +PGNNUM=$2 for X in $( seq 0 39 ); do ( START=$( ./find-pgn-split-point.sh "$FILE" $X 40 ) END=$( ./find-pgn-split-point.sh "$FILE" $(( X + 1 )) 40 ) - ~/nmu/pgn-extract/pgn-extract --startpos $START --endpos $END -e -Wsessebin "$FILE" >> part-$X.bin 2>/dev/null + ~/nmu/pgn-extract/pgn-extract --startpos $START --endpos $END --startfilenum $PGNNUM -e -Wsessebin "$FILE" >> part-$X.bin 2>/dev/null ) & done wait diff --git a/www/opening-stats.pl b/www/opening-stats.pl index 3a02cf9..d303435 100755 --- a/www/opening-stats.pl +++ b/www/opening-stats.pl @@ -21,7 +21,7 @@ my $hex = unpack('H*', $pos->bitpacked_fen); print $chld_in $hex, "\n"; chomp (my $line = <$chld_out>); -my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_start_position, @moves) = split / /, $line; +my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_file_number, $pgn_start_position, @moves) = split / /, $line; my $opening = $openings{$opening_num} // 'A00: Start position'; # Explore one move out. -- 2.39.2