From f7f8ba0f6969452da0b1d4bc6f4c8490006f7dbe Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 12 Dec 2014 02:09:20 +0100 Subject: [PATCH] Key the table by position, not position+move. Makes for more complex exploration, but is generally much more sane (e.g. we can store root games for each position in the future). --- Makefile | 2 +- binloader.cpp | 38 ++++++++++++++++++------ binlookup.cpp | 71 +++++++++++++++++++++++++++++++------------- binmerger.cpp | 13 ++++++++ count.proto | 3 ++ opening-stats.pl | 21 ++++++++++++- www/opening-stats.pl | 34 ++++++++++++--------- 7 files changed, 137 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index bf4dc77..cd85e4f 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ all: binloader binlookup binmerger binloader: binloader.o hash.o count.pb.o binmerger: binmerger.o count.pb.o -binlookup: binlookup.o count.pb.o +binlookup: binlookup.o hash.o count.pb.o binloader.o: binloader.cpp count.pb.h binmerger.o: binmerger.cpp count.pb.h diff --git a/binloader.cpp b/binloader.cpp index fdad082..6472f10 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "count.pb.h" #include "hash.h" @@ -19,13 +20,14 @@ using namespace std; enum Result { WHITE = 0, DRAW, BLACK }; struct Element { - string bpfen_and_move; + string bpfen; + string move; Result result; int opening_num, white_elo, black_elo; time_t timestamp; bool operator< (const Element& other) const { - return bpfen_and_move < other.bpfen_and_move; + return bpfen < other.bpfen; } }; @@ -49,9 +51,9 @@ int main(int argc, char **argv) break; } - string bpfen_and_move; - bpfen_and_move.resize(l); - if (fread(&bpfen_and_move[0], l, 1, fp) != 1) { + string bpfen; + bpfen.resize(l); + if (fread(&bpfen[0], l, 1, fp) != 1) { perror("fread()"); // exit(1); break; @@ -87,8 +89,20 @@ int main(int argc, char **argv) break; } - int bucket = hash_key_to_bucket(bpfen_and_move.data(), bpfen_and_move.size(), num_buckets); - elems[bucket].emplace_back(Element {move(bpfen_and_move), Result(r), opening_num, white_elo, black_elo, timestamp}); + l = getc(fp); + if (l == -1) { + break; + } + string move; + move.resize(l); + if (fread(&move[0], l, 1, fp) != 1) { + perror("fread()"); + // exit(1); + break; + } + + int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets); + elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp}); ++num_elems; } fclose(fp); @@ -111,6 +125,7 @@ int main(int argc, char **argv) mtbl_writer_options_set_compression(wopt, MTBL_COMPRESSION_SNAPPY); mtbl_writer* mtbl = mtbl_writer_init(filename, wopt); Count c; + unordered_set moves; for (size_t j = 0; j < elems[i].size(); ++j) { const Element &e = elems[i][j]; if (e.result == WHITE) { @@ -131,12 +146,17 @@ int main(int argc, char **argv) } c.set_opening_num(e.opening_num); } - if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) { + if (!moves.count(e.move)) { + moves.insert(e.move); + c.add_move(e.move); + } + if (j == elems[i].size() - 1 || e.bpfen != elems[i][j + 1].bpfen) { c.SerializeToString(&buf); mtbl_writer_add(mtbl, - (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(), + (const uint8_t *)e.bpfen.data(), e.bpfen.size(), (const uint8_t *)buf.data(), buf.size()); c = Count(); + moves.clear(); } } mtbl_writer_destroy(&mtbl); diff --git a/binlookup.cpp b/binlookup.cpp index 3c969e2..958a7bf 100644 --- a/binlookup.cpp +++ b/binlookup.cpp @@ -7,46 +7,77 @@ #include #include #include "count.pb.h" +#include "hash.h" using namespace std; int main(int argc, char **argv) { int num_buckets = atoi(argv[2]); - const char *hex_prefix = argv[3]; - const int prefix_len = strlen(hex_prefix) / 2; - uint8_t *prefix = new uint8_t[prefix_len]; - - for (int i = 0; i < prefix_len; ++i) { - char x[3]; - x[0] = hex_prefix[i * 2 + 0]; - x[1] = hex_prefix[i * 2 + 1]; - x[2] = 0; - int k; - sscanf(x, "%02x", &k); - prefix[i] = k; + mtbl_reader** mtbls = new mtbl_reader*[num_buckets]; + const mtbl_source** srcs = new const mtbl_source*[num_buckets]; + for (int i = 0; i < num_buckets; ++i) { + mtbls[i] = NULL; + srcs[i] = NULL; } - for (int i = 0; i < num_buckets; ++i) { - char filename[256]; - snprintf(filename, sizeof(filename), "%s.part%04d", argv[1], i); + while (!feof(stdin)) { + char hex_bpfen[256]; + if (fgets(hex_bpfen, sizeof(hex_bpfen), stdin) == NULL) { + break; + } + char *ptr = strchr(hex_bpfen, '\n'); + if (ptr != NULL) { + *ptr = 0; + } + + const int bpfen_len = strlen(hex_bpfen) / 2; + uint8_t *bpfen = new uint8_t[bpfen_len]; + + for (int i = 0; i < bpfen_len; ++i) { + char x[3]; + x[0] = hex_bpfen[i * 2 + 0]; + x[1] = hex_bpfen[i * 2 + 1]; + x[2] = 0; + int k; + sscanf(x, "%02x", &k); + bpfen[i] = k; + } - mtbl_reader* mtbl = mtbl_reader_init(filename, NULL); - const mtbl_source *src = mtbl_reader_source(mtbl); - mtbl_iter *it = mtbl_source_get_prefix(src, prefix, prefix_len); + int bucket = hash_key_to_bucket((const char *)bpfen, bpfen_len, num_buckets); + if (mtbls[bucket] == NULL) { + char filename[256]; + snprintf(filename, sizeof(filename), "%s.part%04d", argv[1], bucket); + + mtbls[bucket] = mtbl_reader_init(filename, NULL); + srcs[bucket] = mtbl_reader_source(mtbls[bucket]); + } + + mtbl_iter *it = mtbl_source_get(srcs[bucket], bpfen, bpfen_len); const uint8_t *key, *val; size_t len_key, len_val; while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) { - string move((char *)(key + prefix_len), len_key - prefix_len); Count c; c.ParseFromArray(val, len_val); - printf("%s %d %d %d %u %f %f %d %ld\n", move.c_str(), + printf("%d %d %d %u %f %f %d %ld", c.white(), c.draw(), c.black(), c.opening_num(), double(c.sum_white_elo()) / c.num_elo(), double(c.sum_black_elo()) / c.num_elo(), c.num_elo(), c.first_timestamp()); + for (int j = 0; j < c.move_size(); ++j) { + printf(" %s", c.move(j).c_str()); + } + printf("\n"); + } + fflush(stdout); + mtbl_iter_destroy(&it); + } + + for (int i = 0; i < num_buckets; ++i) { + if (mtbls[i] != NULL) { + mtbl_reader_destroy(&mtbls[i]); } } } diff --git a/binmerger.cpp b/binmerger.cpp index 6c16240..9bd969e 100644 --- a/binmerger.cpp +++ b/binmerger.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include "count.pb.h" @@ -38,6 +39,18 @@ void merge_count(void* userdata, } } + // Merge the moves, with deduplication. + unordered_set moves; + for (int i = 0; i < c0.move_size(); ++i) { + moves.insert(c0.move(i)); + c.add_move(c0.move(i)); + } + for (int i = 0; i < c1.move_size(); ++i) { + if (!moves.count(c1.move(i))) { + c.add_move(c1.move(i)); + } + } + static string buf; // Keep allocated. c.SerializeToString(&buf); diff --git a/count.proto b/count.proto index c9321a5..7712f97 100644 --- a/count.proto +++ b/count.proto @@ -14,4 +14,7 @@ message Count { // First timestamp this position/move was seen. optional int64 first_timestamp = 8 [default=32503680000]; + + // Moves seen from this position. + repeated string move = 9; }; diff --git a/opening-stats.pl b/opening-stats.pl index 1c17f50..324b158 100755 --- a/opening-stats.pl +++ b/opening-stats.pl @@ -5,10 +5,29 @@ use CGI; use JSON::XS; use lib '..'; use Position; +use IPC::Open2; my $cgi = CGI->new; my $fen = $ARGV[0]; +my ($chld_out, $chld_in); +my $pid = IPC::Open2::open2($chld_out, $chld_in, "./binlookup", "./open.mtbl", "40"); + +# Root position. my $pos = Position->from_fen($fen); my $hex = unpack('H*', $pos->bitpacked_fen); -system("./binlookup", "./open.mtbl", "40", $hex); +print $chld_in $hex, "\n"; +my $line = <$chld_out>; + +print $line; +chomp $line; + +my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, @moves) = split / /, $line; +# Explore one move out. +for my $move (@moves) { + my ($np, $uci_move) = $pos->make_pretty_move($move); + my $hex = unpack('H*', $np->bitpacked_fen); + print $chld_in $hex, "\n"; + my $line = <$chld_out>; + print "$move $line"; +} diff --git a/www/opening-stats.pl b/www/opening-stats.pl index 33d3c08..b62a30b 100755 --- a/www/opening-stats.pl +++ b/www/opening-stats.pl @@ -5,24 +5,34 @@ use CGI; use JSON::XS; use lib '..'; use Position; +use IPC::Open2; our %openings = (); read_openings(); my $cgi = CGI->new; -my $fen = $cgi->param('fen'); +my ($chld_out, $chld_in); +my $pid = IPC::Open2::open2($chld_out, $chld_in, "../binlookup", "../open.mtbl", "40"); + +# Root position. Basically ignore everything except the opening (and later some root game stuff). +my $fen = $cgi->param('fen') // 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'; my $pos = Position->from_fen($fen); my $hex = unpack('H*', $pos->bitpacked_fen); -open my $fh, "-|", "../binlookup", "../open.mtbl", "40", $hex - or die "../binlookup: $!"; +print $chld_in $hex, "\n"; +my $line = <$chld_out>; -my $opening; +my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, @moves) = split / /, $line; +my $opening = $openings{$opening_num} // 'A00: Start position'; -my @moves = (); -while (<$fh>) { - chomp; - my ($move, $white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo) = split; - push @moves, { +# Explore one move out. +my @json_moves = (); +for my $move (@moves) { + my ($np, $uci_move) = $pos->make_pretty_move($move); + my $hex = unpack('H*', $np->bitpacked_fen); + print $chld_in $hex, "\n"; + my $line = <$chld_out>; + my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo) = split / /, $line; + push @json_moves, { move => $move, white => $white * 1, draw => $draw * 1, @@ -31,14 +41,10 @@ while (<$fh>) { black_avg_elo => $black_avg_elo * 1, num_elo => $num_elo * 1 }; - $opening = $openings{$opening_num} // 'A00: Start position'; } -close $fh; - -@moves = sort { num($b) <=> num($a) } @moves; print $cgi->header(-type=>'application/json'); -print JSON::XS::encode_json({ moves => \@moves, opening => $opening }); +print JSON::XS::encode_json({ moves => \@json_moves, opening => $opening }); sub num { my $x = shift; -- 2.39.2