git.sesse.net Git - remoteglot-book/commitdiff
Key the table by position, not position+move. Makes for more complex exploration...
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 12 Dec 2014 01:09:20 +0000 (02:09 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 12 Dec 2014 01:09:20 +0000 (02:09 +0100)
Makefile
binloader.cpp
binlookup.cpp
binmerger.cpp
count.proto
opening-stats.pl
www/opening-stats.pl

index bf4dc77bca52f4fc4f3587d4ea71dc8e65600412..cd85e4fdeebb10acbe1a71d2f8cf45561c642c3d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ all: binloader binlookup binmerger
 
 binloader: binloader.o hash.o count.pb.o
 binmerger: binmerger.o count.pb.o
-binlookup: binlookup.o count.pb.o
+binlookup: binlookup.o hash.o count.pb.o
 
 binloader.o: binloader.cpp count.pb.h
 binmerger.o: binmerger.cpp count.pb.h
index fdad082502a21c10223ae50748551d9fb3678896..6472f1009f73a0f0348e43ddd1656a4df86adfca 100644 (file)
--- a/binloader.cpp
+++ b/binloader.cpp
@@ -9,6 +9,7 @@
 #include <utility>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <string.h>
 #include "count.pb.h"
 #include "hash.h"
@@ -19,13 +20,14 @@ using namespace std;
 
 enum Result { WHITE = 0, DRAW, BLACK };
 struct Element {
-       string bpfen_and_move;
+       string bpfen;
+       string move;
        Result result;
        int opening_num, white_elo, black_elo;
        time_t timestamp;
 
        bool operator< (const Element& other) const {
-               return bpfen_and_move < other.bpfen_and_move;
+               return bpfen < other.bpfen;
        }
 };
 
@@ -49,9 +51,9 @@ int main(int argc, char **argv)
                                break;
                        }
                
-                       string bpfen_and_move;
-                       bpfen_and_move.resize(l);
-                       if (fread(&bpfen_and_move[0], l, 1, fp) != 1) {
+                       string bpfen;
+                       bpfen.resize(l);
+                       if (fread(&bpfen[0], l, 1, fp) != 1) {
                                perror("fread()");
                //              exit(1);
                                break;
@@ -87,8 +89,20 @@ int main(int argc, char **argv)
                                break;
                        }
 
-                       int bucket = hash_key_to_bucket(bpfen_and_move.data(), bpfen_and_move.size(), num_buckets);
-                       elems[bucket].emplace_back(Element {move(bpfen_and_move), Result(r), opening_num, white_elo, black_elo, timestamp});
+                       l = getc(fp);
+                       if (l == -1) {
+                               break;
+                       }
+                       string move;
+                       move.resize(l);
+                       if (fread(&move[0], l, 1, fp) != 1) {
+                               perror("fread()");
+               //              exit(1);
+                               break;
+                       }
+
+                       int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets);
+                       elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp});
                        ++num_elems;
                }
                fclose(fp);
@@ -111,6 +125,7 @@ int main(int argc, char **argv)
                mtbl_writer_options_set_compression(wopt, MTBL_COMPRESSION_SNAPPY);
                mtbl_writer* mtbl = mtbl_writer_init(filename, wopt);
                Count c;
+               unordered_set<string> moves;
                for (size_t j = 0; j < elems[i].size(); ++j) {
                        const Element &e = elems[i][j];
                        if (e.result == WHITE) {
@@ -131,12 +146,17 @@ int main(int argc, char **argv)
                                }
                                c.set_opening_num(e.opening_num);
                        }
-                       if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) {
+                       if (!moves.count(e.move)) {
+                               moves.insert(e.move);
+                               c.add_move(e.move);
+                       }
+                       if (j == elems[i].size() - 1 || e.bpfen != elems[i][j + 1].bpfen) {
                                c.SerializeToString(&buf);
                                mtbl_writer_add(mtbl,
-                                       (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(),
+                                       (const uint8_t *)e.bpfen.data(), e.bpfen.size(),
                                        (const uint8_t *)buf.data(), buf.size());
                                c = Count();
+                               moves.clear();
                        }
                }
                mtbl_writer_destroy(&mtbl);
index 3c969e2dc2701e72238b7f303bff1f00694aad2e..958a7bfc6be82fce622ee02e825457c3e8d335f9 100644 (file)
--- a/binlookup.cpp
+++ b/binlookup.cpp
@@ -7,46 +7,77 @@
 #include <string>
 #include <string.h>
 #include "count.pb.h"
+#include "hash.h"
 
 using namespace std;
 
 int main(int argc, char **argv)
 {
        int num_buckets = atoi(argv[2]);
-       const char *hex_prefix = argv[3];
-       const int prefix_len = strlen(hex_prefix) / 2;
-       uint8_t *prefix = new uint8_t[prefix_len];
-
-       for (int i = 0; i < prefix_len; ++i) {
-               char x[3];
-               x[0] = hex_prefix[i * 2 + 0];
-               x[1] = hex_prefix[i * 2 + 1];
-               x[2] = 0;
-               int k;
-               sscanf(x, "%02x", &k);
-               prefix[i] = k;
+       mtbl_reader** mtbls = new mtbl_reader*[num_buckets];
+       const mtbl_source** srcs = new const mtbl_source*[num_buckets];
+       for (int i = 0; i < num_buckets; ++i) {
+               mtbls[i] = NULL;
+               srcs[i] = NULL;
        }
 
-       for (int i = 0; i < num_buckets; ++i) {
-               char filename[256];
-               snprintf(filename, sizeof(filename), "%s.part%04d", argv[1], i);
+       while (!feof(stdin)) {
+               char hex_bpfen[256];
+               if (fgets(hex_bpfen, sizeof(hex_bpfen), stdin) == NULL) {
+                       break;
+               }
+               char *ptr = strchr(hex_bpfen, '\n');
+               if (ptr != NULL) {
+                       *ptr = 0;
+               }
+
+               const int bpfen_len = strlen(hex_bpfen) / 2;
+               uint8_t *bpfen = new uint8_t[bpfen_len];
+
+               for (int i = 0; i < bpfen_len; ++i) {
+                       char x[3];
+                       x[0] = hex_bpfen[i * 2 + 0];
+                       x[1] = hex_bpfen[i * 2 + 1];
+                       x[2] = 0;
+                       int k;
+                       sscanf(x, "%02x", &k);
+                       bpfen[i] = k;
+               }
 
-               mtbl_reader* mtbl = mtbl_reader_init(filename, NULL);
-               const mtbl_source *src = mtbl_reader_source(mtbl);
-               mtbl_iter *it = mtbl_source_get_prefix(src, prefix, prefix_len);
+               int bucket = hash_key_to_bucket((const char *)bpfen, bpfen_len, num_buckets);
+               if (mtbls[bucket] == NULL) {
+                       char filename[256];
+                       snprintf(filename, sizeof(filename), "%s.part%04d", argv[1], bucket);
+
+                       mtbls[bucket] = mtbl_reader_init(filename, NULL);
+                       srcs[bucket] = mtbl_reader_source(mtbls[bucket]);
+               }
+
+               mtbl_iter *it = mtbl_source_get(srcs[bucket], bpfen, bpfen_len);
 
                const uint8_t *key, *val;
                size_t len_key, len_val;
 
                while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) {
-                       string move((char *)(key + prefix_len), len_key - prefix_len);
                        Count c;
                        c.ParseFromArray(val, len_val);
-                       printf("%s %d %d %d %u %f %f %d %ld\n", move.c_str(),
+                       printf("%d %d %d %u %f %f %d %ld",
                                c.white(), c.draw(), c.black(), c.opening_num(),
                                double(c.sum_white_elo()) / c.num_elo(),
                                double(c.sum_black_elo()) / c.num_elo(),
                                c.num_elo(), c.first_timestamp());
+                       for (int j = 0; j < c.move_size(); ++j) {
+                               printf(" %s", c.move(j).c_str());
+                       }
+                       printf("\n");
+               }
+               fflush(stdout);
+               mtbl_iter_destroy(&it);
+       }
+
+       for (int i = 0; i < num_buckets; ++i) {
+               if (mtbls[i] != NULL) {
+                       mtbl_reader_destroy(&mtbls[i]);
                }
        }
 }
index 6c162404e416e1c347ee449d83ee82153297ebfb..9bd969ead0d3795ed0a495047ad5e2ae77232a0c 100644 (file)
--- a/binmerger.cpp
+++ b/binmerger.cpp
@@ -2,6 +2,7 @@
 #include <mtbl.h>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <string.h>
 #include <assert.h>
 #include "count.pb.h"
@@ -38,6 +39,18 @@ void merge_count(void* userdata,
                }
        }
 
+       // Merge the moves, with deduplication.
+       unordered_set<string> moves;
+       for (int i = 0; i < c0.move_size(); ++i) {
+               moves.insert(c0.move(i));
+               c.add_move(c0.move(i));
+       }
+       for (int i = 0; i < c1.move_size(); ++i) {
+               if (!moves.count(c1.move(i))) {
+                       c.add_move(c1.move(i));
+               }
+       }
+
        static string buf;  // Keep allocated.
        c.SerializeToString(&buf);
 
index c9321a59f3a875e24c5197a8402ed2327e9d799a..7712f97649ed84de2d6c8191c29b3a7aac57a1a0 100644 (file)
--- a/count.proto
+++ b/count.proto
@@ -14,4 +14,7 @@ message Count {
 
        // First timestamp this position/move was seen. 
        optional int64 first_timestamp = 8 [default=32503680000];
+
+       // Moves seen from this position.
+       repeated string move = 9;
 };
index 1c17f50af253431e7baeb3e38163a5ed20bbc15a..324b1582fc20d0e569fd383b4df77414063f5d44 100755 (executable)
--- a/opening-stats.pl
+++ b/opening-stats.pl
@@ -5,10 +5,29 @@ use CGI;
 use JSON::XS;
 use lib '..';
 use Position;
+use IPC::Open2;
 
 my $cgi = CGI->new;
 my $fen = $ARGV[0];
+my ($chld_out, $chld_in);
+my $pid = IPC::Open2::open2($chld_out, $chld_in, "./binlookup", "./open.mtbl", "40");
+
+# Root position.
 my $pos = Position->from_fen($fen);
 my $hex = unpack('H*', $pos->bitpacked_fen);
-system("./binlookup", "./open.mtbl", "40", $hex);
+print $chld_in $hex, "\n";
+my $line = <$chld_out>;
+
+print $line;
+chomp $line;
+
+my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, @moves) = split / /, $line;
 
+# Explore one move out.
+for my $move (@moves) {
+       my ($np, $uci_move) = $pos->make_pretty_move($move);
+       my $hex = unpack('H*', $np->bitpacked_fen);
+       print $chld_in $hex, "\n";
+       my $line = <$chld_out>;
+       print "$move $line";
+}
index 33d3c082dcd551a657a7c5847360c47d0a26046c..b62a30b605a23b787c00f5e4bf919bf852fb0c16 100755 (executable)
--- a/www/opening-stats.pl
+++ b/www/opening-stats.pl
@@ -5,24 +5,34 @@ use CGI;
 use JSON::XS;
 use lib '..';
 use Position;
+use IPC::Open2;
 
 our %openings = ();
 read_openings();
 
 my $cgi = CGI->new;
-my $fen = $cgi->param('fen');
+my ($chld_out, $chld_in);
+my $pid = IPC::Open2::open2($chld_out, $chld_in, "../binlookup", "../open.mtbl", "40");
+
+# Root position. Basically ignore everything except the opening (and later some root game stuff).
+my $fen = $cgi->param('fen') // 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1';
 my $pos = Position->from_fen($fen);
 my $hex = unpack('H*', $pos->bitpacked_fen);
-open my $fh, "-|", "../binlookup", "../open.mtbl", "40", $hex
-       or die "../binlookup: $!";
+print $chld_in $hex, "\n";
+my $line = <$chld_out>;
 
-my $opening;
+my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, @moves) = split / /, $line;
+my $opening = $openings{$opening_num} // 'A00: Start position';
 
-my @moves = ();
-while (<$fh>) {
-       chomp;
-       my ($move, $white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo) = split;
-       push @moves, {
+# Explore one move out.
+my @json_moves = ();
+for my $move (@moves) {
+       my ($np, $uci_move) = $pos->make_pretty_move($move);
+       my $hex = unpack('H*', $np->bitpacked_fen);
+       print $chld_in $hex, "\n";
+       my $line = <$chld_out>;
+       my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo) = split / /, $line;
+       push @json_moves, {
                move => $move,
                white => $white * 1,
                draw => $draw * 1,
@@ -31,14 +41,10 @@ while (<$fh>) {
                black_avg_elo => $black_avg_elo * 1,
                num_elo => $num_elo * 1
        };
-       $opening = $openings{$opening_num} // 'A00: Start position';
 }
-close $fh;
-
-@moves = sort { num($b) <=> num($a) } @moves;
 
 print $cgi->header(-type=>'application/json');
-print JSON::XS::encode_json({ moves => \@moves, opening => $opening });
+print JSON::XS::encode_json({ moves => \@json_moves, opening => $opening });
 
 sub num {
        my $x = shift;