git.sesse.net Git - remoteglot-book/commitdiff
Store and merge the file number information. Still unused in the UI.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 13 Dec 2014 01:02:46 +0000 (02:02 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 13 Dec 2014 01:02:46 +0000 (02:02 +0100)
binloader.cpp
binlookup.cpp
binmerger.cpp
build-book.sh
count.proto
opening-stats.pl
parallel-parse-pgn.sh
www/opening-stats.pl

diff --git a/binloader.cpp b/binloader.cpp
index 5bf2502c6867610fbb963c91d63aaf3f717312c8..8660e8e299bd93f70f19767d671af0e6d8288f80 100644 (file)
@@ -25,6 +25,7 @@ struct Element {
        Result result;
        int opening_num, white_elo, black_elo;
        time_t timestamp;
+       int file_num;
        long start_position;
 
        bool operator< (const Element& other) const {
@@ -67,7 +68,7 @@ int main(int argc, char **argv)
                                break;
                        }
 
-                       int opening_num, white_elo, black_elo;
+                       int opening_num, white_elo, black_elo, file_num;
                        time_t timestamp;
                        long start_position;
                        if (fread(&white_elo, sizeof(white_elo), 1, fp) != 1) {
@@ -90,6 +91,11 @@ int main(int argc, char **argv)
                                //exit(1);
                                break;
                        }
+                       if (fread(&file_num, sizeof(file_num), 1, fp) != 1) {
+                               perror("fread()");
+                               //exit(1);
+                               break;
+                       }
                        if (fread(&start_position, sizeof(start_position), 1, fp) != 1) {
                                perror("fread()");
                                //exit(1);
@@ -110,7 +116,7 @@ int main(int argc, char **argv)
                        }
 
                        int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets);
-                       elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, start_position});
+                       elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, file_num, start_position});
                        ++num_elems;
                }
                fclose(fp);
@@ -153,6 +159,7 @@ int main(int argc, char **argv)
                                        c.set_first_timestamp(e.timestamp);
                                }
                                c.set_opening_num(e.opening_num);
+                               c.set_pgn_file_num(e.file_num);
                                c.set_pgn_start_position(e.start_position);
                        }
                        if (!moves.count(e.move)) {
diff --git a/binlookup.cpp b/binlookup.cpp
index 179515b44a0477a06bc2bcdc3946e4e678c4478e..afc446c73a7749ef49a9838f3cc17f1d1b1d93b9 100644 (file)
@@ -61,11 +61,12 @@ int main(int argc, char **argv)
                while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) {
                        Count c;
                        c.ParseFromArray(val, len_val);
-                       printf("%d %d %d %u %f %f %d %ld %ld",
+                       printf("%d %d %d %u %f %f %d %ld %d %ld",
                                c.white(), c.draw(), c.black(), c.opening_num(),
                                double(c.sum_white_elo()) / c.num_elo(),
                                double(c.sum_black_elo()) / c.num_elo(),
                                c.num_elo(), c.first_timestamp(),
+                               c.pgn_file_num(),
                                c.pgn_start_position());
                        for (int j = 0; j < c.move_size(); ++j) {
                                printf(" %s", c.move(j).c_str());
diff --git a/binmerger.cpp b/binmerger.cpp
index 7e341592eec4d75c7f94e5702e53fd83926d26b0..94e47809d3f2f90837db6df9647ea7028e3d031e 100644 (file)
@@ -32,12 +32,14 @@ void merge_count(void* userdata,
                if (c0.has_first_timestamp()) {
                        c.set_first_timestamp(c0.first_timestamp());
                }
+               c.set_pgn_file_num(c0.pgn_file_num());
                c.set_pgn_start_position(c0.pgn_start_position());
        } else {
                c.set_opening_num(c1.opening_num());
                if (c1.has_first_timestamp()) {
                        c.set_first_timestamp(c1.first_timestamp());
                }
+               c.set_pgn_file_num(c1.pgn_file_num());
                c.set_pgn_start_position(c1.pgn_start_position());
        }
 
diff --git a/build-book.sh b/build-book.sh
index 7d49663f5fb6f2c90a2bcff821287c6d130b0db2..e6e8c09afa4def12b1b1a07cb01868b634d215c7 100755 (executable)
@@ -5,12 +5,15 @@ set -e
 export SHARDS=40
 export PARALLEL_LOADS=20  # Reduce if you have problems with OOM
 
-rm -f part-*.bin part-*.mtbl part-*.mtbl.part???? open.mtbl.new open.mtbl.part???? open.mtbl.part????.new 2>/dev/null
+rm -f pgnnames.txt part-*.bin part-*.mtbl part-*.mtbl.part???? open.mtbl.new open.mtbl.part???? open.mtbl.part????.new 2>/dev/null
 
+PGNNUM=0
 for FILE in "$@"; do
        date | tr -d "\n"
        echo "  $FILE"
-       ./parallel-parse-pgn.sh "$FILE"
+       ./parallel-parse-pgn.sh "$FILE" "$PGNNUM"
+       echo "$FILE" >> pgnnames.txt
+       PGNNUM=$(( PGNNUM + 1 ))
 done
 date 
 
diff --git a/count.proto b/count.proto
index 9ee285919a82142d6126b0ed9b81d58c50698a8c..39da3e0604236080f0dbae5a29833875a31f4d19 100644 (file)
@@ -15,8 +15,9 @@ message Count {
        // First timestamp this position/move was seen, and the byte offset
        // into the PGN file for that game.
        optional int64 first_timestamp = 8 [default=32503680000];
+       optional int32 pgn_file_num = 9;
        optional int64 pgn_start_position = 10;
 
        // Moves seen from this position.
-       repeated string move = 9;
+       repeated string move = 11;
 };
diff --git a/opening-stats.pl b/opening-stats.pl
index a90ca0b543535c86c7caf1e07ede2ccda37c5e79..27cdee0a257de70d5a48e27a061204dfb1e7bfa2 100755 (executable)
@@ -21,7 +21,7 @@ my $line = <$chld_out>;
 print $line;
 chomp $line;
 
-my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_start_position, @moves) = split / /, $line;
+my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_file_number, $pgn_start_position, @moves) = split / /, $line;
 
 # Explore one move out.
 for my $move (@moves) {
diff --git a/parallel-parse-pgn.sh b/parallel-parse-pgn.sh
index cfb61bcc6305fffee6efe016a9e61c92013f97d7..39d43d3b43abb91493ea38448866f0addc8eed29 100755 (executable)
@@ -1,10 +1,11 @@
 #! /bin/sh
 FILE=$1
+PGNNUM=$2
 for X in $( seq 0 39 ); do
        (
                START=$( ./find-pgn-split-point.sh "$FILE" $X 40 )
                END=$( ./find-pgn-split-point.sh "$FILE" $(( X + 1 )) 40 )
-               ~/nmu/pgn-extract/pgn-extract --startpos $START --endpos $END -e -Wsessebin "$FILE" >> part-$X.bin 2>/dev/null
+               ~/nmu/pgn-extract/pgn-extract --startpos $START --endpos $END --startfilenum $PGNNUM -e -Wsessebin "$FILE" >> part-$X.bin 2>/dev/null
        ) &
 done
 wait
diff --git a/www/opening-stats.pl b/www/opening-stats.pl
index 3a02cf9fde6fcd917b5d3cbc6237c9a3949c4ce2..d3034359040ddd40296746a0e2a34e061948cd16 100755 (executable)
@@ -21,7 +21,7 @@ my $hex = unpack('H*', $pos->bitpacked_fen);
 print $chld_in $hex, "\n";
 chomp (my $line = <$chld_out>);
 
-my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_start_position, @moves) = split / /, $line;
+my ($white, $draw, $black, $opening_num, $white_avg_elo, $black_avg_elo, $num_elo, $timestamp, $pgn_file_number, $pgn_start_position, @moves) = split / /, $line;
 my $opening = $openings{$opening_num} // 'A00: Start position';
 
 # Explore one move out.