git.sesse.net Git - remoteglot-book/commitdiff
Switch value format to protobuf. Slightly smaller, easier to deal with extensions...
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 12 Dec 2014 00:06:51 +0000 (01:06 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 12 Dec 2014 00:06:51 +0000 (01:06 +0100)
.gitignore
Makefile
binloader.cpp
binlookup.cpp
binmerger.cpp
count.h [deleted file]
count.proto [new file with mode: 0644]

index d378a6a8f1ffc2d33c11b1869605bc8a0d60517e..3792bb70d9a53ab570ae88778a76dc9e0330c985 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ eco.pgn
 openings.txt
 open.mtbl.part????
 *.o
+*.pb.cc
+*.pb.h
index 80d4c5ede91ac6f9ed1e6e7af6954bba1d1783ea..bf4dc77bca52f4fc4f3587d4ea71dc8e65600412 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,20 @@
 CXXFLAGS=-std=gnu++11 -O2 -g -Wall
-LDLIBS=-lmtbl -lfarmhash
+LDLIBS=-lmtbl -lfarmhash -lprotobuf
+PROTOC=protoc
+
 all: binloader binlookup binmerger
 
-binloader: binloader.o hash.o
+binloader: binloader.o hash.o count.pb.o
+binmerger: binmerger.o count.pb.o
+binlookup: binlookup.o count.pb.o
+
+binloader.o: binloader.cpp count.pb.h
+binmerger.o: binmerger.cpp count.pb.h
+binlookup.o: binlookup.cpp count.pb.h
+
+%.pb.cc %.pb.h : %.proto
+       $(PROTOC) --cpp_out=. $<
 
 .PHONY: clean
 clean:
-       $(RM) binloader binlookup binmerger
+       $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o count.pb.o count.pb.h count.pb.cc
index e472364e47b234e8a3a0ed02f03c47c8d47a7582..fdad082502a21c10223ae50748551d9fb3678896 100644 (file)
--- a/binloader.cpp
+++ b/binloader.cpp
 #include <memory>
 #include <string>
 #include <string.h>
-#include "count.h"
+#include "count.pb.h"
 #include "hash.h"
 
+#define DUMMY_TIMESTAMP 32503680000
+
 using namespace std;
 
 enum Result { WHITE = 0, DRAW, BLACK };
@@ -100,6 +102,7 @@ int main(int argc, char **argv)
        }
 
        printf("Writing SSTables...\n");
+       string buf;  // Keep allocated.
        for (int i = 0; i < num_buckets; ++i) {
                char filename[256];
                snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i);
@@ -111,26 +114,28 @@ int main(int argc, char **argv)
                for (size_t j = 0; j < elems[i].size(); ++j) {
                        const Element &e = elems[i][j];
                        if (e.result == WHITE) {
-                               ++c.white;
+                               c.set_white(c.white() + 1);
                        } else if (e.result == DRAW) {
-                               ++c.draw;
+                               c.set_draw(c.draw() + 1);
                        } else if (e.result == BLACK) {
-                               ++c.black;
+                               c.set_black(c.black() + 1);
                        }
                        if (e.white_elo >= 100 && e.black_elo >= 100) {
-                               c.sum_white_elo += e.white_elo;
-                               c.sum_black_elo += e.black_elo;
-                               ++c.num_elo;
+                               c.set_sum_white_elo(c.sum_white_elo() + e.white_elo);
+                               c.set_sum_black_elo(c.sum_black_elo() + e.black_elo);
+                               c.set_num_elo(c.num_elo() + 1);
                        }
-                       if (c.first_timestamp == DUMMY_TIMESTAMP ||
-                           e.timestamp < c.first_timestamp) {
-                               c.first_timestamp = e.timestamp;
-                               c.opening_num = e.opening_num;
+                       if (!c.has_first_timestamp() || e.timestamp < c.first_timestamp()) {
+                               if (e.timestamp != DUMMY_TIMESTAMP) {
+                                       c.set_first_timestamp(e.timestamp);
+                               }
+                               c.set_opening_num(e.opening_num);
                        }
                        if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) {
+                               c.SerializeToString(&buf);
                                mtbl_writer_add(mtbl,
                                        (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(),
-                                       (const uint8_t *)&c, sizeof(c));
+                                       (const uint8_t *)buf.data(), buf.size());
                                c = Count();
                        }
                }
index b3a0488b0672e240c93bff1fc1c3d450c9fbd6a9..3c969e2dc2701e72238b7f303bff1f00694aad2e 100644 (file)
--- a/binlookup.cpp
+++ b/binlookup.cpp
@@ -6,7 +6,7 @@
 #include <memory>
 #include <string>
 #include <string.h>
-#include "count.h"
+#include "count.pb.h"
 
 using namespace std;
 
@@ -40,12 +40,13 @@ int main(int argc, char **argv)
 
                while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) {
                        string move((char *)(key + prefix_len), len_key - prefix_len);
-                       const Count* c = (Count *)val;
+                       Count c;
+                       c.ParseFromArray(val, len_val);
                        printf("%s %d %d %d %u %f %f %d %ld\n", move.c_str(),
-                               c->white, c->draw, c->black, c->opening_num,
-                               float(c->sum_white_elo) / c->num_elo,
-                               float(c->sum_black_elo) / c->num_elo,
-                               c->num_elo, c->first_timestamp);
+                               c.white(), c.draw(), c.black(), c.opening_num(),
+                               double(c.sum_white_elo()) / c.num_elo(),
+                               double(c.sum_black_elo()) / c.num_elo(),
+                               c.num_elo(), c.first_timestamp());
                }
        }
 }
index 620ef0afe5a25d1a55967de95c484f0e35925565..6c162404e416e1c347ee449d83ee82153297ebfb 100644 (file)
--- a/binmerger.cpp
+++ b/binmerger.cpp
@@ -4,40 +4,46 @@
 #include <string>
 #include <string.h>
 #include <assert.h>
-#include "count.h"
+#include "count.pb.h"
 
 using namespace std;
 
-
 void merge_count(void* userdata,
                  const uint8_t *key, size_t len_key,
                 const uint8_t *val0, size_t len_val0,
                 const uint8_t *val1, size_t len_val1,
                 uint8_t **merged_val, size_t *len_merged_val)
 {
-       assert(len_val0 == sizeof(Count));
-       assert(len_val1 == sizeof(Count));
-
-       const Count* c0 = reinterpret_cast<const Count*>(val0);
-       const Count* c1 = reinterpret_cast<const Count*>(val1);
-       unique_ptr<Count> c((Count *)malloc(sizeof(Count)));  // Needs to be with malloc, per merger spec.
-
-       c->white = c0->white + c1->white;
-       c->draw = c0->draw + c1->draw;
-       c->black = c0->black + c1->black;
-       c->sum_white_elo = c0->sum_white_elo + c1->sum_white_elo;
-       c->sum_black_elo = c0->sum_black_elo + c1->sum_black_elo;
-       c->num_elo = c0->num_elo + c1->num_elo;
-       if (c0->first_timestamp <= c1->first_timestamp) {
-               c->opening_num = c0->opening_num;
-               c->first_timestamp = c0->first_timestamp;
+       Count c0, c1;
+       c0.ParseFromArray(val0, len_val0);
+       c1.ParseFromArray(val1, len_val1);
+
+       Count c;
+
+       c.set_white(c0.white() + c1.white());
+       c.set_draw(c0.draw() + c1.draw());
+       c.set_black(c0.black() + c1.black());
+       c.set_sum_white_elo(c0.sum_white_elo() + c1.sum_white_elo());
+       c.set_sum_black_elo(c0.sum_black_elo() + c1.sum_black_elo());
+       c.set_num_elo(c0.num_elo() + c1.num_elo());
+       if (c0.first_timestamp() <= c1.first_timestamp()) {
+               c.set_opening_num(c0.opening_num());
+               if (c0.has_first_timestamp()) {
+                       c.set_first_timestamp(c0.first_timestamp());
+               }
        } else {
-               c->opening_num = c1->opening_num;
-               c->first_timestamp = c1->first_timestamp;
+               c.set_opening_num(c1.opening_num());
+               if (c1.has_first_timestamp()) {
+                       c.set_first_timestamp(c1.first_timestamp());
+               }
        }
 
-       *merged_val = reinterpret_cast<uint8_t *>(c.release());
-       *len_merged_val = sizeof(Count);
+       static string buf;  // Keep allocated.
+       c.SerializeToString(&buf);
+
+       *merged_val = reinterpret_cast<uint8_t *>(malloc(buf.size()));
+       *len_merged_val = buf.size();
+       memcpy(*merged_val, buf.data(), buf.size());
 }
 
 int main(int argc, char **argv)
diff --git a/count.h b/count.h
deleted file mode 100644 (file)
index 9cb6a18..0000000
--- a/count.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#define DUMMY_TIMESTAMP 32503680000  // 3000-01-01 00:00:00 UTC.
-
-struct Count {
-       int white = 0;
-       int draw = 0;
-       int black = 0;
-       unsigned int opening_num = 0;
-       unsigned long long sum_white_elo = 0;
-       unsigned long long sum_black_elo = 0;
-       int num_elo = 0;
-       time_t first_timestamp = DUMMY_TIMESTAMP;
-};
diff --git a/count.proto b/count.proto
new file mode 100644 (file)
index 0000000..c9321a5
--- /dev/null
+++ b/count.proto
@@ -0,0 +1,17 @@
+message Count {
+       // Number of games.
+       optional int32 white = 1;
+       optional int32 draw = 2;
+       optional int32 black = 3;
+
+       // Opening number (32-bit hash value).
+       optional fixed32 opening_num = 4;
+
+       // Elo statistics for this position.
+       optional int64 sum_white_elo = 5;
+       optional int64 sum_black_elo = 6;
+       optional int32 num_elo = 7;
+
+       // First timestamp this position/move was seen. 
+       optional int64 first_timestamp = 8 [default=32503680000];
+};