From: Steinar H. Gunderson Date: Fri, 12 Dec 2014 00:06:51 +0000 (+0100) Subject: Switch value format to protobuf. Slightly smaller, easier to deal with extensions... X-Git-Url: https://git.sesse.net/?p=remoteglot-book;a=commitdiff_plain;h=19c49abb677f8cbf5b290e71dbcad382558d8cc5 Switch value format to protobuf. Slightly smaller, easier to deal with extensions that will come soon. --- diff --git a/.gitignore b/.gitignore index d378a6a..3792bb7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ eco.pgn openings.txt open.mtbl.part???? *.o +*.pb.cc +*.pb.h diff --git a/Makefile b/Makefile index 80d4c5e..bf4dc77 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,20 @@ CXXFLAGS=-std=gnu++11 -O2 -g -Wall -LDLIBS=-lmtbl -lfarmhash +LDLIBS=-lmtbl -lfarmhash -lprotobuf +PROTOC=protoc + all: binloader binlookup binmerger -binloader: binloader.o hash.o +binloader: binloader.o hash.o count.pb.o +binmerger: binmerger.o count.pb.o +binlookup: binlookup.o count.pb.o + +binloader.o: binloader.cpp count.pb.h +binmerger.o: binmerger.cpp count.pb.h +binlookup.o: binlookup.cpp count.pb.h + +%.pb.cc %.pb.h : %.proto + $(PROTOC) --cpp_out=. $< .PHONY: clean clean: - $(RM) binloader binlookup binmerger + $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o count.pb.o count.pb.h count.pb.cc diff --git a/binloader.cpp b/binloader.cpp index e472364..fdad082 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -10,9 +10,11 @@ #include #include #include -#include "count.h" +#include "count.pb.h" #include "hash.h" +#define DUMMY_TIMESTAMP 32503680000 + using namespace std; enum Result { WHITE = 0, DRAW, BLACK }; @@ -100,6 +102,7 @@ int main(int argc, char **argv) } printf("Writing SSTables...\n"); + string buf; // Keep allocated. 
for (int i = 0; i < num_buckets; ++i) { char filename[256]; snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i); @@ -111,26 +114,28 @@ int main(int argc, char **argv) for (size_t j = 0; j < elems[i].size(); ++j) { const Element &e = elems[i][j]; if (e.result == WHITE) { - ++c.white; + c.set_white(c.white() + 1); } else if (e.result == DRAW) { - ++c.draw; + c.set_draw(c.draw() + 1); } else if (e.result == BLACK) { - ++c.black; + c.set_black(c.black() + 1); } if (e.white_elo >= 100 && e.black_elo >= 100) { - c.sum_white_elo += e.white_elo; - c.sum_black_elo += e.black_elo; - ++c.num_elo; + c.set_sum_white_elo(c.sum_white_elo() + e.white_elo); + c.set_sum_black_elo(c.sum_black_elo() + e.black_elo); + c.set_num_elo(c.num_elo() + 1); } - if (c.first_timestamp == DUMMY_TIMESTAMP || - e.timestamp < c.first_timestamp) { - c.first_timestamp = e.timestamp; - c.opening_num = e.opening_num; + if (!c.has_first_timestamp() || e.timestamp < c.first_timestamp()) { + if (e.timestamp != DUMMY_TIMESTAMP) { + c.set_first_timestamp(e.timestamp); + } + c.set_opening_num(e.opening_num); } if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) { + c.SerializeToString(&buf); mtbl_writer_add(mtbl, (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(), - (const uint8_t *)&c, sizeof(c)); + (const uint8_t *)buf.data(), buf.size()); c = Count(); } } diff --git a/binlookup.cpp b/binlookup.cpp index b3a0488..3c969e2 100644 --- a/binlookup.cpp +++ b/binlookup.cpp @@ -6,7 +6,7 @@ #include #include #include -#include "count.h" +#include "count.pb.h" using namespace std; @@ -40,12 +40,13 @@ int main(int argc, char **argv) while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) { string move((char *)(key + prefix_len), len_key - prefix_len); - const Count* c = (Count *)val; + Count c; + c.ParseFromArray(val, len_val); printf("%s %d %d %d %u %f %f %d %ld\n", move.c_str(), - c->white, c->draw, c->black, c->opening_num, - 
float(c->sum_white_elo) / c->num_elo, - float(c->sum_black_elo) / c->num_elo, - c->num_elo, c->first_timestamp); + c.white(), c.draw(), c.black(), c.opening_num(), + double(c.sum_white_elo()) / c.num_elo(), + double(c.sum_black_elo()) / c.num_elo(), + c.num_elo(), c.first_timestamp()); } } } diff --git a/binmerger.cpp b/binmerger.cpp index 620ef0a..6c16240 100644 --- a/binmerger.cpp +++ b/binmerger.cpp @@ -4,40 +4,46 @@ #include #include #include -#include "count.h" +#include "count.pb.h" using namespace std; - void merge_count(void* userdata, const uint8_t *key, size_t len_key, const uint8_t *val0, size_t len_val0, const uint8_t *val1, size_t len_val1, uint8_t **merged_val, size_t *len_merged_val) { - assert(len_val0 == sizeof(Count)); - assert(len_val1 == sizeof(Count)); - - const Count* c0 = reinterpret_cast<const Count *>(val0); - const Count* c1 = reinterpret_cast<const Count *>(val1); - unique_ptr<Count> c((Count *)malloc(sizeof(Count))); // Needs to be with malloc, per merger spec. - - c->white = c0->white + c1->white; - c->draw = c0->draw + c1->draw; - c->black = c0->black + c1->black; - c->sum_white_elo = c0->sum_white_elo + c1->sum_white_elo; - c->sum_black_elo = c0->sum_black_elo + c1->sum_black_elo; - c->num_elo = c0->num_elo + c1->num_elo; - if (c0->first_timestamp <= c1->first_timestamp) { - c->opening_num = c0->opening_num; - c->first_timestamp = c0->first_timestamp; + Count c0, c1; + c0.ParseFromArray(val0, len_val0); + c1.ParseFromArray(val1, len_val1); + + Count c; + + c.set_white(c0.white() + c1.white()); + c.set_draw(c0.draw() + c1.draw()); + c.set_black(c0.black() + c1.black()); + c.set_sum_white_elo(c0.sum_white_elo() + c1.sum_white_elo()); + c.set_sum_black_elo(c0.sum_black_elo() + c1.sum_black_elo()); + c.set_num_elo(c0.num_elo() + c1.num_elo()); + if (c0.first_timestamp() <= c1.first_timestamp()) { + c.set_opening_num(c0.opening_num()); + if (c0.has_first_timestamp()) { + c.set_first_timestamp(c0.first_timestamp()); + } } else { - c->opening_num = c1->opening_num; - 
c->first_timestamp = c1->first_timestamp; + c.set_opening_num(c1.opening_num()); + if (c1.has_first_timestamp()) { + c.set_first_timestamp(c1.first_timestamp()); + } } - *merged_val = reinterpret_cast<uint8_t *>(c.release()); - *len_merged_val = sizeof(Count); + static string buf; // Keep allocated. + c.SerializeToString(&buf); + + *merged_val = reinterpret_cast<uint8_t *>(malloc(buf.size())); + *len_merged_val = buf.size(); + memcpy(*merged_val, buf.data(), buf.size()); } int main(int argc, char **argv) diff --git a/count.h b/count.h deleted file mode 100644 index 9cb6a18..0000000 --- a/count.h +++ /dev/null @@ -1,12 +0,0 @@ -#define DUMMY_TIMESTAMP 32503680000 // 3000-01-01 00:00:00 UTC. - -struct Count { - int white = 0; - int draw = 0; - int black = 0; - unsigned int opening_num = 0; - unsigned long long sum_white_elo = 0; - unsigned long long sum_black_elo = 0; - int num_elo = 0; - time_t first_timestamp = DUMMY_TIMESTAMP; -}; diff --git a/count.proto b/count.proto new file mode 100644 index 0000000..c9321a5 --- /dev/null +++ b/count.proto @@ -0,0 +1,17 @@ +message Count { + // Number of games. + optional int32 white = 1; + optional int32 draw = 2; + optional int32 black = 3; + + // Opening number (32-bit hash value). + optional fixed32 opening_num = 4; + + // Elo statistics for this position. + optional int64 sum_white_elo = 5; + optional int64 sum_black_elo = 6; + optional int32 num_elo = 7; + + // First timestamp this position/move was seen. + optional int64 first_timestamp = 8 [default=32503680000]; +};