openings.txt
open.mtbl.part????
*.o
+*.pb.cc
+*.pb.h
CXXFLAGS=-std=gnu++11 -O2 -g -Wall
-LDLIBS=-lmtbl -lfarmhash
+LDLIBS=-lmtbl -lfarmhash -lprotobuf
+PROTOC=protoc
+
all: binloader binlookup binmerger
-binloader: binloader.o hash.o
+binloader: binloader.o hash.o count.pb.o
+binmerger: binmerger.o count.pb.o
+binlookup: binlookup.o count.pb.o
+
+binloader.o: binloader.cpp count.pb.h
+binmerger.o: binmerger.cpp count.pb.h
+binlookup.o: binlookup.cpp count.pb.h
+
+%.pb.cc %.pb.h : %.proto
+ $(PROTOC) --cpp_out=. $<
.PHONY: clean
clean:
- $(RM) binloader binlookup binmerger
+ $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o count.pb.o count.pb.h count.pb.cc
#include <memory>
#include <string>
#include <string.h>
-#include "count.h"
+#include "count.pb.h"
#include "hash.h"
+#define DUMMY_TIMESTAMP 32503680000
+
using namespace std;
enum Result { WHITE = 0, DRAW, BLACK };
}
printf("Writing SSTables...\n");
+ string buf; // Keep allocated.
for (int i = 0; i < num_buckets; ++i) {
char filename[256];
snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i);
for (size_t j = 0; j < elems[i].size(); ++j) {
const Element &e = elems[i][j];
if (e.result == WHITE) {
- ++c.white;
+ c.set_white(c.white() + 1);
} else if (e.result == DRAW) {
- ++c.draw;
+ c.set_draw(c.draw() + 1);
} else if (e.result == BLACK) {
- ++c.black;
+ c.set_black(c.black() + 1);
}
if (e.white_elo >= 100 && e.black_elo >= 100) {
- c.sum_white_elo += e.white_elo;
- c.sum_black_elo += e.black_elo;
- ++c.num_elo;
+ c.set_sum_white_elo(c.sum_white_elo() + e.white_elo);
+ c.set_sum_black_elo(c.sum_black_elo() + e.black_elo);
+ c.set_num_elo(c.num_elo() + 1);
}
- if (c.first_timestamp == DUMMY_TIMESTAMP ||
- e.timestamp < c.first_timestamp) {
- c.first_timestamp = e.timestamp;
- c.opening_num = e.opening_num;
+ if (!c.has_first_timestamp() || e.timestamp < c.first_timestamp()) {
+ if (e.timestamp != DUMMY_TIMESTAMP) {
+ c.set_first_timestamp(e.timestamp);
+ }
+ c.set_opening_num(e.opening_num);
}
if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) {
+ c.SerializeToString(&buf);
mtbl_writer_add(mtbl,
(const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(),
- (const uint8_t *)&c, sizeof(c));
+ (const uint8_t *)buf.data(), buf.size());
c = Count();
}
}
#include <memory>
#include <string>
#include <string.h>
-#include "count.h"
+#include "count.pb.h"
using namespace std;
while (mtbl_iter_next(it, &key, &len_key, &val, &len_val)) {
string move((char *)(key + prefix_len), len_key - prefix_len);
- const Count* c = (Count *)val;
+			// Decode the serialized Count stored as the entry's value. A value
+			// that does not parse would otherwise be printed as all-zero
+			// counts with NaN averages, so report it and skip the entry.
+			Count c;
+			if (!c.ParseFromArray(val, len_val)) {
+				fprintf(stderr, "Warning: skipping unparseable Count for move '%s'\n",
+					move.c_str());
+				continue;
+			}
printf("%s %d %d %d %u %f %f %d %ld\n", move.c_str(),
- c->white, c->draw, c->black, c->opening_num,
- float(c->sum_white_elo) / c->num_elo,
- float(c->sum_black_elo) / c->num_elo,
- c->num_elo, c->first_timestamp);
+				c.white(), c.draw(), c.black(), c.opening_num(),
+				// Averages are computed in double; when num_elo is 0 this
+				// yields NaN rather than trapping.
+				double(c.sum_white_elo()) / c.num_elo(),
+				double(c.sum_black_elo()) / c.num_elo(),
+				c.num_elo(), c.first_timestamp());
}
}
}
#include <string>
#include <string.h>
#include <assert.h>
-#include "count.h"
+#include "count.pb.h"
using namespace std;
-
+// Merge callback: combines two serialized Count values stored under the same
+// key into one. Game counts and Elo sums are added; the opening number and
+// first timestamp are taken from whichever input has the earlier (or equal)
+// first_timestamp, where an unset timestamp compares as the proto default.
+//
+// The result is returned in a malloc()ed buffer via *merged_val and
+// *len_merged_val. On any failure (unparseable input, serialization or
+// allocation failure) *merged_val is left NULL and *len_merged_val is 0.
+// NOTE(review): mtbl is expected to treat a NULL merged value as a failed
+// merge and abort -- confirm against mtbl_merger(3).
void merge_count(void* userdata,
const uint8_t *key, size_t len_key,
const uint8_t *val0, size_t len_val0,
const uint8_t *val1, size_t len_val1,
uint8_t **merged_val, size_t *len_merged_val)
{
- assert(len_val0 == sizeof(Count));
- assert(len_val1 == sizeof(Count));
-
- const Count* c0 = reinterpret_cast<const Count*>(val0);
- const Count* c1 = reinterpret_cast<const Count*>(val1);
- unique_ptr<Count> c((Count *)malloc(sizeof(Count))); // Needs to be with malloc, per merger spec.
-
- c->white = c0->white + c1->white;
- c->draw = c0->draw + c1->draw;
- c->black = c0->black + c1->black;
- c->sum_white_elo = c0->sum_white_elo + c1->sum_white_elo;
- c->sum_black_elo = c0->sum_black_elo + c1->sum_black_elo;
- c->num_elo = c0->num_elo + c1->num_elo;
- if (c0->first_timestamp <= c1->first_timestamp) {
- c->opening_num = c0->opening_num;
- c->first_timestamp = c0->first_timestamp;
+	// Fail closed: every early return below leaves a NULL result behind.
+	*merged_val = nullptr;
+	*len_merged_val = 0;
+
+	// This replaces the old sizeof(Count) asserts: reject values that are
+	// not valid serialized Count messages instead of merging them as zeros.
+	Count c0, c1;
+	if (!c0.ParseFromArray(val0, len_val0) ||
+	    !c1.ParseFromArray(val1, len_val1)) {
+		return;
+	}
+
+	Count c;
+
+	c.set_white(c0.white() + c1.white());
+	c.set_draw(c0.draw() + c1.draw());
+	c.set_black(c0.black() + c1.black());
+	c.set_sum_white_elo(c0.sum_white_elo() + c1.sum_white_elo());
+	c.set_sum_black_elo(c0.sum_black_elo() + c1.sum_black_elo());
+	c.set_num_elo(c0.num_elo() + c1.num_elo());
+	if (c0.first_timestamp() <= c1.first_timestamp()) {
+		c.set_opening_num(c0.opening_num());
+		// Only propagate a timestamp that was actually set, so the
+		// "unknown" state survives the merge.
+		if (c0.has_first_timestamp()) {
+			c.set_first_timestamp(c0.first_timestamp());
+		}
} else {
- c->opening_num = c1->opening_num;
- c->first_timestamp = c1->first_timestamp;
+		c.set_opening_num(c1.opening_num());
+		if (c1.has_first_timestamp()) {
+			c.set_first_timestamp(c1.first_timestamp());
+		}
}
- *merged_val = reinterpret_cast<uint8_t *>(c.release());
- *len_merged_val = sizeof(Count);
+	static string buf;  // Keep allocated.
+	if (!c.SerializeToString(&buf)) {
+		return;
+	}
+
+	// The output must come from malloc(). Request at least one byte so that
+	// an empty encoding cannot be mistaken for an allocation failure.
+	uint8_t *out = static_cast<uint8_t *>(malloc(buf.empty() ? 1 : buf.size()));
+	if (out == nullptr) {
+		return;
+	}
+	memcpy(out, buf.data(), buf.size());
+
+	*merged_val = out;
+	*len_merged_val = buf.size();
}
int main(int argc, char **argv)
+++ /dev/null
-#define DUMMY_TIMESTAMP 32503680000 // 3000-01-01 00:00:00 UTC.
-
-struct Count {
- int white = 0;
- int draw = 0;
- int black = 0;
- unsigned int opening_num = 0;
- unsigned long long sum_white_elo = 0;
- unsigned long long sum_black_elo = 0;
- int num_elo = 0;
- time_t first_timestamp = DUMMY_TIMESTAMP;
-};
--- /dev/null
+// This schema relies on proto2 features (optional presence tracking and
+// explicit defaults), so pin the syntax explicitly.
+syntax = "proto2";
+
+// Aggregate statistics for one position/move; serialized as the value of
+// each table entry.
+message Count {
+  // Number of games.
+  optional int32 white = 1;
+  optional int32 draw = 2;
+  optional int32 black = 3;
+
+  // Opening number (32-bit hash value).
+  optional fixed32 opening_num = 4;
+
+  // Elo statistics for this position.
+  optional int64 sum_white_elo = 5;
+  optional int64 sum_black_elo = 6;
+  optional int32 num_elo = 7;
+
+  // First timestamp this position/move was seen. When unset it reads as the
+  // sentinel 32503680000 (3000-01-01 00:00:00 UTC), which compares later
+  // than any real timestamp.
+  optional int64 first_timestamp = 8 [default=32503680000];
+}