X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=binloader.cpp;h=6472f1009f73a0f0348e43ddd1656a4df86adfca;hb=f7f8ba0f6969452da0b1d4bc6f4c8490006f7dbe;hp=e472364e47b234e8a3a0ed02f03c47c8d47a7582;hpb=8eb9465efeb8813c93bfab2a8f385549f45bd827;p=remoteglot-book diff --git a/binloader.cpp b/binloader.cpp index e472364..6472f10 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -9,21 +9,25 @@ #include #include #include +#include #include -#include "count.h" +#include "count.pb.h" #include "hash.h" +#define DUMMY_TIMESTAMP 32503680000 + using namespace std; enum Result { WHITE = 0, DRAW, BLACK }; struct Element { - string bpfen_and_move; + string bpfen; + string move; Result result; int opening_num, white_elo, black_elo; time_t timestamp; bool operator< (const Element& other) const { - return bpfen_and_move < other.bpfen_and_move; + return bpfen < other.bpfen; } }; @@ -47,9 +51,9 @@ int main(int argc, char **argv) break; } - string bpfen_and_move; - bpfen_and_move.resize(l); - if (fread(&bpfen_and_move[0], l, 1, fp) != 1) { + string bpfen; + bpfen.resize(l); + if (fread(&bpfen[0], l, 1, fp) != 1) { perror("fread()"); // exit(1); break; @@ -85,8 +89,20 @@ int main(int argc, char **argv) break; } - int bucket = hash_key_to_bucket(bpfen_and_move.data(), bpfen_and_move.size(), num_buckets); - elems[bucket].emplace_back(Element {move(bpfen_and_move), Result(r), opening_num, white_elo, black_elo, timestamp}); + l = getc(fp); + if (l == -1) { + break; + } + string move; + move.resize(l); + if (fread(&move[0], l, 1, fp) != 1) { + perror("fread()"); + // exit(1); + break; + } + + int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets); + elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp}); ++num_elems; } fclose(fp); @@ -100,6 +116,7 @@ int main(int argc, char **argv) } printf("Writing SSTables...\n"); + string buf; // Keep allocated. for (int i = 0; i < num_buckets; ++i) { char filename[256]; snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i); @@ -108,30 +125,38 @@ int main(int argc, char **argv) mtbl_writer_options_set_compression(wopt, MTBL_COMPRESSION_SNAPPY); mtbl_writer* mtbl = mtbl_writer_init(filename, wopt); Count c; + unordered_set moves; for (size_t j = 0; j < elems[i].size(); ++j) { const Element &e = elems[i][j]; if (e.result == WHITE) { - ++c.white; + c.set_white(c.white() + 1); } else if (e.result == DRAW) { - ++c.draw; + c.set_draw(c.draw() + 1); } else if (e.result == BLACK) { - ++c.black; + c.set_black(c.black() + 1); } if (e.white_elo >= 100 && e.black_elo >= 100) { - c.sum_white_elo += e.white_elo; - c.sum_black_elo += e.black_elo; - ++c.num_elo; + c.set_sum_white_elo(c.sum_white_elo() + e.white_elo); + c.set_sum_black_elo(c.sum_black_elo() + e.black_elo); + c.set_num_elo(c.num_elo() + 1); + } + if (!c.has_first_timestamp() || e.timestamp < c.first_timestamp()) { + if (e.timestamp != DUMMY_TIMESTAMP) { + c.set_first_timestamp(e.timestamp); + } + c.set_opening_num(e.opening_num); } - if (c.first_timestamp == DUMMY_TIMESTAMP || - e.timestamp < c.first_timestamp) { - c.first_timestamp = e.timestamp; - c.opening_num = e.opening_num; + if (!moves.count(e.move)) { + moves.insert(e.move); + c.add_move(e.move); } - if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) { + if (j == elems[i].size() - 1 || e.bpfen != elems[i][j + 1].bpfen) { + c.SerializeToString(&buf); mtbl_writer_add(mtbl, - (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(), - (const uint8_t *)&c, sizeof(c)); + (const uint8_t *)e.bpfen.data(), e.bpfen.size(), + (const uint8_t *)buf.data(), buf.size()); c = Count(); + moves.clear(); } } mtbl_writer_destroy(&mtbl);