X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=binloader.cpp;h=6467cf8880fb5ca9f988b4baabcf512dae15fa99;hb=04d675c5a4a867b2c15e2ef64d5179353fd1489e;hp=2ed60cbe78250ff34eba9f7c358968f90c4ddb97;hpb=756a91677c1565b1bd555722ea795f911106104f;p=remoteglot-book diff --git a/binloader.cpp b/binloader.cpp index 2ed60cb..6467cf8 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -12,18 +12,36 @@ #include #include #include "count.pb.h" -#include "arena.h" #include "hash.h" #define DUMMY_TIMESTAMP 32503680000 using namespace std; -Arena arena; +static inline int memcmp_different_len(const void *s1, size_t n1, const void *s2, size_t n2) +{ + size_t shared_len = min(n1, n2); + if (shared_len >= 8) { + uint64_t a1 = *(const uint64_t *)s1; + uint64_t a2 = *(const uint64_t *)s2; + if (a1 != a2) { + a1 = __builtin_bswap64(a1); + a2 = __builtin_bswap64(a2); + return (a1 < a2) ? -1 : 1; + } + } + + int s = memcmp(s1, s2, shared_len); + if (s != 0) { + return s; + } + + return n2 - n1; +} enum Result { WHITE = 0, DRAW, BLACK }; struct Element { - char *bpfen; + char bpfen[32]; // includes prev_board_hash int bpfen_len; char move[8]; // Na1xc3+ Result result; @@ -31,23 +49,16 @@ struct Element { int file_num; time_t timestamp; long start_position; + int computer; bool operator< (const Element& other) const { - int shared_len = min(bpfen_len, other.bpfen_len); - int s = memcmp(bpfen, other.bpfen, shared_len); - if (s < 0) { - return true; - } else if (s > 0) { - return false; - } else { - return bpfen_len < other.bpfen_len; - } + int s = memcmp_different_len(bpfen, bpfen_len, other.bpfen, other.bpfen_len); + return s < 0; } }; struct ShardData { vector<Element> elems; - unique_ptr<Arena> arena; // Used to allocate bpfen.
int num_written_subshards = 0; }; @@ -75,6 +86,7 @@ void write_subshard(const char *basename, ShardData* shard, int bucket) } else if (e.result == BLACK) { c.set_black(c.black() + 1); } + c.set_computer(c.computer() + e.computer); if (e.white_elo >= 100 && e.black_elo >= 100) { c.set_sum_white_elo(c.sum_white_elo() + e.white_elo); c.set_sum_black_elo(c.sum_black_elo() + e.black_elo); @@ -88,7 +100,7 @@ void write_subshard(const char *basename, ShardData* shard, int bucket) c.set_pgn_file_num(e.file_num); c.set_pgn_start_position(e.start_position); } - if (!moves.count(e.move)) { + if (strlen(e.move) > 0 && !moves.count(e.move)) { moves.insert(e.move); c.add_move(e.move); } @@ -96,9 +108,10 @@ void write_subshard(const char *basename, ShardData* shard, int bucket) e.bpfen_len != shard->elems[i + 1].bpfen_len || memcmp(e.bpfen, shard->elems[i + 1].bpfen, e.bpfen_len) != 0) { c.SerializeToString(&buf); - mtbl_writer_add(mtbl, + mtbl_res res = mtbl_writer_add(mtbl, (const uint8_t *)e.bpfen, e.bpfen_len, (const uint8_t *)buf.data(), buf.size()); + assert(res == mtbl_res_success); c = Count(); moves.clear(); } @@ -106,7 +119,6 @@ void write_subshard(const char *basename, ShardData* shard, int bucket) mtbl_writer_destroy(&mtbl); shard->elems.clear(); - shard->arena.reset(new Arena); } int main(int argc, char **argv) @@ -119,7 +131,6 @@ int main(int argc, char **argv) for (int i = 0; i < num_buckets; ++i) { shards[i].elems.reserve(num_pos_per_subshard); - shards[i].arena.reset(new Arena); } size_t num_elems = 0; @@ -141,83 +152,84 @@ int main(int argc, char **argv) if (bpfen_len == -1) { break; } - if (bpfen_len >= int(sizeof(bpfen))) { + if (bpfen_len <= 0) { + fprintf(stderr, "Underlong BPFEN (%d bytes)\n", bpfen_len); + exit(1); + } + if (bpfen_len >= 32) { fprintf(stderr, "Overlong BPFEN (%d bytes)\n", bpfen_len); - // exit(1); - break; + exit(1); } if (fread(bpfen, bpfen_len, 1, fp) != 1) { - perror("fread()"); - // exit(1); - break; + perror("fread(bpfen)"); + 
exit(1); } int r = getc(fp); if (r == -1) { perror("getc()"); - //exit(1); - break; + exit(1); } int opening_num, white_elo, black_elo, file_num; time_t timestamp; long start_position; if (fread(&white_elo, sizeof(white_elo), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(white_elo)"); + exit(1); } if (fread(&black_elo, sizeof(black_elo), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(black_elo)"); + exit(1); } if (fread(&opening_num, sizeof(opening_num), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(opening_num)"); + exit(1); } if (fread(&timestamp, sizeof(timestamp), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(timestamp)"); + exit(1); } if (fread(&file_num, sizeof(file_num), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(file_num)"); + exit(1); } if (fread(&start_position, sizeof(start_position), 1, fp) != 1) { - perror("fread()"); - //exit(1); - break; + perror("fread(start_position)"); + exit(1); } + int computer = getc(fp); + if (computer == -1) { + perror("getc(computer)"); + exit(1); + } char move[8]; int l = getc(fp); if (l == -1) { + perror("getc(move_length)"); break; } if (l >= int(sizeof(move))) { fprintf(stderr, "Overlong move (%d bytes)\n", l); - // exit(1); - break; + exit(1); } - if (fread(&move[0], l, 1, fp) != 1) { - perror("fread()"); - // exit(1); - break; + if (l == 0) { + move[0] = 0; + } else if (fread(&move[0], l, 1, fp) != 1) { + perror("fread(move)"); + exit(1); } move[l] = 0; int bucket = hash_key_to_bucket(bpfen, bpfen_len, num_buckets); Element e; - e.bpfen = shards[bucket].arena->alloc(bpfen_len); memcpy(e.bpfen, bpfen, bpfen_len); e.bpfen_len = bpfen_len; strcpy(e.move, move); e.result = Result(r); + e.computer = computer; e.opening_num = opening_num; e.white_elo = white_elo; e.black_elo = black_elo;