git.sesse.net Git - remoteglot-book/blob - binloader.cpp
Partition the SSTable; somewhat less efficient space-wise, it seems, but we avoid...
[remoteglot-book] / binloader.cpp
1 //#define _GLIBCXX_PARALLEL
2
3 // Usage: ./binloader IN1 IN2 IN3 ... OUT NUM_BUCKETS
4
5 #include <stdio.h>
6 #include <vector>
7 #include <mtbl.h>
8 #include <algorithm>
9 #include <utility>
10 #include <memory>
11 #include <string>
12 #include <string.h>
13 #include "count.h"
14 #include "hash.h"
15
16 using namespace std;
17
// Outcome of a game. main() converts the result byte read from the input
// files straight into this enum, so the numeric values must stay as they are.
enum Result {
        WHITE = 0,
        DRAW = 1,
        BLACK = 2
};
19 struct Element {
20         string bpfen_and_move;
21         Result result;
22         int opening_num, white_elo, black_elo;
23
24         bool operator< (const Element& other) const {
25                 return bpfen_and_move < other.bpfen_and_move;
26         }
27 };
28
29 int main(int argc, char **argv)
30 {
31         int num_buckets = atoi(argv[argc - 1]);
32
33         vector<vector<Element>> elems;
34         elems.resize(num_buckets);
35
36         size_t num_elems = 0;
37         for (int i = 1; i < argc - 2; ++i) {
38                 FILE *fp = fopen(argv[i], "rb");
39                 if (fp == NULL) {
40                         perror(argv[i]);
41                         exit(1);
42                 }
43                 for ( ;; ) {
44                         int l = getc(fp);
45                         if (l == -1) {
46                                 break;
47                         }
48                 
49                         string bpfen_and_move;
50                         bpfen_and_move.resize(l);
51                         if (fread(&bpfen_and_move[0], l, 1, fp) != 1) {
52                                 perror("fread()");
53                 //              exit(1);
54                                 break;
55                         }
56
57                         int r = getc(fp);
58                         if (r == -1) {
59                                 perror("getc()");
60                                 //exit(1);
61                                 break;
62                         }
63
64                         int opening_num, white_elo, black_elo;
65                         if (fread(&white_elo, sizeof(white_elo), 1, fp) != 1) {
66                                 perror("fread()");
67                                 //exit(1);
68                                 break;
69                         }
70                         if (fread(&black_elo, sizeof(black_elo), 1, fp) != 1) {
71                                 perror("fread()");
72                                 //exit(1);
73                                 break;
74                         }
75                         if (fread(&opening_num, sizeof(opening_num), 1, fp) != 1) {
76                                 perror("fread()");
77                                 //exit(1);
78                                 break;
79                         }
80
81                         int bucket = hash_key_to_bucket(bpfen_and_move.data(), bpfen_and_move.size(), num_buckets);
82                         elems[bucket].emplace_back(Element {move(bpfen_and_move), Result(r), opening_num, white_elo, black_elo});
83                         ++num_elems;
84                 }
85                 fclose(fp);
86
87                 printf("Read %ld elems\n", num_elems);
88         }
89
90         printf("Sorting...\n");
91         for (int i = 0; i < num_buckets; ++i) {
92                 sort(elems[i].begin(), elems[i].end());
93         }
94
95         printf("Writing SSTables...\n");
96         for (int i = 0; i < num_buckets; ++i) {
97                 char filename[256];
98                 snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i);
99
100                 mtbl_writer_options* wopt = mtbl_writer_options_init();
101                 mtbl_writer_options_set_compression(wopt, MTBL_COMPRESSION_SNAPPY);
102                 mtbl_writer* mtbl = mtbl_writer_init(filename, wopt);
103                 Count c;
104                 for (size_t j = 0; j < elems[i].size(); ++j) {
105                         const Element &e = elems[i][j];
106                         if (e.result == WHITE) {
107                                 ++c.white;
108                         } else if (e.result == DRAW) {
109                                 ++c.draw;
110                         } else if (e.result == BLACK) {
111                                 ++c.black;
112                         }
113                         c.opening_num = e.opening_num;
114                         if (e.white_elo >= 100 && e.black_elo >= 100) {
115                                 c.sum_white_elo += e.white_elo;
116                                 c.sum_black_elo += e.black_elo;
117                                 ++c.num_elo;
118                         }
119                         if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) {
120                                 mtbl_writer_add(mtbl,
121                                         (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(),
122                                         (const uint8_t *)&c, sizeof(c));
123                                 c = Count();
124                         }
125                 }
126                 mtbl_writer_destroy(&mtbl);
127         }
128 }