git.sesse.net Git - remoteglot-book/blob - binloader.cpp
Parse timestamps from the binary format. Needs some rework, though.
[remoteglot-book] / binloader.cpp
1 //#define _GLIBCXX_PARALLEL
2
3 // Usage: ./binloader IN1 IN2 IN3 ... OUT NUM_BUCKETS
4
5 #include <stdio.h>
6 #include <vector>
7 #include <mtbl.h>
8 #include <algorithm>
9 #include <utility>
10 #include <memory>
11 #include <string>
12 #include <string.h>
13 #include "count.h"
14 #include "hash.h"
15
16 using namespace std;
17
// Result code attached to every record. The numeric values matter: main()
// reads a single byte from the input file and casts it directly to this type.
enum Result {
        WHITE = 0,
        DRAW = 1,
        BLACK = 2
};
19 struct Element {
20         string bpfen_and_move;
21         Result result;
22         int opening_num, white_elo, black_elo;
23         time_t timestamp;
24
25         bool operator< (const Element& other) const {
26                 return bpfen_and_move < other.bpfen_and_move;
27         }
28 };
29
30 int main(int argc, char **argv)
31 {
32         int num_buckets = atoi(argv[argc - 1]);
33
34         vector<vector<Element>> elems;
35         elems.resize(num_buckets);
36
37         size_t num_elems = 0;
38         for (int i = 1; i < argc - 2; ++i) {
39                 FILE *fp = fopen(argv[i], "rb");
40                 if (fp == NULL) {
41                         perror(argv[i]);
42                         exit(1);
43                 }
44                 for ( ;; ) {
45                         int l = getc(fp);
46                         if (l == -1) {
47                                 break;
48                         }
49                 
50                         string bpfen_and_move;
51                         bpfen_and_move.resize(l);
52                         if (fread(&bpfen_and_move[0], l, 1, fp) != 1) {
53                                 perror("fread()");
54                 //              exit(1);
55                                 break;
56                         }
57
58                         int r = getc(fp);
59                         if (r == -1) {
60                                 perror("getc()");
61                                 //exit(1);
62                                 break;
63                         }
64
65                         int opening_num, white_elo, black_elo;
66                         time_t timestamp;
67                         if (fread(&white_elo, sizeof(white_elo), 1, fp) != 1) {
68                                 perror("fread()");
69                                 //exit(1);
70                                 break;
71                         }
72                         if (fread(&black_elo, sizeof(black_elo), 1, fp) != 1) {
73                                 perror("fread()");
74                                 //exit(1);
75                                 break;
76                         }
77                         if (fread(&opening_num, sizeof(opening_num), 1, fp) != 1) {
78                                 perror("fread()");
79                                 //exit(1);
80                                 break;
81                         }
82                         if (fread(&timestamp, sizeof(timestamp), 1, fp) != 1) {
83                                 perror("fread()");
84                                 //exit(1);
85                                 break;
86                         }
87
88                         int bucket = hash_key_to_bucket(bpfen_and_move.data(), bpfen_and_move.size(), num_buckets);
89                         elems[bucket].emplace_back(Element {move(bpfen_and_move), Result(r), opening_num, white_elo, black_elo, timestamp});
90                         ++num_elems;
91                 }
92                 fclose(fp);
93
94                 printf("Read %ld elems\n", num_elems);
95         }
96
97         printf("Sorting...\n");
98         for (int i = 0; i < num_buckets; ++i) {
99                 sort(elems[i].begin(), elems[i].end());
100         }
101
102         printf("Writing SSTables...\n");
103         for (int i = 0; i < num_buckets; ++i) {
104                 char filename[256];
105                 snprintf(filename, sizeof(filename), "%s.part%04d", argv[argc - 2], i);
106
107                 mtbl_writer_options* wopt = mtbl_writer_options_init();
108                 mtbl_writer_options_set_compression(wopt, MTBL_COMPRESSION_SNAPPY);
109                 mtbl_writer* mtbl = mtbl_writer_init(filename, wopt);
110                 Count c;
111                 for (size_t j = 0; j < elems[i].size(); ++j) {
112                         const Element &e = elems[i][j];
113                         if (e.result == WHITE) {
114                                 ++c.white;
115                         } else if (e.result == DRAW) {
116                                 ++c.draw;
117                         } else if (e.result == BLACK) {
118                                 ++c.black;
119                         }
120                         if (e.white_elo >= 100 && e.black_elo >= 100) {
121                                 c.sum_white_elo += e.white_elo;
122                                 c.sum_black_elo += e.black_elo;
123                                 ++c.num_elo;
124                         }
125                         if (c.first_timestamp == DUMMY_TIMESTAMP ||
126                             e.timestamp < c.first_timestamp) {
127                                 c.first_timestamp = e.timestamp;
128                                 c.opening_num = e.opening_num;
129                         }
130                         if (j == elems[i].size() - 1 || e.bpfen_and_move != elems[i][j + 1].bpfen_and_move) {
131                                 mtbl_writer_add(mtbl,
132                                         (const uint8_t *)e.bpfen_and_move.data(), e.bpfen_and_move.size(),
133                                         (const uint8_t *)&c, sizeof(c));
134                                 c = Count();
135                         }
136                 }
137                 mtbl_writer_destroy(&mtbl);
138         }
139 }