git.sesse.net Git - plocate/blob - bench.cpp
When failing the benchmark tests, stop printing out differences after the first ten.
[plocate] / bench.cpp
1 #include <stdio.h>
2 #include <unistd.h>
3 #include <fcntl.h>
4 #include <chrono>
5 #include <memory>
6
7 #define dprintf(...)
8 //#define dprintf(...) fprintf(stderr, __VA_ARGS__);
9
10 #include "turbopfor.h"
11 #include "vp4.h"
12 #include "db.h"
13 #include "io_uring_engine.h"
14
15 using namespace std;
16 using namespace std::chrono;
17
18 int main(void)
19 {
20         int fd = open("plocate.db", O_RDONLY);
21         if (fd == -1) {
22                 perror("plocate.db");
23                 exit(1);
24         }
25
26         Header hdr;
27         complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0);
28
29         unique_ptr<Trigram[]> ht(new Trigram[hdr.hashtable_size + hdr.extra_ht_slots + 1]);
30         complete_pread(fd, ht.get(), (hdr.hashtable_size + hdr.extra_ht_slots + 1) * sizeof(Trigram), hdr.hash_table_offset_bytes);
31
32         uint32_t longest_pl = 0;
33         vector<pair<string, unsigned>> posting_lists;
34         for (unsigned i = 0; i < hdr.hashtable_size + hdr.extra_ht_slots; ++i) {
35                 if (ht[i].num_docids == 0) {
36                         continue;
37                 }
38                 size_t len = ht[i + 1].offset - ht[i].offset;
39                 string str;
40                 str.resize(len);
41                 complete_pread(fd, &str[0], len, ht[i].offset);
42                 posting_lists.emplace_back(move(str), ht[i].num_docids);
43                 longest_pl = std::max(ht[i].num_docids, longest_pl);
44         }
45         ht.reset();
46         fprintf(stderr, "Read %zu posting lists.\n", posting_lists.size());
47
48         size_t num_errors = 0;
49         for (auto &[pl, num_docids] : posting_lists) {
50                 //fprintf(stderr, "%zu bytes, %u docids\n", pl.size(), num_docids);
51                 vector<uint32_t> out1, out2;
52                 out1.resize(num_docids + 128);
53                 out2.resize(num_docids + 128);
54                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
55                 p4nd1dec32(pldata, num_docids, &out1[0]);
56                 decode_pfor_delta1<128>(pldata, num_docids, &out2[0]);
57                 for (unsigned i = 0; i < num_docids; ++i) {
58                         if (out1[i] != out2[i]) {
59                                 if (++num_errors < 10) {
60                                         for (unsigned j = 0; j < num_docids; ++j) {
61                                                 fprintf(stderr, "%3u: reference=%u ours=%u  (diff=%d)\n", j, out1[j], out2[j], out1[j] - out2[j]);
62                                         }
63                                 }
64                                 break;
65                         }
66                 }
67         }
68         fprintf(stderr, "%zu/%zu posting lists had errors in decoding.\n", num_errors, posting_lists.size());
69
70         vector<uint32_t> dummy;
71         dummy.resize(longest_pl + 128);
72         steady_clock::time_point start = steady_clock::now();
73         for (auto &[pl, num_docids] : posting_lists) {
74                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
75                 p4nd1dec32(pldata, num_docids, &dummy[0]);
76         }
77         steady_clock::time_point end = steady_clock::now();
78         double reference_sec = duration<double>(end - start).count();
79         fprintf(stderr, "Decoding with reference implementation: %.1f ms\n", 1e3 * reference_sec);
80
81         start = steady_clock::now();
82         for (auto &[pl, num_docids] : posting_lists) {
83                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
84                 decode_pfor_delta1<128>(pldata, num_docids, &dummy[0]);
85         }
86         end = steady_clock::now();
87         double own_sec = duration<double>(end - start).count();
88         fprintf(stderr, "Decoding with own implementation: %.1f ms (%.2f%% speed)\n", 1e3 * own_sec, 100.0 * reference_sec / own_sec);
89 }