git.sesse.net Git - plocate/blob - bench.cpp

   1 #include <chrono>
   2 #include <fcntl.h>
   3 #include <memory>
   4 #include <stdio.h>
   5 #include <unistd.h>
   6
   7 #define dprintf(...)
   8 //#define dprintf(...) fprintf(stderr, __VA_ARGS__);
   9
  10 #include "db.h"
  11 #include "io_uring_engine.h"
  12 #include "turbopfor.h"
  13 #include "vp4.h"
  14
  15 using namespace std;
  16 using namespace std::chrono;
  17
  18 int main(void)
  19 {
  20         int fd = open("plocate.db", O_RDONLY);
  21         if (fd == -1) {
  22                 perror("plocate.db");
  23                 exit(1);
  24         }
  25
  26         Header hdr;
  27         complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0);
  28
  29         unique_ptr<Trigram[]> ht(new Trigram[hdr.hashtable_size + hdr.extra_ht_slots + 1]);
  30         complete_pread(fd, ht.get(), (hdr.hashtable_size + hdr.extra_ht_slots + 1) * sizeof(Trigram), hdr.hash_table_offset_bytes);
  31
  32         size_t posting_list_bytes = 0, total_elements = 0;
  33         uint32_t longest_pl = 0;
  34         vector<pair<string, unsigned>> posting_lists;
  35         for (unsigned i = 0; i < hdr.hashtable_size + hdr.extra_ht_slots; ++i) {
  36                 if (ht[i].num_docids == 0) {
  37                         continue;
  38                 }
  39                 size_t len = ht[i + 1].offset - ht[i].offset;
  40                 string str;
  41                 str.resize(len);
  42                 complete_pread(fd, &str[0], len, ht[i].offset);
  43                 posting_lists.emplace_back(move(str), ht[i].num_docids);
  44                 longest_pl = std::max(ht[i].num_docids, longest_pl);
  45                 posting_list_bytes += len;
  46                 total_elements += ht[i].num_docids;
  47         }
  48         ht.reset();
  49         fprintf(stderr, "Read %zu posting lists.\n", posting_lists.size());
  50
  51         size_t num_errors = 0;
  52         for (auto &[pl, num_docids] : posting_lists) {
  53                 //fprintf(stderr, "%zu bytes, %u docids\n", pl.size(), num_docids);
  54                 vector<uint32_t> out1, out2;
  55                 out1.resize(num_docids + 128);
  56                 out2.resize(num_docids + 128);
  57                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
  58                 p4nd1dec128v32(pldata, num_docids, &out1[0]);
  59                 decode_pfor_delta1<128>(pldata, num_docids, /*interleaved=*/true, &out2[0]);
  60                 for (unsigned i = 0; i < num_docids; ++i) {
  61                         if (out1[i] != out2[i]) {
  62                                 if (++num_errors < 10) {
  63                                         for (unsigned j = 0; j < num_docids; ++j) {
  64                                                 fprintf(stderr, "%3u: reference=%u ours=%u  (diff=%d)\n", j, out1[j], out2[j], out1[j] - out2[j]);
  65                                         }
  66                                 }
  67                                 break;
  68                         }
  69                 }
  70         }
  71         fprintf(stderr, "%zu/%zu posting lists had errors in decoding.\n", num_errors, posting_lists.size());
  72
  73         vector<uint32_t> dummy;
  74         dummy.resize(longest_pl + 128);
  75         steady_clock::time_point start = steady_clock::now();
  76         for (auto &[pl, num_docids] : posting_lists) {
  77                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
  78                 p4nd1dec128v32(pldata, num_docids, &dummy[0]);
  79         }
  80         steady_clock::time_point end = steady_clock::now();
  81         double reference_sec = duration<double>(end - start).count();
  82         fprintf(stderr, "Decoding with reference implementation: %.1f ms\n", 1e3 * reference_sec);
  83
  84         start = steady_clock::now();
  85         for (auto &[pl, num_docids] : posting_lists) {
  86                 unsigned char *pldata = reinterpret_cast<unsigned char *>(&pl[0]);
  87                 decode_pfor_delta1<128>(pldata, num_docids, /*interleaved=*/true, &dummy[0]);
  88         }
  89         end = steady_clock::now();
  90         double own_sec = duration<double>(end - start).count();
  91         fprintf(stderr, "Decoding with own implementation: %.3f ms (%.2f%% speed)\n", 1e3 * own_sec, 100.0 * reference_sec / own_sec);
  92
  93         // Three numbers giving rules of thumb for judging our own implementation:
  94         //
  95         // - Compressed speed is easy to compare to disk I/O, to see the relative importance
  96         // - Uncompressed speed is easy to compare to intersection speeds and memory bandwidth
  97         //   (also very roughly comparable to the benchmark numbers in the TurboPFor README)
  98         // - ns/element gives an absolute measure for plocate (e.g. if we can decompress at
  99         //   1 ns/element, a 10k-element posting list goes by in 0.01 ms, which is way beyond
 100         //   instantaneous in practice).
 101         fprintf(stderr, "%.1f MB/sec (compressed), %.1f MB/sec (uncompressed), %.1f ns/element\n", posting_list_bytes / own_sec / 1048576.0,
 102                 (total_elements * sizeof(uint32_t)) / own_sec / 1048576.0, 1e9 * own_sec / total_elements);
 103 }