+// Read-only accessor for an index file that is memory-mapped from a file
+// descriptor. The object owns both the mapping and the descriptor: the
+// destructor unmaps the former and closes the latter.
+class Corpus {
+public:
+    // Maps the file behind fd. Terminates the process with exit(1) if the
+    // file cannot be measured or mapped. Takes over fd (closed on destruction).
+    Corpus(int fd);
+    ~Corpus();
+
+    // Non-copyable: a copy would share the same mapping and descriptor, and
+    // both destructors would then munmap()/close() them a second time.
+    Corpus(const Corpus &) = delete;
+    Corpus &operator=(const Corpus &) = delete;
+
+    // Returns the trigram table entry whose trgm field equals trgm, or
+    // nullptr if the table has no such entry.
+    const Trigram *find_trigram(uint32_t trgm) const;
+
+    // Returns a pointer into the mapping at the byte offset stored in the
+    // given table entry. The pointer is only valid while this object lives.
+    const unsigned char *get_compressed_posting_list(const Trigram *trigram) const;
+
+    // NOTE(review): defined outside this view; presumably returns the
+    // compressed block that holds the filename for docid -- confirm.
+    string_view get_compressed_filename_block(uint32_t docid) const;
+
+private:
+    const int fd;                          // descriptor backing the mapping
+    off_t len;                             // size of the file / mapping in bytes
+    const char *data;                      // start of the read-only mapping
+    const uint64_t *filename_offsets;      // points into data, at the offset given by the header
+    const Trigram *trgm_begin, *trgm_end;  // half-open range of the trigram table inside data
+};
+
+// Maps the whole file behind fd read-only and locates the trigram table and
+// the filename offset index inside it.
+//
+// The file starts with two uint64_t words: the number of trigram entries and
+// the byte offset of the filename index. The trigram table follows directly
+// after those two words.
+//
+// Any failure (lseek/mmap error, file too short for the header, or header
+// values pointing outside the file) prints a diagnostic to stderr and
+// terminates the process with exit(1).
+Corpus::Corpus(int fd)
+    : fd(fd)
+{
+    len = lseek(fd, 0, SEEK_END);
+    if (len == -1) {
+        perror("lseek");
+        exit(1);
+    }
+
+    // The two header words are read unconditionally below, so a shorter file
+    // (including an empty one) has to be rejected before touching the mapping.
+    constexpr uint64_t header_size = sizeof(uint64_t) * 2;
+    const uint64_t file_size = static_cast<uint64_t>(len);
+    if (file_size < header_size) {
+        fprintf(stderr, "index file is too short to contain a header\n");
+        exit(1);
+    }
+
+    void *mapping = mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, /*offset=*/0);
+    if (mapping == MAP_FAILED) {
+        perror("mmap");
+        exit(1);
+    }
+    data = static_cast<const char *>(mapping);
+
+    const uint64_t num_trigrams = *reinterpret_cast<const uint64_t *>(data);
+    const uint64_t filename_index_offset = *reinterpret_cast<const uint64_t *>(data + sizeof(uint64_t));
+
+    // Both values come straight from the file. Make sure the trigram table
+    // and the filename index start lie inside the mapping before forming
+    // pointers from them. The division avoids overflow in
+    // num_trigrams * sizeof(Trigram).
+    if (num_trigrams > (file_size - header_size) / sizeof(Trigram) ||
+        filename_index_offset > file_size) {
+        fprintf(stderr, "index file is corrupt (header points outside the file)\n");
+        exit(1);
+    }
+
+    filename_offsets = reinterpret_cast<const uint64_t *>(data + filename_index_offset);
+
+    trgm_begin = reinterpret_cast<const Trigram *>(data + header_size);
+    trgm_end = trgm_begin + num_trigrams;
+}
+
+// Releases what the constructor acquired: first the len-byte mapping that
+// starts at data, then the file descriptor.
+Corpus::~Corpus()
+{
+    // munmap() takes a non-const pointer; the mapping itself is never written.
+    void *mapping = const_cast<char *>(data);
+    munmap(mapping, len);
+    close(fd);
+}
+
+// Binary-searches the trigram table [trgm_begin, trgm_end) for an entry whose
+// trgm field equals the requested value. Returns a pointer to that entry, or
+// nullptr when no entry matches.
+const Trigram *Corpus::find_trigram(uint32_t trgm) const
+{
+    const auto entry_precedes_key = [](const Trigram &entry, uint32_t key) {
+        return entry.trgm < key;
+    };
+
+    // lower_bound yields the first entry that is not less than trgm; it is a
+    // hit only if it exists and carries exactly the requested value.
+    const Trigram *candidate = lower_bound(trgm_begin, trgm_end, trgm, entry_precedes_key);
+    if (candidate != trgm_end && candidate->trgm == trgm) {
+        return candidate;
+    }
+    return nullptr;
+}
+
+// Returns a pointer into the mapping at the byte offset recorded in the given
+// trigram entry. trgmptr is dereferenced unconditionally, so it must not be
+// null.
+const unsigned char *Corpus::get_compressed_posting_list(const Trigram *trgmptr) const
+{
+    const char *list_start = data + trgmptr->offset;
+    return reinterpret_cast<const unsigned char *>(list_start);
+}
+
+string_view Corpus::get_compressed_filename_block(uint32_t docid) const