- string buf;
- size_t bytes_used = 0;
- for (auto &[trigram, docids] : invindex) {
- auto last = unique(docids.begin(), docids.end());
- docids.erase(last, docids.end());
- longest_posting_list = max(longest_posting_list, docids.size());
- trigrams += docids.size();
-
- size_t bytes_needed = P4NENC_BOUND(docids.size());
- if (buf.size() < bytes_needed) buf.resize(bytes_needed);
- size_t bytes = p4nd1enc128v32(&docids[0], docids.size(), reinterpret_cast<unsigned char *>(&buf[0]));
- pl[trigram] = string(buf.data(), bytes);
- bytes_used += bytes;
- }
- dprintf("%zu files, %zu different trigrams, %zu entries, avg len %.2f, longest %zu\n",
- files.size(), invindex.size(), trigrams, double(trigrams) / invindex.size(), longest_posting_list);
-
- dprintf("%zu bytes used for posting lists (%.2f bits/entry)\n", bytes_used, 8 * bytes_used / double(trigrams));
- //steady_clock::time_point end = steady_clock::now();
- dprintf("Building posting lists took %.1f ms.\n\n", 1e3 * duration<float>(end - start).count());