+// Incrementally builds the encoded form of one posting list from a stream
+// of docids. Docids are buffered in groups of up to 128; each full group is
+// encoded by append_block(), and finish() encodes whatever is left over.
+class PostingListBuilder {
+public:
+    // Adds one docid. A docid equal to the one added immediately before it
+    // is ignored, so num_docids only counts non-repeated values.
+    void add_docid(uint32_t docid);
+
+    // Encodes any docids still sitting in the pending buffer and appends
+    // them to `encoded`. Call once all docids have been added.
+    void finish();
+
+    // Encoded bytes produced so far: the header written for the first
+    // docid, followed by the encoded blocks.
+    string encoded;
+
+    // Number of docids accepted by add_docid() (duplicates excluded).
+    size_t num_docids = 0;
+
+private:
+    // Appends the encoding of the very first docid to `encoded`.
+    void write_header(uint32_t docid);
+
+    // Encodes the full 128-entry pending buffer and appends it to `encoded`.
+    void append_block();
+
+    // Docids accepted but not yet encoded. Not zero-initialized; only the
+    // first num_pending_docids entries are meaningful.
+    uint32_t pending_docids[128];
+    unsigned num_pending_docids = 0;
+
+    // Last docid that made it into `encoded`; passed to the encoder for the
+    // next block and used by add_docid() for duplicate detection while the
+    // pending buffer is empty. Initialized so a fresh builder never holds an
+    // indeterminate value (matching the other scalar members).
+    uint32_t last_block_end = 0;
+};
+
+// Adds `docid` to the posting list under construction.
+//
+// The very first docid is written straight into the header. Every later
+// docid is compared against the one accepted just before it and dropped if
+// equal; otherwise it is buffered, and the buffer is encoded as a block as
+// soon as it holds 128 entries.
+void PostingListBuilder::add_docid(uint32_t docid)
+{
+    const bool buffer_empty = (num_pending_docids == 0);
+
+    // Very first docid: nothing buffered and nothing encoded yet.
+    if (buffer_empty && encoded.empty()) {
+        write_header(docid);
+        last_block_end = docid;
+        ++num_docids;
+        return;
+    }
+
+    // Deduplicate against the last inserted value. With an empty buffer
+    // that value is the end of the previously encoded data; otherwise it is
+    // the newest buffered entry.
+    const uint32_t previous_docid =
+        buffer_empty ? last_block_end : pending_docids[num_pending_docids - 1];
+    if (docid == previous_docid) {
+        return;
+    }
+
+    pending_docids[num_pending_docids] = docid;
+    ++num_pending_docids;
+    ++num_docids;
+
+    if (num_pending_docids < 128) {
+        return;
+    }
+
+    // Buffer is full: encode it and start a new block after this docid.
+    append_block();
+    num_pending_docids = 0;
+    last_block_end = docid;
+}
+
+// Encodes the docids still held in the pending buffer (fewer than a full
+// block of 128) and appends them to `encoded`.
+//
+// Does nothing when the buffer is empty. After a successful flush the
+// buffer is marked empty, so calling finish() again is a no-op instead of
+// appending the same partial block a second time.
+void PostingListBuilder::finish()
+{
+    if (num_pending_docids == 0) {
+        return;
+    }
+
+    assert(!encoded.empty());  // write_header() should already have run.
+
+    // No interleaving for partial blocks.
+    unsigned char buf[P4NENC_BOUND(128)];
+    unsigned char *end = p4d1enc32(pending_docids, num_pending_docids, buf, last_block_end);
+    encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
+
+    // The pending docids are now part of `encoded`. Remember the last one
+    // (add_docid() deduplicates against last_block_end when the buffer is
+    // empty) and clear the buffer so the block cannot be emitted twice.
+    last_block_end = pending_docids[num_pending_docids - 1];
+    num_pending_docids = 0;
+}
+
+// Encodes the full pending buffer (exactly 128 docids) with p4d1enc128v32,
+// passing last_block_end as the encoder's final argument, and appends the
+// resulting bytes to `encoded`. The buffer count and last_block_end are not
+// touched here; the caller resets them.
+void PostingListBuilder::append_block()
+{
+    assert(num_pending_docids == 128);
+
+    unsigned char scratch[P4NENC_BOUND(128)];
+    unsigned char *scratch_end =
+        p4d1enc128v32(pending_docids, 128, scratch, last_block_end);
+
+    char *first_byte = reinterpret_cast<char *>(scratch);
+    char *past_last_byte = reinterpret_cast<char *>(scratch_end);
+    encoded.append(first_byte, past_last_byte);
+}
+
+// Encodes a single docid with p4nd1enc128v32 and appends the bytes to
+// `encoded`. add_docid() calls this only for the very first docid, while
+// `encoded` is still empty.
+void PostingListBuilder::write_header(uint32_t docid)
+{
+    unsigned char scratch[P4NENC_BOUND(1)];
+    size_t header_len = p4nd1enc128v32(&docid, 1, scratch);
+
+    char *header_bytes = reinterpret_cast<char *>(scratch);
+    encoded.append(header_bytes, header_len);
+}
+