The trigram distribution is long-tailed, so allocating room for 128 docids
up front was seemingly a waste. Saves ~20% more RAM in plocate-build.
void write_header(uint32_t docid);
void append_block();
void write_header(uint32_t docid);
void append_block();
- uint32_t pending_docids[128];
- unsigned num_pending_docids = 0;
+ vector<uint32_t> pending_docids;
uint32_t last_block_end;
};
uint32_t last_block_end;
};
void PostingListBuilder::add_docid(uint32_t docid)
{
// Deduplicate against the last inserted value, if any.
void PostingListBuilder::add_docid(uint32_t docid)
{
// Deduplicate against the last inserted value, if any.
- if (num_pending_docids == 0) {
+ if (pending_docids.empty()) {
if (encoded.empty()) {
// Very first docid.
write_header(docid);
if (encoded.empty()) {
// Very first docid.
write_header(docid);
- if (docid == pending_docids[num_pending_docids - 1]) {
+ if (docid == pending_docids.back()) {
- pending_docids[num_pending_docids++] = docid;
- if (num_pending_docids == 128) {
+ pending_docids.push_back(docid);
+ if (pending_docids.size() == 128) {
- num_pending_docids = 0;
+ pending_docids.clear();
last_block_end = docid;
}
++num_docids;
last_block_end = docid;
}
++num_docids;
void PostingListBuilder::finish()
{
void PostingListBuilder::finish()
{
- if (num_pending_docids == 0) {
+ if (pending_docids.empty()) {
// No interleaving for partial blocks.
unsigned char buf[P4NENC_BOUND(128)];
// No interleaving for partial blocks.
unsigned char buf[P4NENC_BOUND(128)];
- unsigned char *end = p4d1enc32(pending_docids, num_pending_docids, buf, last_block_end);
+ unsigned char *end = p4d1enc32(pending_docids.data(), pending_docids.size(), buf, last_block_end);
encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
}
void PostingListBuilder::append_block()
{
unsigned char buf[P4NENC_BOUND(128)];
encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
}
void PostingListBuilder::append_block()
{
unsigned char buf[P4NENC_BOUND(128)];
- assert(num_pending_docids == 128);
- unsigned char *end = p4d1enc128v32(pending_docids, 128, buf, last_block_end);
+ assert(pending_docids.size() == 128);
+ unsigned char *end = p4d1enc128v32(pending_docids.data(), 128, buf, last_block_end);
encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
}
encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
}