git.sesse.net Git - plocate/blobdiff - plocate-build.cpp
Implement support for larger basevals in TurboPFor.
[plocate] / plocate-build.cpp
index e273e6ca79e6d369e959ba1c3e0df4b264e25ce4..023aedb53339da12b0e5487f662d5193fca6943f 100644 (file)
@@ -2,20 +2,19 @@
 #include "turbopfor-encode.h"
 
 #include <algorithm>
-#include <arpa/inet.h>
 #include <assert.h>
 #include <chrono>
-#include <endian.h>
-#include <fcntl.h>
+#include <iosfwd>
 #include <math.h>
 #include <memory>
+#include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <string>
+#include <string_view>
 #include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <unordered_map>
+#include <utility>
 #include <vector>
 #include <zstd.h>
 
@@ -144,7 +143,17 @@ void PostingListBuilder::write_header(uint32_t docid)
 class Corpus {
 public:
        Corpus(FILE *outfp, size_t block_size)
-               : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size) {}
+               : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size)
+       {
+               fill(invindex.get(), invindex.get() + NUM_TRIGRAMS, nullptr);
+       }
+       ~Corpus()
+       {
+               for (unsigned i = 0; i < NUM_TRIGRAMS; ++i) {
+                       delete invindex[i];
+               }
+       }
+
        void add_file(string filename);
        void flush_block();
 
@@ -364,6 +373,7 @@ void do_build(const char *infile, const char *outfile, int block_size)
        hdr.version = -1;  // Mark as broken.
        hdr.hashtable_size = 0;  // Not known yet.
        hdr.extra_ht_slots = num_overflow_slots;
+       hdr.num_docids = 0;
        hdr.hash_table_offset_bytes = -1;  // We don't know these offsets yet.
        hdr.filename_index_offset_bytes = -1;
        fwrite(&hdr, sizeof(hdr), 1, outfp);