#include "db.h"
+#include "turbopfor-encode.h"
#include <algorithm>
-#include <arpa/inet.h>
#include <assert.h>
#include <chrono>
-#include <endian.h>
-#include <fcntl.h>
+#include <iosfwd>
#include <math.h>
#include <memory>
+#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <string>
+#include <string_view>
#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <unordered_map>
+#include <utility>
#include <vector>
#include <zstd.h>
-#include "turbopfor-encode.h"
-
#define P4NENC_BOUND(n) ((n + 127) / 128 + (n + 32) * sizeof(uint32_t))
#define dprintf(...)
//#define dprintf(...) fprintf(stderr, __VA_ARGS__);
class Corpus {
public:
Corpus(FILE *outfp, size_t block_size)  // Allocates the NUM_TRIGRAMS-slot invindex array and stores outfp and block_size.
- : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size) {}
+ : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size)
+ {
+ fill(invindex.get(), invindex.get() + NUM_TRIGRAMS, nullptr);  // Every slot starts out null, i.e. no builder yet.
+ }
+ ~Corpus()  // Deletes the object behind each of the NUM_TRIGRAMS invindex slots.
+ {
+ for (unsigned i = 0; i < NUM_TRIGRAMS; ++i) {
+ delete invindex[i];  // delete on a null slot is a no-op.
+ }
+ }
+
void add_file(string filename);
void flush_block();
hdr.version = -1; // Mark as broken.
hdr.hashtable_size = 0; // Not known yet.
hdr.extra_ht_slots = num_overflow_slots;
+ hdr.num_docids = 0;
hdr.hash_table_offset_bytes = -1; // We don't know these offsets yet.
hdr.filename_index_offset_bytes = -1;
fwrite(&hdr, sizeof(hdr), 1, outfp);