+// Builds the database `outfile` from `infile` using two passes over the same
+// input stream: the first pass feeds a DictionaryBuilder from which a
+// compression dictionary is trained, the second pass feeds the corpus of a
+// DatabaseBuilder constructed with that dictionary. `plaintext` selects
+// read_plaintext() instead of read_mlocate() as the reader for both passes.
+// If `infile` cannot be opened, the error is reported with perror() and the
+// process exits with status 1.
+void do_build(const char *infile, const char *outfile, int block_size, bool plaintext)
+{
+	// The documentation for ZDICT_trainFromBuffer() suggests that a reasonable
+	// dictionary size is ~100 kB, but 1 kB seems to actually compress better
+	// for us, and decompresses just as fast.
+	const int dictionary_size = 1024;
+
+	FILE *input = fopen(infile, "rb");
+	if (!input) {
+		perror(infile);
+		exit(1);
+	}
+
+	// Pass 1: sample real blocks from the input and train the dictionary on them.
+	DictionaryBuilder sampler(/*blocks_to_keep=*/1000, block_size);
+	if (!plaintext) {
+		read_mlocate(input, &sampler);
+	} else {
+		read_plaintext(input, &sampler);
+	}
+	string trained_dictionary = sampler.train(dictionary_size);
+
+	// Pass 2: read the input again, this time into the database's corpus.
+	// NOTE(review): the stream is not rewound here between the two passes;
+	// presumably the readers seek back to the start themselves -- confirm.
+	DatabaseBuilder database(outfile, block_size, trained_dictionary);
+	Corpus *file_corpus = database.start_corpus();
+	if (!plaintext) {
+		read_mlocate(input, file_corpus);
+	} else {
+		read_plaintext(input, file_corpus);
+	}
+	fclose(input);
+
+	dprintf("Read %zu files from %s\n", file_corpus->num_files, infile);
+	database.finish_corpus();
+}
+