- // Train the dictionary by sampling real blocks.
- // The documentation for ZDICT_trainFromBuffer() claims that a reasonable
- // dictionary size is ~100 kB, but 1 kB seems to actually compress better for us,
- // and decompress just as fast.
- DictionaryBuilder builder(/*blocks_to_keep=*/1000, block_size);
- read_mlocate(infile, &builder);
- string dictionary = builder.train(1024);
- ZSTD_CDict *cdict = ZSTD_createCDict(dictionary.data(), dictionary.size(), /*level=*/6);
-
- hdr.zstd_dictionary_offset_bytes = ftell(outfp);
- fwrite(dictionary.data(), dictionary.size(), 1, outfp);
- hdr.zstd_dictionary_length_bytes = dictionary.size();
-
- Corpus corpus(outfp, block_size, cdict);
- read_mlocate(infile, &corpus);
- if (false) { // To read a plain text file.
- FILE *fp = fopen(infile, "r");
- while (!feof(fp)) {
- char buf[1024];
- if (fgets(buf, 1024, fp) == nullptr || feof(fp)) {
- break;
- }
- string s(buf);
- if (s.back() == '\n')
- s.pop_back();
- corpus.add_file(move(s));
- }
- fclose(fp);
+ if (dictionary.empty()) {
+ hdr.zstd_dictionary_offset_bytes = 0;
+ hdr.zstd_dictionary_length_bytes = 0;
+ } else {
+ hdr.zstd_dictionary_offset_bytes = ftell(outfp);
+ fwrite(dictionary.data(), dictionary.size(), 1, outfp);
+ hdr.zstd_dictionary_length_bytes = dictionary.size();
+ cdict = ZSTD_createCDict(dictionary.data(), dictionary.size(), /*level=*/6);