1 #include "database-builder.h"
24 using namespace std::chrono;
26 bool use_debug = false;
29 DBE_NORMAL = 0, /* A non-directory file */
30 DBE_DIRECTORY = 1, /* A directory */
31 DBE_END = 2 /* End of directory contents; contains no name */
39 uint8_t check_visibility;
// Reads a string from fp and returns it by value.
// NOTE(review): the body is not visible in this excerpt; the name suggests
// it reads up to a terminating NUL byte -- confirm against the body.
50 string read_cstr(FILE *fp)
// Reads directory data from fp and reports each entry to receiver through
// add_file(). Called by read_mlocate().
// Only part of the body is visible in this excerpt; the comments below
// describe the visible lines only.
66 void handle_directory(FILE *fp, DatabaseReceiver *receiver)
// Read one object of sizeof(dummy) bytes; any result other than 1 means the
// read did not complete. (The name `dummy` suggests the contents are not
// used -- TODO confirm.)
69 if (fread(&dummy, sizeof(dummy), 1, fp) != 1) {
// Path of the directory, used as the prefix for every entry reported below.
77 string dir_path = read_cstr(fp);
// The root directory is special-cased; presumably this avoids a doubled
// slash when "/" is appended below -- TODO confirm (body not visible).
78 if (dir_path == "/") {
// Non-directory entry: read its name and report dir_path + "/" + name.
84 if (type == DBE_NORMAL) {
85 string filename = read_cstr(fp);
86 receiver->add_file(dir_path + "/" + filename);
// Directory entry: reported to the receiver the same way as a file.
87 } else if (type == DBE_DIRECTORY) {
88 string dirname = read_cstr(fp);
89 receiver->add_file(dir_path + "/" + dirname);
91 return; // Probably end.
// Reads fp as text and passes each line it assembles to
// receiver->add_file(). Lines longer than the read buffer are assembled
// from several fgets() calls.
// Only part of the body is visible in this excerpt.
96 void read_plaintext(FILE *fp, DatabaseReceiver *receiver)
// Always start from the beginning of the file, regardless of where a
// previous reader left the position.
98 if (fseek(fp, 0, SEEK_SET) != 0) {
// fgets() returns nullptr at end-of-file or on a read error.
105 if (fgets(buf, sizeof(buf), fp) == nullptr) {
// Keep reading until the line's terminating newline has been seen or the
// file ends.
// NOTE(review): s.back() requires s to be non-empty; whether that is
// guaranteed here depends on lines not visible in this excerpt -- confirm.
110 while (s.back() != '\n' && !feof(fp)) {
111 // The string was longer than the buffer, so read again.
112 if (fgets(buf, sizeof(buf), fp) == nullptr) {
// Detects a trailing newline; presumably it is removed before the name is
// handed over (the guarded statement is not visible here).
117 if (!s.empty() && s.back() == '\n')
// Hand the line to the receiver; move() avoids copying the string.
119 receiver->add_file(move(s));
// Reads an mlocate database from fp: header, a leading string, the
// configuration block (skipped), and then directory data, which is delegated
// to handle_directory().
// Only part of the body is visible in this excerpt.
123 void read_mlocate(FILE *fp, DatabaseReceiver *receiver)
// Always start from the beginning of the file.
125 if (fseek(fp, 0, SEEK_SET) != 0) {
// Read the header as a single object of sizeof(hdr) bytes.
131 if (fread(&hdr, sizeof(hdr), 1, fp) != 1) {
// NOTE(review): perror() prints the message for errno; if the short read
// is simply end-of-file, errno may not describe the problem.
132 perror("short read");
136 // TODO: Care about the base path.
// Read and, per the TODO above, currently ignore the string that follows
// the header.
137 string path = read_cstr(fp);
// Skip the configuration block. conf_size is passed through ntohl(), i.e.
// it is converted from network byte order before being used as the byte
// count to seek past.
139 if (fseek(fp, ntohl(hdr.conf_size), SEEK_CUR) != 0) {
140 perror("skip conf block");
// Presumably invoked repeatedly until the input is exhausted (the
// surrounding loop is not visible here).
145 handle_directory(fp, receiver);
// Builds the database `outfile` from `infile`, reading the input twice:
// once into a DictionaryBuilder to train a compression dictionary, and once
// into the corpus of a DatabaseBuilder that is constructed with that
// dictionary.
//
//   infile     - path of the input, opened in binary read mode.
//   outfile    - path handed to DatabaseBuilder.
//   block_size - forwarded to both DictionaryBuilder and DatabaseBuilder.
//   plaintext  - presumably selects read_plaintext() over read_mlocate();
//                the branches themselves are not visible in this excerpt.
149 void do_build(const char *infile, const char *outfile, int block_size, bool plaintext)
151 FILE *infp = fopen(infile, "rb");
152 if (infp == nullptr) {
157 // Train the dictionary by sampling real blocks.
158 // The documentation for ZDICT_trainFromBuffer() claims that a reasonable
159 // dictionary size is ~100 kB, but 1 kB seems to actually compress better for us,
160 // and decompress just as fast.
161 DictionaryBuilder builder(/*blocks_to_keep=*/1000, block_size);
// First pass over the input: feed the filenames to the dictionary builder.
163 read_plaintext(infp, &builder);
165 read_mlocate(infp, &builder);
// 1024 corresponds to the ~1 kB dictionary size discussed above.
167 string dictionary = builder.train(1024);
169 DatabaseBuilder db(outfile, block_size, dictionary);
170 Corpus *corpus = db.start_corpus();
// Second pass over the same FILE*: both readers seek back to offset 0
// themselves, so the position left by the first pass does not matter.
172 read_plaintext(infp, corpus);
174 read_mlocate(infp, corpus);
// dprintf here takes a format string as its first argument, so it is not
// POSIX dprintf(int fd, ...); presumably a project debug-print helper.
178 dprintf("Read %zu files from %s\n", corpus->num_files, infile);
185 "Usage: plocate-build MLOCATE_DB PLOCATE_DB\n"
187 "Generate plocate index from mlocate.db, typically /var/lib/mlocate/mlocate.db.\n"
188 "Normally, the destination should be /var/lib/mlocate/plocate.db.\n"
190 " -b, --block-size SIZE number of filenames to store in each block (default 32)\n"
191 " -p, --plaintext input is a plaintext file, not an mlocate database\n"
192 " --help print this help\n"
193 " --version print version information\n");
198 printf("plocate-build %s\n", PLOCATE_VERSION);
199 printf("Copyright 2020 Steinar H. Gunderson\n");
200 printf("License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>.\n");
201 printf("This is free software: you are free to change and redistribute it.\n");
202 printf("There is NO WARRANTY, to the extent permitted by law.\n");
// Entry point: parses command-line options with getopt_long(), requires
// exactly two positional arguments (input and output path), and calls
// do_build().
// Only part of the body is visible in this excerpt.
205 int main(int argc, char **argv)
// Each long option maps to the short option character in its last field;
// those characters match the short-option string passed to getopt_long()
// below.
207 static const struct option long_options[] = {
208 { "block-size", required_argument, 0, 'b' },
209 { "plaintext", no_argument, 0, 'p' },
210 { "help", no_argument, 0, 'h' },
211 { "version", no_argument, 0, 'V' },
212 { "debug", no_argument, 0, 'D' }, // Not documented.
217 bool plaintext = false;
// Adopt the locale settings from the environment.
219 setlocale(LC_ALL, "");
221 int option_index = 0;
// "b:" means -b takes an argument; h, p, V and D are plain flags.
222 int c = getopt_long(argc, argv, "b:hpVD", long_options, &option_index);
// NOTE(review): atoi() does not report parse errors, so a non-numeric
// argument yields 0; whether the value is validated afterwards is not
// visible in this excerpt.
228 block_size = atoi(optarg);
// Exactly two non-option arguments must remain after option parsing.
247 if (argc - optind != 2) {
// argv[optind] is the input file, argv[optind + 1] the output file.
252 do_build(argv[optind], argv[optind + 1], block_size, plaintext);