1 #include "database-builder.h"
25 using namespace std::chrono;
// Global debug switch, off by default.
// NOTE(review): presumably enabled by the undocumented -D/--debug option and
// consulted by dprintf(); neither the assignment nor the reader is visible
// in this view -- confirm.
27 bool use_debug = false;
// Entry-type tags; handle_directory() compares the `type` it reads against
// DBE_NORMAL and DBE_DIRECTORY and treats anything else as the end of the
// directory.
// NOTE(review): the numeric values presumably mirror the mlocate on-disk
// format -- confirm against the mlocate.db specification.
30 DBE_NORMAL = 0, /* A non-directory file */
31 DBE_DIRECTORY = 1, /* A directory */
32 DBE_END = 2 /* End of directory contents; contains no name */
// NOTE(review): member of a struct whose declaration is not visible in this
// view; presumably the visibility flag of the input database header.
// Nothing visible here reads it (do_build() uses the command-line value
// instead) -- confirm.
40 uint8_t check_visibility;
// Reads one string from `fp` and returns it by value. Used below for the
// database base path, directory paths and entry names.
// NOTE(review): the body is not visible in this view; the name suggests it
// reads up to a terminating NUL byte -- confirm, including its behaviour on
// EOF or read errors.
51 string read_cstr(FILE *fp)
// Processes one directory record from `fp`, forwarding every entry it
// contains to `receiver`.
//
//   fp       - input stream positioned at the start of a directory record.
//   receiver - sink that gets one add_file() call per entry.
//
// NOTE(review): only part of the body is visible in this view (declarations,
// the loop header and the error branches are missing); the comments below
// describe the visible lines only.
67 void handle_directory(FILE *fp, DatabaseReceiver *receiver)
// Read the fixed-size per-directory record into `dummy`; anything other than
// one complete record takes the branch below (its contents are not visible).
70 if (fread(&dummy, sizeof(dummy), 1, fp) != 1) {
// The directory's own path follows the fixed-size record.
78 string dir_path = read_cstr(fp);
// The root directory is special-cased; presumably so that appending
// "/" + name below does not yield a doubled slash -- TODO confirm.
79 if (dir_path == "/") {
// Plain file entry: a name follows; report it under this directory.
85 if (type == DBE_NORMAL) {
86 string filename = read_cstr(fp);
87 receiver->add_file(dir_path + "/" + filename, unknown_dir_time);
// Subdirectory entry: reported exactly like a file, also without a known
// directory time.
88 } else if (type == DBE_DIRECTORY) {
89 string dirname = read_cstr(fp);
90 receiver->add_file(dir_path + "/" + dirname, unknown_dir_time);
// Any other tag ends processing of this directory.
92 return; // Probably end.
// Reads newline-separated filenames from `fp` and hands each one to
// `receiver`.
//
//   fp       - input stream; it is rewound first, so the same stream can be
//              read more than once (do_build() calls this twice).
//   receiver - sink that gets one add_file() call per line.
//
// NOTE(review): only part of the body is visible in this view; the comments
// below describe the visible lines only.
97 void read_plaintext(FILE *fp, DatabaseReceiver *receiver)
// Always start from the beginning of the input.
99 if (fseek(fp, 0, SEEK_SET) != 0) {
// Fetch the first chunk of the next line; nullptr means EOF or a read error.
106 if (fgets(buf, sizeof(buf), fp) == nullptr) {
// Keep reading while the line is not yet newline-terminated and more input
// remains.
// NOTE(review): `s.back()` is undefined on an empty string. How `s` is built
// is not visible here -- confirm it cannot be empty (e.g. a line that begins
// with a NUL byte).
111 while (s.back() != '\n' && !feof(fp)) {
112 // The string was longer than the buffer, so read again.
113 if (fgets(buf, sizeof(buf), fp) == nullptr) {
// Detect a trailing newline; the statement this guards is not visible
// (presumably it strips the newline -- confirm).
118 if (!s.empty() && s.back() == '\n')
// Hand the line over by move; no directory time is known for plaintext input.
120 receiver->add_file(move(s), unknown_dir_time);
// Reads an mlocate-format database from `fp`, forwarding its entries to
// `receiver` via handle_directory().
//
//   fp       - input stream; it is rewound first, so the same stream can be
//              read more than once (do_build() calls this twice).
//   receiver - sink passed through to handle_directory().
//
// NOTE(review): only part of the body is visible in this view; the comments
// below describe the visible lines only.
124 void read_mlocate(FILE *fp, DatabaseReceiver *receiver)
// Always start from the beginning of the input.
126 if (fseek(fp, 0, SEEK_SET) != 0) {
// Read the fixed-size file header in one go.
132 if (fread(&hdr, sizeof(hdr), 1, fp) != 1) {
// NOTE(review): a short read at EOF does not necessarily set errno, so
// perror() may append an unrelated error text here -- confirm whether a
// plain fprintf() would be more accurate.
133 perror("short read");
137 // TODO: Care about the base path.
// The base path is read from the stream; per the TODO above it is not
// otherwise taken into account.
138 string path = read_cstr(fp);
// Skip the configuration block; its size is converted from network byte
// order with ntohl() before being used as a relative seek offset.
140 if (fseek(fp, ntohl(hdr.conf_size), SEEK_CUR) != 0) {
141 perror("skip conf block");
// Process directory records; the surrounding loop (if any) is not visible
// in this view.
146 handle_directory(fp, receiver);
// Builds the output database `outfile` from `infile`, reading the input
// twice: once to train a compression dictionary and once to write the
// actual corpus.
//
//   infile           - path of the input (plaintext list or mlocate database).
//   outfile          - path handed to DatabaseBuilder.
//   block_size       - passed to both DictionaryBuilder and DatabaseBuilder.
//   plaintext        - presumably selects read_plaintext() over read_mlocate();
//                      the branch itself is not visible here -- confirm.
//   check_visibility - passed through to DatabaseBuilder.
//
// NOTE(review): only part of the body is visible in this view (error
// handling, branch conditions and any cleanup such as fclose() are missing).
150 void do_build(const char *infile, const char *outfile, int block_size, bool plaintext, bool check_visibility)
// Open the input in binary mode.
152 FILE *infp = fopen(infile, "rb");
153 if (infp == nullptr) {
158 // Train the dictionary by sampling real blocks.
159 // The documentation for ZDICT_trainFromBuffer() claims that a reasonable
160 // dictionary size is ~100 kB, but 1 kB seems to actually compress better for us,
161 // and decompress just as fast.
162 DictionaryBuilder builder(/*blocks_to_keep=*/1000, block_size);
// First pass: feed the input into the dictionary builder.
164 read_plaintext(infp, &builder);
166 read_mlocate(infp, &builder);
// 1024 matches the 1 kB dictionary size discussed in the comment above.
168 string dictionary = builder.train(1024);
// Second pass: create the real database using the trained dictionary.
170 DatabaseBuilder db(outfile, /*owner=*/-1, block_size, dictionary, check_visibility);
171 DatabaseReceiver *corpus = db.start_corpus(/*store_dir_times=*/false);
// Both readers rewind the stream themselves, so the same FILE can be reused.
173 read_plaintext(infp, corpus);
175 read_mlocate(infp, corpus);
// Report how many files the corpus received.
179 dprintf("Read %zu files from %s\n", corpus->num_files_seen(), infile);
// Help text. The enclosing function and the call that prints it are not
// visible in this view.
// NOTE(review): the synopsis line has no placeholder for options although
// several are listed below, and it names the input MLOCATE_DB even though
// -p accepts a plaintext file. The accepted FLAG values for
// --require-visibility (parse_bool() recognises "0", "no", "1", "yes") are
// not mentioned. --debug is intentionally absent (see long_options).
186 "Usage: plocate-build MLOCATE_DB PLOCATE_DB\n"
188 "Generate plocate index from mlocate.db, typically /var/lib/mlocate/mlocate.db.\n"
189 "Normally, the destination should be /var/lib/mlocate/plocate.db.\n"
191 " -b, --block-size SIZE number of filenames to store in each block (default 32)\n"
192 " -p, --plaintext input is a plaintext file, not an mlocate database\n"
193 " -l, --require-visibility FLAG check visibility before reporting files\n"
194 " --help print this help\n"
195 " --version print version information\n");
// Version and licence banner written to stdout. The enclosing function is
// not visible in this view.
// PACKAGE_VERSION is not defined in the visible source; presumably it is
// supplied by the build configuration -- confirm.
200 printf("plocate-build %s\n", PACKAGE_VERSION);
201 printf("Copyright 2020 Steinar H. Gunderson\n");
202 printf("License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>.\n");
203 printf("This is free software: you are free to change and redistribute it.\n");
204 printf("There is NO WARRANTY, to the extent permitted by law.\n");
// Parses a textual boolean from `str`.
//
//   str    - text to interpret; "0"/"no" and "1"/"yes" are recognised.
//   result - out-parameter; presumably receives false/true respectively --
//            the assignments are not visible here, confirm.
//
// NOTE(review): the return statements are not visible. main() treats a
// false return as an invalid value, so presumably true means "parsed
// successfully" -- confirm.
207 bool parse_bool(const string &str, bool *result)
// Spellings for "false".
209 if (str == "0" || str == "no") {
// Spellings for "true".
213 if (str == "1" || str == "yes") {
// Program entry point: parses the command line and runs do_build() on the
// two positional arguments (input path, output path).
//
// NOTE(review): only part of the body is visible in this view (the option
// loop, the switch and the error exits are missing); the comments below
// describe the visible lines only.
220 int main(int argc, char **argv)
// Long options; each maps to the short option character given in the last
// field.
222 static const struct option long_options[] = {
223 { "block-size", required_argument, 0, 'b' },
224 { "plaintext", no_argument, 0, 'p' },
225 { "require-visibility", required_argument, 0, 'l' },
226 { "help", no_argument, 0, 'h' },
227 { "version", no_argument, 0, 'V' },
228 { "debug", no_argument, 0, 'D' }, // Not documented.
// Defaults: input is treated as an mlocate database and visibility checking
// is on.
233 bool plaintext = false;
234 bool check_visibility = true;
// Adopt the locale from the environment.
236 setlocale(LC_ALL, "");
238 int option_index = 0;
// The short-option string must stay in sync with long_options above
// ('b' and 'l' take an argument).
239 int c = getopt_long(argc, argv, "b:hpl:VD", long_options, &option_index);
// NOTE(review): atoi() cannot report errors; non-numeric input yields 0 and
// no range check is visible here -- confirm downstream code tolerates
// zero or negative block sizes.
245 block_size = atoi(optarg);
// NOTE(review): `!parse_bool(...) != 0` is equivalent to `!parse_bool(...)`;
// the trailing `!= 0` is redundant and makes the condition harder to read.
251 if (!parse_bool(optarg, &check_visibility) != 0) {
252 fprintf(stderr, "plocate-build: invalid value `%s' for --%s\n",
253 optarg, "require-visibility");
// Exactly two positional arguments must remain after option parsing.
271 if (argc - optind != 2) {
// argv[optind] is the input path, argv[optind + 1] the output path.
276 do_build(argv[optind], argv[optind + 1], block_size, plaintext, check_visibility);