X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=updatedb.cpp;h=e1320b3e1ef58affd7dda61f7486921589754f8f;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=f655aa33fe6b80468b17b81c59f732594af3a7dc;hpb=dd80692a89481eeff56ebcb1f5be9edf179ca766;p=plocate diff --git a/updatedb.cpp b/updatedb.cpp index f655aa3..e1320b3 100644 --- a/updatedb.cpp +++ b/updatedb.cpp @@ -92,7 +92,11 @@ int opendir_noatime(int dirfd, const char *path) static bool noatime_failed = false; if (!noatime_failed) { +#ifdef O_NOATIME int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY | O_NOATIME); +#else + int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY); +#endif if (fd != -1) { return fd; } else if (errno == EPERM) { @@ -526,7 +530,7 @@ string ExistingDB::read_next_dictionary() const // “parent_dev” must be the device of the parent directory of “path”. // // Takes ownership of fd. -int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, Corpus *corpus, DictionaryBuilder *dict_builder) +int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, DatabaseReceiver *corpus, DictionaryBuilder *dict_builder) { if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, path)) { if (conf_debug_pruning) { @@ -598,11 +602,20 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti // by reading from the database. (We still need to open and stat everything, // though, but that happens in a later step.) entries = move(db_entries); + if (conf_verbose) { + for (const entry &e : entries) { + printf("%s/%s\n", path.c_str(), e.name.c_str()); + } + } } else { dir = fdopendir(fd); // Takes over ownership of fd. if (dir == nullptr) { - perror("fdopendir"); - exit(1); + // fdopendir() wants to fstat() the fd to verify that it's indeed + // a directory, which can seemingly fail on at least CIFS filesystems + // if the server feels like it. 
We treat this as if we had an error + // on opening it, i.e., ignore the directory. + close(fd); + return 0; } dirent *de; @@ -619,7 +632,23 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti entry e; e.name = de->d_name; - e.is_directory = (de->d_type == DT_DIR); + if (de->d_type == DT_UNKNOWN) { + // Evidently some file systems, like older versions of XFS + // (mkfs.xfs -m crc=0 -n ftype=0), can return this, + // and we need a stat(). If we wanted to optimize for this, + // we could probably defer it to later (we're stat-ing directories + // when recursing), but this is rare, and not really worth it -- + // the second stat() will be cached anyway. + struct stat buf; + if (fstatat(fd, de->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0 && + S_ISDIR(buf.st_mode)) { + e.is_directory = true; + } else { + e.is_directory = false; + } + } else { + e.is_directory = (de->d_type == DT_DIR); + } if (conf_verbose) { printf("%s/%s\n", path.c_str(), de->d_name); @@ -688,7 +717,21 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti struct stat buf; if (fstat(e.fd, &buf) != 0) { - perror(path.c_str()); + // It's possible that this is a filesystem that's excluded + // (and the failure is e.g. because the network is down). + // As a last-ditch effort, we try to check that before dying, + // i.e., duplicate the check from further down. + // + // It would be better to be able to run filesystem_is_excluded() + // for cheap on everything and just avoid the stat, but it seems + // hard to do that without any kind of raciness. + if (filesystem_is_excluded((path_plus_slash + e.name).c_str())) { + close(e.fd); + e.fd = -1; + continue; + } + + perror((path_plus_slash + e.name).c_str()); exit(1); } @@ -737,6 +780,15 @@ int main(int argc, char **argv) // and can set whatever we want). 128k should be ample for most setups. 
rlimit rlim; if (getrlimit(RLIMIT_NOFILE, &rlim) != -1) { + // Even root cannot increase rlim_cur beyond rlim_max, + // so we need to try to increase rlim_max first. + // Ignore errors, though. + if (rlim.rlim_max < 131072) { + rlim.rlim_max = 131072; + setrlimit(RLIMIT_NOFILE, &rlim); + getrlimit(RLIMIT_NOFILE, &rlim); + } + rlim_t wanted = std::max<rlim_t>(rlim.rlim_cur, 131072); rlim.rlim_cur = std::min(wanted, rlim.rlim_max); setrlimit(RLIMIT_NOFILE, &rlim); // Ignore errors. @@ -762,9 +814,9 @@ int main(int argc, char **argv) owner = grp->gr_gid; } - DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary()); + DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary(), conf_check_visibility); db.set_conf_block(conf_block); - Corpus *corpus = db.start_corpus(/*store_dir_times=*/true); + DatabaseReceiver *corpus = db.start_corpus(/*store_dir_times=*/true); int root_fd = opendir_noatime(AT_FDCWD, conf_scan_root); if (root_fd == -1) {