#include <fcntl.h>
#include <getopt.h>
#include <grp.h>
+#include <inttypes.h>
#include <iosfwd>
#include <math.h>
#include <memory>
static bool noatime_failed = false;
if (!noatime_failed) {
+#ifdef O_NOATIME
int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY | O_NOATIME);
+#else
+ int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY);
+#endif
if (fd != -1) {
return fd;
} else if (errno == EPERM) {
dev_t dev;
};
-bool filesystem_is_excluded(const char *path)
+bool filesystem_is_excluded(const string &path)
{
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
- fprintf(stderr, "Checking whether filesystem `%s' is excluded:\n", path);
+ fprintf(stderr, "Checking whether filesystem `%s' is excluded:\n", path.c_str());
}
FILE *f = setmntent("/proc/mounts", "r");
if (f == nullptr) {
struct mntent *me;
while ((me = getmntent(f)) != nullptr) {
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
fprintf(stderr, " `%s', type `%s'\n", me->mnt_dir, me->mnt_type);
}
+ if (path != me->mnt_dir) {
+ continue;
+ }
string type(me->mnt_type);
for (char &p : type) {
p = toupper(p);
}
- if (find(conf_prunefs.begin(), conf_prunefs.end(), type) != conf_prunefs.end()) {
- /* Paths in /proc/self/mounts contain no symbolic links. Besides
- avoiding a few system calls, avoiding the realpath () avoids hangs
- if the filesystem is unavailable hard-mounted NFS. */
- char *dir = me->mnt_dir;
- if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
- fprintf(stderr, " => type matches, dir `%s'\n", dir);
- }
- bool res = (strcmp(path, dir) == 0);
- if (dir != me->mnt_dir)
- free(dir);
- if (res) {
- endmntent(f);
- return true;
- }
+ bool exclude = (find(conf_prunefs.begin(), conf_prunefs.end(), type) != conf_prunefs.end());
+ if (exclude && conf_debug_pruning) {
+ fprintf(stderr, " => excluded due to filesystem type\n");
}
+ endmntent(f);
+ return exclude;
}
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
- fprintf(stderr, "...done\n");
+ fprintf(stderr, "...not found in mount list\n");
}
endmntent(f);
return false;
// “parent_dev” must be the device of the parent directory of “path”.
//
// Takes ownership of fd.
-int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, Corpus *corpus, DictionaryBuilder *dict_builder)
+int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, DatabaseReceiver *corpus, DictionaryBuilder *dict_builder)
{
if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, path)) {
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
fprintf(stderr, "Skipping `%s': in prunepaths\n", path.c_str());
}
close(fd);
}
if (conf_prune_bind_mounts && is_bind_mount(path.c_str())) {
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
fprintf(stderr, "Skipping `%s': bind mount\n", path.c_str());
}
close(fd);
} else {
dir = fdopendir(fd); // Takes over ownership of fd.
if (dir == nullptr) {
- perror("fdopendir");
- exit(1);
+ // fdopendir() wants to fstat() the fd to verify that it's indeed
+ // a directory, which can seemingly fail on at least CIFS filesystems
+ // if the server feels like it. We treat this as if we had an error
+ // on opening it, i.e., ignore the directory.
+ close(fd);
+ return 0;
}
dirent *de;
entry e;
e.name = de->d_name;
- e.is_directory = (de->d_type == DT_DIR);
+ if (de->d_type == DT_UNKNOWN) {
+ // Evidently some file systems, like older versions of XFS
+ // (mkfs.xfs -m crc=0 -n ftype=0), can return this,
+ // and we need a stat(). If we wanted to optimize for this,
+ // we could probably defer it to later (we're stat-ing directories
+ // when recursing), but this is rare, and not really worth it --
+ // the second stat() will be cached anyway.
+ struct stat buf;
+ if (fstatat(fd, de->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0 &&
+ S_ISDIR(buf.st_mode)) {
+ e.is_directory = true;
+ } else {
+ e.is_directory = false;
+ }
+ } else {
+ e.is_directory = (de->d_type == DT_DIR);
+ }
if (conf_verbose) {
printf("%s/%s\n", path.c_str(), de->d_name);
if (find(conf_prunenames.begin(), conf_prunenames.end(), e.name) != conf_prunenames.end()) {
if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
fprintf(stderr, "Skipping `%s': in prunenames\n", e.name.c_str());
}
continue;
if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) {
fprintf(stderr, "Hint: Try `ulimit -n 131072' or similar.\n");
} else {
- fprintf(stderr, "Hint: Try `ulimit -n %lu' or similar (current limit is %lu).\n",
- rlim.rlim_cur * 2, rlim.rlim_cur);
+ fprintf(stderr, "Hint: Try `ulimit -n %" PRIu64 "' or similar (current limit is %" PRIu64 ").\n",
+ static_cast<uint64_t>(rlim.rlim_cur * 2), static_cast<uint64_t>(rlim.rlim_cur));
}
exit(1);
}
struct stat buf;
if (fstat(e.fd, &buf) != 0) {
- perror(path.c_str());
+ // It's possible that this is a filesystem that's excluded
+ // (and the failure is e.g. because the network is down).
+ // As a last-ditch effort, we try to check that before dying,
+ // i.e., duplicate the check from further down.
+ //
+ // It would be better to be able to run filesystem_is_excluded()
+ // for cheap on everything and just avoid the stat, but it seems
+ // hard to do that without any kind of raciness.
+ if (filesystem_is_excluded(path_plus_slash + e.name)) {
+ close(e.fd);
+ e.fd = -1;
+ continue;
+ }
+
+ perror((path_plus_slash + e.name).c_str());
exit(1);
}
e.dev = buf.st_dev;
if (buf.st_dev != parent_dev) {
- if (filesystem_is_excluded((path_plus_slash + e.name).c_str())) {
+ if (filesystem_is_excluded(path_plus_slash + e.name)) {
close(e.fd);
e.fd = -1;
continue;
// and can set whatever we want). 128k should be ample for most setups.
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) != -1) {
+ // Even root cannot increase rlim_cur beyond rlim_max,
+ // so we need to try to increase rlim_max first.
+ // Ignore errors, though.
+ if (rlim.rlim_max < 131072) {
+ rlim.rlim_max = 131072;
+ setrlimit(RLIMIT_NOFILE, &rlim);
+ getrlimit(RLIMIT_NOFILE, &rlim);
+ }
+
rlim_t wanted = std::max<rlim_t>(rlim.rlim_cur, 131072);
rlim.rlim_cur = std::min<rlim_t>(wanted, rlim.rlim_max);
setrlimit(RLIMIT_NOFILE, &rlim); // Ignore errors.
conf_prepare(argc, argv);
if (conf_prune_bind_mounts) {
- bind_mount_init(MOUNTINFO_PATH);
+ bind_mount_init();
}
int fd = open(conf_output.c_str(), O_RDONLY);
owner = grp->gr_gid;
}
- DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary());
+ DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary(), conf_check_visibility);
db.set_conf_block(conf_block);
- Corpus *corpus = db.start_corpus(/*store_dir_times=*/true);
+ DatabaseReceiver *corpus = db.start_corpus(/*store_dir_times=*/true);
int root_fd = opendir_noatime(AT_FDCWD, conf_scan_root);
if (root_fd == -1) {