#include <fcntl.h>
#include <getopt.h>
#include <grp.h>
+#include <inttypes.h>
#include <iosfwd>
#include <math.h>
#include <memory>
// Takes ownership of fd.
int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, DatabaseReceiver *corpus, DictionaryBuilder *dict_builder)
{
- if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, path)) {
- if (conf_debug_pruning) {
- /* This is debugging output, don't mark anything for translation */
- fprintf(stderr, "Skipping `%s': in prunepaths\n", path.c_str());
- }
- close(fd);
- return 0;
- }
if (conf_prune_bind_mounts && is_bind_mount(path.c_str())) {
if (conf_debug_pruning) {
/* This is debugging output, don't mark anything for translation */
} else {
dir = fdopendir(fd); // Takes over ownership of fd.
if (dir == nullptr) {
- perror("fdopendir");
- exit(1);
+ // fdopendir() wants to fstat() the fd to verify that it's indeed
+ // a directory, which can seemingly fail on at least CIFS filesystems
+ // if the server feels like it. We treat this as if we had an error
+ // on opening it, i.e., ignore the directory.
+ close(fd);
+ return 0;
}
dirent *de;
entry e;
e.name = de->d_name;
- e.is_directory = (de->d_type == DT_DIR);
+ if (de->d_type == DT_UNKNOWN) {
+ // Evidently some file systems, like older versions of XFS
+ // (mkfs.xfs -m crc=0 -n ftype=0), can return this,
+ // and we need a stat(). If we wanted to optimize for this,
+ // we could probably defer it to later (we're stat-ing directories
+ // when recursing), but this is rare, and not really worth it --
+ // the second stat() will be cached anyway.
+ struct stat buf;
+ if (fstatat(fd, de->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0 &&
+ S_ISDIR(buf.st_mode)) {
+ e.is_directory = true;
+ } else {
+ e.is_directory = false;
+ }
+ } else {
+ e.is_directory = (de->d_type == DT_DIR);
+ }
if (conf_verbose) {
printf("%s/%s\n", path.c_str(), de->d_name);
}
continue;
}
+ if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, (path_plus_slash + e.name).c_str())) {
+ if (conf_debug_pruning) {
+ /* This is debugging output, don't mark anything for translation */
+ fprintf(stderr, "Skipping `%s/%s': in prunepaths\n", path.c_str(), e.name.c_str());
+ }
+ continue;
+ }
e.fd = opendir_noatime(fd, e.name.c_str());
if (e.fd == -1) {
if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) {
fprintf(stderr, "Hint: Try `ulimit -n 131072' or similar.\n");
} else {
- fprintf(stderr, "Hint: Try `ulimit -n %lu' or similar (current limit is %lu).\n",
- rlim.rlim_cur * 2, rlim.rlim_cur);
+ fprintf(stderr, "Hint: Try `ulimit -n %" PRIu64 " or similar (current limit is %" PRIu64 ").\n",
+ static_cast<uint64_t>(rlim.rlim_cur * 2), static_cast<uint64_t>(rlim.rlim_cur));
}
exit(1);
}
struct stat buf;
if (fstat(e.fd, &buf) != 0) {
- perror(path.c_str());
+ // It's possible that this is a filesystem that's excluded
+ // (and the failure is e.g. because the network is down).
+ // As a last-ditch effort, we try to check that before dying,
+ // i.e., duplicate the check from further down.
+ //
+ // It would be better to be able to run filesystem_is_excluded()
+ // for cheap on everything and just avoid the stat, but it seems
+ // hard to do that without any kind of raciness.
+ if (filesystem_is_excluded((path_plus_slash + e.name).c_str())) {
+ close(e.fd);
+ e.fd = -1;
+ continue;
+ }
+
+ perror((path_plus_slash + e.name).c_str());
exit(1);
}