#include <stdlib.h>
#include <string.h>
#include <string>
+#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
-#include <sys/resource.h>
#include <unistd.h>
#include <utility>
#include <vector>
static bool noatime_failed = false;
if (!noatime_failed) {
+#ifdef O_NOATIME
int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY | O_NOATIME);
+#else
+ int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY);
+#endif
if (fd != -1) {
return fd;
} else if (errno == EPERM) {
// “parent_dev” must be the device of the parent directory of “path”.
//
// Takes ownership of fd.
-int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, Corpus *corpus, DictionaryBuilder *dict_builder)
+int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, DatabaseReceiver *corpus, DictionaryBuilder *dict_builder)
{
if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, path)) {
if (conf_debug_pruning) {
// by reading from the database. (We still need to open and stat everything,
// though, but that happens in a later step.)
entries = move(db_entries);
+ if (conf_verbose) {
+ for (const entry &e : entries) {
+ printf("%s/%s\n", path.c_str(), e.name.c_str());
+ }
+ }
} else {
dir = fdopendir(fd); // Takes over ownership of fd.
if (dir == nullptr) {
- perror("fdopendir");
- exit(1);
+ // fdopendir() wants to fstat() the fd to verify that it's indeed
+ // a directory, which can seemingly fail on at least CIFS filesystems
+ // if the server feels like it. We treat this as if we had an error
+ // on opening it, i.e., ignore the directory.
+ close(fd);
+ return 0;
}
dirent *de;
entry e;
e.name = de->d_name;
- e.is_directory = (de->d_type == DT_DIR);
+ if (de->d_type == DT_UNKNOWN) {
+ // Evidently some file systems, like older versions of XFS
+ // (mkfs.xfs -m crc=0 -n ftype=0), can return this,
+ // and we need a stat(). If we wanted to optimize for this,
+ // we could probably defer it to later (we're stat-ing directories
+ // when recursing), but this is rare, and not really worth it --
+ // the second stat() will be cached anyway.
+ struct stat buf;
+ if (fstatat(fd, de->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0 &&
+ S_ISDIR(buf.st_mode)) {
+ e.is_directory = true;
+ } else {
+ e.is_directory = false;
+ }
+ } else {
+ e.is_directory = (de->d_type == DT_DIR);
+ }
if (conf_verbose) {
printf("%s/%s\n", path.c_str(), de->d_name);
if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) {
fprintf(stderr, "Hint: Try `ulimit -n 131072' or similar.\n");
} else {
- fprintf(stderr, "Hint: Try `ulimit -n %lu' or similar (current limit is %lu).\n",
- rlim.rlim_cur * 2, rlim.rlim_cur);
+ fprintf(stderr, "Hint: Try `ulimit -n %" PRIu64 " or similar (current limit is %" PRIu64 ").\n",
+ static_cast<uint64_t>(rlim.rlim_cur * 2), static_cast<uint64_t>(rlim.rlim_cur));
}
exit(1);
}
struct stat buf;
if (fstat(e.fd, &buf) != 0) {
- perror(path.c_str());
+ // It's possible that this is a filesystem that's excluded
+ // (and the failure is e.g. because the network is down).
+ // As a last-ditch effort, we try to check that before dying,
+ // i.e., duplicate the check from further down.
+ //
+ // It would be better to be able to run filesystem_is_excluded()
+ // for cheap on everything and just avoid the stat, but it seems
+ // hard to do that without any kind of raciness.
+ if (filesystem_is_excluded((path_plus_slash + e.name).c_str())) {
+ close(e.fd);
+ e.fd = -1;
+ continue;
+ }
+
+ perror((path_plus_slash + e.name).c_str());
exit(1);
}
// and can set whatever we want). 128k should be ample for most setups.
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) != -1) {
+ // Even root cannot increase rlim_cur beyond rlim_max,
+ // so we need to try to increase rlim_max first.
+ // Ignore errors, though.
+ if (rlim.rlim_max < 131072) {
+ rlim.rlim_max = 131072;
+ setrlimit(RLIMIT_NOFILE, &rlim);
+ getrlimit(RLIMIT_NOFILE, &rlim);
+ }
+
rlim_t wanted = std::max<rlim_t>(rlim.rlim_cur, 131072);
rlim.rlim_cur = std::min<rlim_t>(wanted, rlim.rlim_max);
setrlimit(RLIMIT_NOFILE, &rlim); // Ignore errors.
owner = grp->gr_gid;
}
- DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary());
- Corpus *corpus = db.start_corpus(/*store_dir_times=*/true);
+ DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary(), conf_check_visibility);
+ db.set_conf_block(conf_block);
+ DatabaseReceiver *corpus = db.start_corpus(/*store_dir_times=*/true);
int root_fd = opendir_noatime(AT_FDCWD, conf_scan_root);
if (root_fd == -1) {