git.sesse.net Git - plocate/blobdiff - updatedb.cpp
Release plocate 1.1.22.
[plocate] / updatedb.cpp
index 6596ca1c141983f976a479ede399f307037a2ffc..cfdd3afff0c8f0773f865065d8202ecea2d3f296 100644 (file)
@@ -39,6 +39,7 @@ any later version.
 #include <fcntl.h>
 #include <getopt.h>
 #include <grp.h>
+#include <inttypes.h>
 #include <iosfwd>
 #include <math.h>
 #include <memory>
@@ -92,7 +93,11 @@ int opendir_noatime(int dirfd, const char *path)
        static bool noatime_failed = false;
 
        if (!noatime_failed) {
+#ifdef O_NOATIME
                int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY | O_NOATIME);
+#else
+               int fd = openat(dirfd, path, O_RDONLY | O_DIRECTORY);
+#endif
                if (fd != -1) {
                        return fd;
                } else if (errno == EPERM) {
@@ -154,11 +159,10 @@ struct entry {
        dev_t dev;
 };
 
-bool filesystem_is_excluded(const char *path)
+bool filesystem_is_excluded(const string &path)
 {
        if (conf_debug_pruning) {
-               /* This is debugging output, don't mark anything for translation */
-               fprintf(stderr, "Checking whether filesystem `%s' is excluded:\n", path);
+               fprintf(stderr, "Checking whether filesystem `%s' is excluded:\n", path.c_str());
        }
        FILE *f = setmntent("/proc/mounts", "r");
        if (f == nullptr) {
@@ -168,34 +172,24 @@ bool filesystem_is_excluded(const char *path)
        struct mntent *me;
        while ((me = getmntent(f)) != nullptr) {
                if (conf_debug_pruning) {
-                       /* This is debugging output, don't mark anything for translation */
                        fprintf(stderr, " `%s', type `%s'\n", me->mnt_dir, me->mnt_type);
                }
+               if (path != me->mnt_dir) {
+                       continue;
+               }
                string type(me->mnt_type);
                for (char &p : type) {
                        p = toupper(p);
                }
-               if (find(conf_prunefs.begin(), conf_prunefs.end(), type) != conf_prunefs.end()) {
-                       /* Paths in /proc/self/mounts contain no symbolic links.  Besides
-                          avoiding a few system calls, avoiding the realpath () avoids hangs
-                          if the filesystem is unavailable hard-mounted NFS. */
-                       char *dir = me->mnt_dir;
-                       if (conf_debug_pruning) {
-                               /* This is debugging output, don't mark anything for translation */
-                               fprintf(stderr, " => type matches, dir `%s'\n", dir);
-                       }
-                       bool res = (strcmp(path, dir) == 0);
-                       if (dir != me->mnt_dir)
-                               free(dir);
-                       if (res) {
-                               endmntent(f);
-                               return true;
-                       }
+               bool exclude = (find(conf_prunefs.begin(), conf_prunefs.end(), type) != conf_prunefs.end());
+               if (exclude && conf_debug_pruning) {
+                       fprintf(stderr, " => excluded due to filesystem type\n");
                }
+               endmntent(f);
+               return exclude;
        }
        if (conf_debug_pruning) {
-               /* This is debugging output, don't mark anything for translation */
-               fprintf(stderr, "...done\n");
+               fprintf(stderr, "...not found in mount list\n");
        }
        endmntent(f);
        return false;
@@ -526,11 +520,10 @@ string ExistingDB::read_next_dictionary() const
 // “parent_dev” must be the device of the parent directory of “path”.
 //
 // Takes ownership of fd.
-int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, Corpus *corpus, DictionaryBuilder *dict_builder)
+int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_time db_modified, ExistingDB *existing_db, DatabaseReceiver *corpus, DictionaryBuilder *dict_builder)
 {
        if (string_list_contains_dir_path(&conf_prunepaths, &conf_prunepaths_index, path)) {
                if (conf_debug_pruning) {
-                       /* This is debugging output, don't mark anything for translation */
                        fprintf(stderr, "Skipping `%s': in prunepaths\n", path.c_str());
                }
                close(fd);
@@ -538,7 +531,6 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
        }
        if (conf_prune_bind_mounts && is_bind_mount(path.c_str())) {
                if (conf_debug_pruning) {
-                       /* This is debugging output, don't mark anything for translation */
                        fprintf(stderr, "Skipping `%s': bind mount\n", path.c_str());
                }
                close(fd);
@@ -598,11 +590,20 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
                // by reading from the database. (We still need to open and stat everything,
                // though, but that happens in a later step.)
                entries = move(db_entries);
+               if (conf_verbose) {
+                       for (const entry &e : entries) {
+                               printf("%s/%s\n", path.c_str(), e.name.c_str());
+                       }
+               }
        } else {
                dir = fdopendir(fd);  // Takes over ownership of fd.
                if (dir == nullptr) {
-                       perror("fdopendir");
-                       exit(1);
+                       // fdopendir() wants to fstat() the fd to verify that it's indeed
+                       // a directory, which can seemingly fail on at least CIFS filesystems
+                       // if the server feels like it. We treat this as if we had an error
+                       // on opening it, i.e., ignore the directory.
+                       close(fd);
+                       return 0;
                }
 
                dirent *de;
@@ -619,7 +620,23 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
 
                        entry e;
                        e.name = de->d_name;
-                       e.is_directory = (de->d_type == DT_DIR);
+                       if (de->d_type == DT_UNKNOWN) {
+                               // Evidently some file systems, like older versions of XFS
+                               // (mkfs.xfs -m crc=0 -n ftype=0), can return this,
+                               // and we need a stat(). If we wanted to optimize for this,
+                               // we could probably defer it to later (we're stat-ing directories
+                               // when recursing), but this is rare, and not really worth it --
+                               // the second stat() will be cached anyway.
+                               struct stat buf;
+                               if (fstatat(fd, de->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0 &&
+                                   S_ISDIR(buf.st_mode)) {
+                                       e.is_directory = true;
+                               } else {
+                                       e.is_directory = false;
+                               }
+                       } else {
+                               e.is_directory = (de->d_type == DT_DIR);
+                       }
 
                        if (conf_verbose) {
                                printf("%s/%s\n", path.c_str(), de->d_name);
@@ -662,7 +679,6 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
 
                if (find(conf_prunenames.begin(), conf_prunenames.end(), e.name) != conf_prunenames.end()) {
                        if (conf_debug_pruning) {
-                               /* This is debugging output, don't mark anything for translation */
                                fprintf(stderr, "Skipping `%s': in prunenames\n", e.name.c_str());
                        }
                        continue;
@@ -678,8 +694,8 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
                                if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) {
                                        fprintf(stderr, "Hint: Try `ulimit -n 131072' or similar.\n");
                                } else {
-                                       fprintf(stderr, "Hint: Try `ulimit -n %lu' or similar (current limit is %lu).\n",
-                                               rlim.rlim_cur * 2, rlim.rlim_cur);
+                                       fprintf(stderr, "Hint: Try `ulimit -n %" PRIu64 " or similar (current limit is %" PRIu64 ").\n",
+                                               static_cast<uint64_t>(rlim.rlim_cur * 2), static_cast<uint64_t>(rlim.rlim_cur));
                                }
                                exit(1);
                        }
@@ -688,13 +704,27 @@ int scan(const string &path, int fd, dev_t parent_dev, dir_time modified, dir_ti
 
                struct stat buf;
                if (fstat(e.fd, &buf) != 0) {
-                       perror(path.c_str());
+                       // It's possible that this is a filesystem that's excluded
+                       // (and the failure is e.g. because the network is down).
+                       // As a last-ditch effort, we try to check that before dying,
+                       // i.e., duplicate the check from further down.
+                       //
+                       // It would be better to be able to run filesystem_is_excluded()
+                       // for cheap on everything and just avoid the stat, but it seems
+                       // hard to do that without any kind of raciness.
+                       if (filesystem_is_excluded(path_plus_slash + e.name)) {
+                               close(e.fd);
+                               e.fd = -1;
+                               continue;
+                       }
+
+                       perror((path_plus_slash + e.name).c_str());
                        exit(1);
                }
 
                e.dev = buf.st_dev;
                if (buf.st_dev != parent_dev) {
-                       if (filesystem_is_excluded((path_plus_slash + e.name).c_str())) {
+                       if (filesystem_is_excluded(path_plus_slash + e.name)) {
                                close(e.fd);
                                e.fd = -1;
                                continue;
@@ -737,6 +767,15 @@ int main(int argc, char **argv)
        // and can set whatever we want). 128k should be ample for most setups.
        rlimit rlim;
        if (getrlimit(RLIMIT_NOFILE, &rlim) != -1) {
+               // Even root cannot increase rlim_cur beyond rlim_max,
+               // so we need to try to increase rlim_max first.
+               // Ignore errors, though.
+               if (rlim.rlim_max < 131072) {
+                       rlim.rlim_max = 131072;
+                       setrlimit(RLIMIT_NOFILE, &rlim);
+                       getrlimit(RLIMIT_NOFILE, &rlim);
+               }
+
                rlim_t wanted = std::max<rlim_t>(rlim.rlim_cur, 131072);
                rlim.rlim_cur = std::min<rlim_t>(wanted, rlim.rlim_max);
                setrlimit(RLIMIT_NOFILE, &rlim);  // Ignore errors.
@@ -744,7 +783,7 @@ int main(int argc, char **argv)
 
        conf_prepare(argc, argv);
        if (conf_prune_bind_mounts) {
-               bind_mount_init(MOUNTINFO_PATH);
+               bind_mount_init();
        }
 
        int fd = open(conf_output.c_str(), O_RDONLY);
@@ -762,8 +801,9 @@ int main(int argc, char **argv)
                owner = grp->gr_gid;
        }
 
-       DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary());
-       Corpus *corpus = db.start_corpus(/*store_dir_times=*/true);
+       DatabaseBuilder db(conf_output.c_str(), owner, conf_block_size, existing_db.read_next_dictionary(), conf_check_visibility);
+       db.set_conf_block(conf_block);
+       DatabaseReceiver *corpus = db.start_corpus(/*store_dir_times=*/true);
 
        int root_fd = opendir_noatime(AT_FDCWD, conf_scan_root);
        if (root_fd == -1) {