git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/fs.c
Update bcachefs sources to a8115093df bcachefs: Fix divide by zero in rebalance_work()
[bcachefs-tools-debian] / libbcachefs / fs.c
index 57e6e21896e1e41c76b0e7d144da5d52765b8589..8958957b648bd7ef16f7b2a3f78e447ab7b31ca5 100644 (file)
 #include "fs-common.h"
 #include "fs-io.h"
 #include "fs-ioctl.h"
+#include "fs-io-buffered.h"
+#include "fs-io-direct.h"
+#include "fs-io-pagecache.h"
 #include "fsck.h"
 #include "inode.h"
 #include "io.h"
 #include "journal.h"
 #include "keylist.h"
 #include "quota.h"
+#include "snapshot.h"
 #include "super.h"
 #include "xattr.h"
 
@@ -43,58 +47,6 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
                                struct bch_inode_unpacked *,
                                struct bch_subvolume *);
 
-static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
-{
-       BUG_ON(atomic_long_read(&lock->v) == 0);
-
-       if (atomic_long_sub_return_release(i, &lock->v) == 0)
-               wake_up_all(&lock->wait);
-}
-
-static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
-{
-       long v = atomic_long_read(&lock->v), old;
-
-       do {
-               old = v;
-
-               if (i > 0 ? v < 0 : v > 0)
-                       return false;
-       } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
-                                       old, old + i)) != old);
-       return true;
-}
-
-static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
-{
-       wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
-}
-
-void bch2_pagecache_add_put(struct pagecache_lock *lock)
-{
-       __pagecache_lock_put(lock, 1);
-}
-
-bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
-{
-       return __pagecache_lock_tryget(lock, 1);
-}
-
-void bch2_pagecache_add_get(struct pagecache_lock *lock)
-{
-       __pagecache_lock_get(lock, 1);
-}
-
-void bch2_pagecache_block_put(struct pagecache_lock *lock)
-{
-       __pagecache_lock_put(lock, -1);
-}
-
-void bch2_pagecache_block_get(struct pagecache_lock *lock)
-{
-       __pagecache_lock_get(lock, -1);
-}
-
 void bch2_inode_update_after_write(struct btree_trans *trans,
                                   struct bch_inode_info *inode,
                                   struct bch_inode_unpacked *bi,
@@ -141,7 +93,7 @@ retry:
 
        ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
                                BTREE_ITER_INTENT) ?:
-               (set ? set(inode, &inode_u, p) : 0) ?:
+               (set ? set(&trans, inode, &inode_u, p) : 0) ?:
                bch2_inode_write(&trans, &iter, &inode_u) ?:
                bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
 
@@ -157,6 +109,11 @@ retry:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
+       bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
+                            "inode %u:%llu not found when updating",
+                            inode_inum(inode).subvol,
+                            inode_inum(inode).inum);
+
        bch2_trans_exit(&trans);
        return ret < 0 ? ret : 0;
 }
@@ -250,16 +207,20 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
 
        if (ret) {
                iget_failed(&inode->v);
-               return ERR_PTR(ret);
+               return ERR_PTR(bch2_err_class(ret));
        }
 
+       mutex_lock(&c->vfs_inodes_lock);
+       list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+       mutex_unlock(&c->vfs_inodes_lock);
+
        unlock_new_inode(&inode->v);
 
        return &inode->v;
 }
 
 struct bch_inode_info *
-__bch2_create(struct user_namespace *mnt_userns,
+__bch2_create(struct mnt_idmap *idmap,
              struct bch_inode_info *dir, struct dentry *dentry,
              umode_t mode, dev_t rdev, subvol_inum snapshot_src,
              unsigned flags)
@@ -305,8 +266,8 @@ retry:
                                  inode_inum(dir), &dir_u, &inode_u,
                                  !(flags & BCH_CREATE_TMPFILE)
                                  ? &dentry->d_name : NULL,
-                                 from_kuid(mnt_userns, current_fsuid()),
-                                 from_kgid(mnt_userns, current_fsgid()),
+                                 from_kuid(i_user_ns(&dir->v), current_fsuid()),
+                                 from_kgid(i_user_ns(&dir->v), current_fsgid()),
                                  mode, rdev,
                                  default_acl, acl, snapshot_src, flags) ?:
                bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
@@ -366,6 +327,9 @@ err_before_quota:
 
                inode = old;
        } else {
+               mutex_lock(&c->vfs_inodes_lock);
+               list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+               mutex_unlock(&c->vfs_inodes_lock);
                /*
                 * we really don't want insert_inode_locked2() to be setting
                 * I_NEW...
@@ -410,26 +374,26 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
        return d_splice_alias(vinode, dentry);
 }
 
-static int bch2_mknod(struct user_namespace *mnt_userns,
+static int bch2_mknod(struct mnt_idmap *idmap,
                      struct inode *vdir, struct dentry *dentry,
                      umode_t mode, dev_t rdev)
 {
        struct bch_inode_info *inode =
-               __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev,
+               __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
                              (subvol_inum) { 0 }, 0);
 
        if (IS_ERR(inode))
-               return PTR_ERR(inode);
+               return bch2_err_class(PTR_ERR(inode));
 
        d_instantiate(dentry, &inode->v);
        return 0;
 }
 
-static int bch2_create(struct user_namespace *mnt_userns,
+static int bch2_create(struct mnt_idmap *idmap,
                       struct inode *vdir, struct dentry *dentry,
                       umode_t mode, bool excl)
 {
-       return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFREG, 0);
+       return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
 }
 
 static int __bch2_link(struct bch_fs *c,
@@ -494,19 +458,27 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
        bch2_trans_init(&trans, c, 4, 1024);
 
        ret = commit_do(&trans, NULL, NULL,
-                             BTREE_INSERT_NOFAIL,
-                       bch2_unlink_trans(&trans,
-                                         inode_inum(dir), &dir_u,
-                                         &inode_u, &dentry->d_name,
-                                         deleting_snapshot));
+                       BTREE_INSERT_NOFAIL,
+               bch2_unlink_trans(&trans,
+                                 inode_inum(dir), &dir_u,
+                                 &inode_u, &dentry->d_name,
+                                 deleting_snapshot));
+       if (unlikely(ret))
+               goto err;
 
-       if (likely(!ret)) {
-               bch2_inode_update_after_write(&trans, dir, &dir_u,
-                                             ATTR_MTIME|ATTR_CTIME);
-               bch2_inode_update_after_write(&trans, inode, &inode_u,
-                                             ATTR_MTIME);
-       }
+       bch2_inode_update_after_write(&trans, dir, &dir_u,
+                                     ATTR_MTIME|ATTR_CTIME);
+       bch2_inode_update_after_write(&trans, inode, &inode_u,
+                                     ATTR_MTIME);
 
+       if (inode_u.bi_subvol) {
+               /*
+                * Subvolume deletion is asynchronous, but we still want to tell
+                * the VFS that it's been deleted here:
+                */
+               set_nlink(&inode->v, 0);
+       }
+err:
        bch2_trans_exit(&trans);
        bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 
@@ -518,7 +490,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        return __bch2_unlink(vdir, dentry, false);
 }
 
-static int bch2_symlink(struct user_namespace *mnt_userns,
+static int bch2_symlink(struct mnt_idmap *idmap,
                        struct inode *vdir, struct dentry *dentry,
                        const char *symname)
 {
@@ -526,10 +498,10 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
        struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
        int ret;
 
-       inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
+       inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
                              (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
-       if (unlikely(IS_ERR(inode)))
-               return PTR_ERR(inode);
+       if (IS_ERR(inode))
+               return bch2_err_class(PTR_ERR(inode));
 
        inode_lock(&inode->v);
        ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
@@ -553,13 +525,13 @@ err:
        return ret;
 }
 
-static int bch2_mkdir(struct user_namespace *mnt_userns,
+static int bch2_mkdir(struct mnt_idmap *idmap,
                      struct inode *vdir, struct dentry *dentry, umode_t mode)
 {
-       return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFDIR, 0);
+       return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
 }
 
-static int bch2_rename2(struct user_namespace *mnt_userns,
+static int bch2_rename2(struct mnt_idmap *idmap,
                        struct inode *src_vdir, struct dentry *src_dentry,
                        struct inode *dst_vdir, struct dentry *dst_dentry,
                        unsigned flags)
@@ -666,7 +638,7 @@ err:
        return ret;
 }
 
-static void bch2_setattr_copy(struct user_namespace *mnt_userns,
+static void bch2_setattr_copy(struct mnt_idmap *idmap,
                              struct bch_inode_info *inode,
                              struct bch_inode_unpacked *bi,
                              struct iattr *attr)
@@ -675,9 +647,9 @@ static void bch2_setattr_copy(struct user_namespace *mnt_userns,
        unsigned int ia_valid = attr->ia_valid;
 
        if (ia_valid & ATTR_UID)
-               bi->bi_uid = from_kuid(mnt_userns, attr->ia_uid);
+               bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
        if (ia_valid & ATTR_GID)
-               bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid);
+               bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
 
        if (ia_valid & ATTR_SIZE)
                bi->bi_size = attr->ia_size;
@@ -696,13 +668,13 @@ static void bch2_setattr_copy(struct user_namespace *mnt_userns,
                        : inode->v.i_gid;
 
                if (!in_group_p(gid) &&
-                   !capable_wrt_inode_uidgid(mnt_userns, &inode->v, CAP_FSETID))
+                   !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
                        mode &= ~S_ISGID;
                bi->bi_mode = mode;
        }
 }
 
-int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
+int bch2_setattr_nonsize(struct mnt_idmap *idmap,
                         struct bch_inode_info *inode,
                         struct iattr *attr)
 {
@@ -719,10 +691,10 @@ int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
        qid = inode->ei_qid;
 
        if (attr->ia_valid & ATTR_UID)
-               qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+               qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
 
        if (attr->ia_valid & ATTR_GID)
-               qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+               qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
 
        ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
                                     KEY_TYPE_QUOTA_PREALLOC);
@@ -740,7 +712,7 @@ retry:
        if (ret)
                goto btree_err;
 
-       bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
+       bch2_setattr_copy(idmap, inode, &inode_u, attr);
 
        if (attr->ia_valid & ATTR_MODE) {
                ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
@@ -772,7 +744,7 @@ err:
        return bch2_err_class(ret);
 }
 
-static int bch2_getattr(struct user_namespace *mnt_userns,
+static int bch2_getattr(struct mnt_idmap *idmap,
                        const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned query_flags)
 {
@@ -813,7 +785,7 @@ static int bch2_getattr(struct user_namespace *mnt_userns,
        return 0;
 }
 
-static int bch2_setattr(struct user_namespace *mnt_userns,
+static int bch2_setattr(struct mnt_idmap *idmap,
                        struct dentry *dentry, struct iattr *iattr)
 {
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
@@ -821,28 +793,29 @@ static int bch2_setattr(struct user_namespace *mnt_userns,
 
        lockdep_assert_held(&inode->v.i_rwsem);
 
-       ret = setattr_prepare(mnt_userns, dentry, iattr);
+       ret = setattr_prepare(idmap, dentry, iattr);
        if (ret)
                return ret;
 
        return iattr->ia_valid & ATTR_SIZE
-               ? bch2_truncate(mnt_userns, inode, iattr)
-               : bch2_setattr_nonsize(mnt_userns, inode, iattr);
+               ? bch2_truncate(idmap, inode, iattr)
+               : bch2_setattr_nonsize(idmap, inode, iattr);
 }
 
-static int bch2_tmpfile(struct user_namespace *mnt_userns,
-                       struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_tmpfile(struct mnt_idmap *idmap,
+                       struct inode *vdir, struct file *file, umode_t mode)
 {
        struct bch_inode_info *inode =
-               __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
+               __bch2_create(idmap, to_bch_ei(vdir),
+                             file->f_path.dentry, mode, 0,
                              (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 
        if (IS_ERR(inode))
-               return PTR_ERR(inode);
+               return bch2_err_class(PTR_ERR(inode));
 
-       d_mark_tmpfile(dentry, &inode->v);
-       d_instantiate(dentry, &inode->v);
-       return 0;
+       d_mark_tmpfile(file, &inode->v);
+       d_instantiate(file->f_path.dentry, &inode->v);
+       return finish_open_simple(file, 0);
 }
 
 static int bch2_fill_extent(struct bch_fs *c,
@@ -862,6 +835,9 @@ static int bch2_fill_extent(struct bch_fs *c,
                        int flags2 = 0;
                        u64 offset = p.ptr.offset;
 
+                       if (p.ptr.unwritten)
+                               flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
                        if (p.crc.compression_type)
                                flags2 |= FIEMAP_EXTENT_ENCODED;
                        else
@@ -971,6 +947,7 @@ retry:
                cur.k->k.p.offset += cur.k->k.size;
 
                if (have_extent) {
+                       bch2_trans_unlock(&trans);
                        ret = bch2_fill_extent(c, info,
                                        bkey_i_to_s_c(prev.k), 0);
                        if (ret)
@@ -989,9 +966,11 @@ err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
-       if (!ret && have_extent)
+       if (!ret && have_extent) {
+               bch2_trans_unlock(&trans);
                ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
                                       FIEMAP_EXTENT_LAST);
+       }
 
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&cur, c);
@@ -1025,11 +1004,16 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
 {
        struct bch_inode_info *inode = file_bch_inode(file);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       int ret;
 
        if (!dir_emit_dots(file, ctx))
                return 0;
 
-       return bch2_readdir(c, inode_inum(inode), ctx);
+       ret = bch2_readdir(c, inode_inum(inode), ctx);
+       if (ret)
+               bch_err_fn(c, ret);
+
+       return bch2_err_class(ret);
 }
 
 static const struct file_operations bch_file_operations = {
@@ -1039,7 +1023,7 @@ static const struct file_operations bch_file_operations = {
        .mmap           = bch2_mmap,
        .open           = generic_file_open,
        .fsync          = bch2_fsync,
-       .splice_read    = generic_file_splice_read,
+       .splice_read    = filemap_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = bch2_fallocate_dispatch,
        .unlocked_ioctl = bch2_fs_file_ioctl,
@@ -1254,7 +1238,8 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
        struct bch_inode_unpacked inode_u;
        subvol_inum target;
        u32 snapshot;
-       unsigned name_len;
+       struct qstr dirent_name;
+       unsigned name_len = 0;
        int ret;
 
        if (!S_ISDIR(dir->v.i_mode))
@@ -1289,14 +1274,14 @@ retry:
                        goto err;
 
                if (k.k->type != KEY_TYPE_dirent) {
-                       ret = -ENOENT;
+                       ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                        goto err;
                }
 
                d = bkey_s_c_to_dirent(k);
                ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
                if (ret > 0)
-                       ret = -ENOENT;
+                       ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                if (ret)
                        goto err;
 
@@ -1331,9 +1316,10 @@ retry:
        ret = -ENOENT;
        goto err;
 found:
-       name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX);
+       dirent_name = bch2_dirent_get_name(d);
 
-       memcpy(name, d.v->d_name, name_len);
+       name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
+       memcpy(name, dirent_name.name, name_len);
        name[name_len] = '\0';
 err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -1397,6 +1383,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
                inode->v.i_op   = &bch_special_inode_operations;
                break;
        }
+
+       mapping_set_large_folios(inode->v.i_mapping);
 }
 
 static struct inode *bch2_alloc_inode(struct super_block *sb)
@@ -1409,7 +1397,8 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
 
        inode_init_once(&inode->v);
        mutex_init(&inode->ei_update_lock);
-       pagecache_lock_init(&inode->ei_pagecache_lock);
+       two_state_lock_init(&inode->ei_pagecache_lock);
+       INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
        mutex_init(&inode->ei_quota_lock);
 
        return &inode->v;
@@ -1428,7 +1417,8 @@ static void bch2_destroy_inode(struct inode *vinode)
        call_rcu(&vinode->i_rcu, bch2_i_callback);
 }
 
-static int inode_update_times_fn(struct bch_inode_info *inode,
+static int inode_update_times_fn(struct btree_trans *trans,
+                                struct bch_inode_info *inode,
                                 struct bch_inode_unpacked *bi,
                                 void *p)
 {
@@ -1474,53 +1464,74 @@ static void bch2_evict_inode(struct inode *vinode)
                                KEY_TYPE_QUOTA_WARN);
                bch2_inode_rm(c, inode_inum(inode));
        }
+
+       mutex_lock(&c->vfs_inodes_lock);
+       list_del_init(&inode->ei_vfs_inode_list);
+       mutex_unlock(&c->vfs_inodes_lock);
 }
 
-void bch2_evict_subvolume_inodes(struct bch_fs *c,
-                                snapshot_id_list *s)
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
 {
-       struct super_block *sb = c->vfs_sb;
-       struct inode *inode;
+       struct bch_inode_info *inode, **i;
+       DARRAY(struct bch_inode_info *) grabbed;
+       bool clean_pass = false, this_pass_clean;
 
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
-                   (inode->i_state & I_FREEING))
-                       continue;
+       /*
+        * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+        * be pruned with d_mark_dontcache().
+        *
+        * Once we've had a clean pass where we didn't find any inodes without
+        * I_DONTCACHE, we wait for them to be freed:
+        */
 
-               d_mark_dontcache(inode);
-               d_prune_aliases(inode);
-       }
-       spin_unlock(&sb->s_inode_list_lock);
+       darray_init(&grabbed);
+       darray_make_room(&grabbed, 1024);
 again:
        cond_resched();
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
-                   (inode->i_state & I_FREEING))
+       this_pass_clean = true;
+
+       mutex_lock(&c->vfs_inodes_lock);
+       list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+               if (!snapshot_list_has_id(s, inode->ei_subvol))
                        continue;
 
-               if (!(inode->i_state & I_DONTCACHE)) {
-                       d_mark_dontcache(inode);
-                       d_prune_aliases(inode);
-               }
+               if (!(inode->v.i_state & I_DONTCACHE) &&
+                   !(inode->v.i_state & I_FREEING) &&
+                   igrab(&inode->v)) {
+                       this_pass_clean = false;
+
+                       if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
+                               iput(&inode->v);
+                               break;
+                       }
+               } else if (clean_pass && this_pass_clean) {
+                       wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+                       DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
 
-               spin_lock(&inode->i_lock);
-               if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
-                   !(inode->i_state & I_FREEING)) {
-                       wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
-                       DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
                        prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-                       spin_unlock(&inode->i_lock);
-                       spin_unlock(&sb->s_inode_list_lock);
+                       mutex_unlock(&c->vfs_inodes_lock);
+
                        schedule();
                        finish_wait(wq, &wait.wq_entry);
                        goto again;
                }
+       }
+       mutex_unlock(&c->vfs_inodes_lock);
+
+       darray_for_each(grabbed, i) {
+               inode = *i;
+               d_mark_dontcache(&inode->v);
+               d_prune_aliases(&inode->v);
+               iput(&inode->v);
+       }
+       grabbed.nr = 0;
 
-               spin_unlock(&inode->i_lock);
+       if (!clean_pass || !this_pass_clean) {
+               clean_pass = this_pass_clean;
+               goto again;
        }
-       spin_unlock(&sb->s_inode_list_lock);
+
+       darray_exit(&grabbed);
 }
 
 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1818,8 +1829,11 @@ got_sb:
        kfree(devs[0]);
        kfree(devs);
 
-       if (IS_ERR(sb))
-               return ERR_CAST(sb);
+       if (IS_ERR(sb)) {
+               ret = PTR_ERR(sb);
+               ret = bch2_err_class(ret);
+               return ERR_PTR(ret);
+       }
 
        c = sb->s_fs_info;
 
@@ -1846,7 +1860,7 @@ got_sb:
        sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
        sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
        c->vfs_sb               = sb;
-       strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+       strscpy(sb->s_id, c->name, sizeof(sb->s_id));
 
        ret = super_setup_bdi(sb);
        if (ret)
@@ -1893,7 +1907,7 @@ out:
 
 err_put_super:
        deactivate_locked_super(sb);
-       return ERR_PTR(ret);
+       return ERR_PTR(bch2_err_class(ret));
 }
 
 static void bch2_kill_sb(struct super_block *sb)
@@ -1917,15 +1931,14 @@ MODULE_ALIAS_FS("bcachefs");
 void bch2_vfs_exit(void)
 {
        unregister_filesystem(&bcache_fs_type);
-       if (bch2_inode_cache)
-               kmem_cache_destroy(bch2_inode_cache);
+       kmem_cache_destroy(bch2_inode_cache);
 }
 
 int __init bch2_vfs_init(void)
 {
        int ret = -ENOMEM;
 
-       bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
+       bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
        if (!bch2_inode_cache)
                goto err;