git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/fs.c
Update bcachefs sources to 2115a2ffde bcachefs: Kill bch2_verify_bucket_evacuated()
[bcachefs-tools-debian] / libbcachefs / fs.c
index 4591b75f80233d51d5bbb34ca0a6b4d65c1af024..6dbbc6ff4cb8a5e51959621bc3cefe5c5cfd3b3c 100644 (file)
@@ -105,6 +105,11 @@ retry:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
+       bch2_fs_fatal_err_on(ret == -ENOENT, c,
+                            "inode %u:%llu not found when updating",
+                            inode_inum(inode).subvol,
+                            inode_inum(inode).inum);
+
        bch2_trans_exit(&trans);
        return ret < 0 ? ret : 0;
 }
@@ -201,6 +206,10 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
                return ERR_PTR(ret);
        }
 
+       mutex_lock(&c->vfs_inodes_lock);
+       list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+       mutex_unlock(&c->vfs_inodes_lock);
+
        unlock_new_inode(&inode->v);
 
        return &inode->v;
@@ -314,6 +323,9 @@ err_before_quota:
 
                inode = old;
        } else {
+               mutex_lock(&c->vfs_inodes_lock);
+               list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+               mutex_unlock(&c->vfs_inodes_lock);
                /*
                 * we really don't want insert_inode_locked2() to be setting
                 * I_NEW...
@@ -442,19 +454,27 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
        bch2_trans_init(&trans, c, 4, 1024);
 
        ret = commit_do(&trans, NULL, NULL,
-                             BTREE_INSERT_NOFAIL,
-                       bch2_unlink_trans(&trans,
-                                         inode_inum(dir), &dir_u,
-                                         &inode_u, &dentry->d_name,
-                                         deleting_snapshot));
+                       BTREE_INSERT_NOFAIL,
+               bch2_unlink_trans(&trans,
+                                 inode_inum(dir), &dir_u,
+                                 &inode_u, &dentry->d_name,
+                                 deleting_snapshot));
+       if (unlikely(ret))
+               goto err;
 
-       if (likely(!ret)) {
-               bch2_inode_update_after_write(&trans, dir, &dir_u,
-                                             ATTR_MTIME|ATTR_CTIME);
-               bch2_inode_update_after_write(&trans, inode, &inode_u,
-                                             ATTR_MTIME);
-       }
+       bch2_inode_update_after_write(&trans, dir, &dir_u,
+                                     ATTR_MTIME|ATTR_CTIME);
+       bch2_inode_update_after_write(&trans, inode, &inode_u,
+                                     ATTR_MTIME);
 
+       if (inode_u.bi_subvol) {
+               /*
+                * Subvolume deletion is asynchronous, but we still want to tell
+                * the VFS that it's been deleted here:
+                */
+               set_nlink(&inode->v, 0);
+       }
+err:
        bch2_trans_exit(&trans);
        bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 
@@ -667,10 +687,10 @@ int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
        qid = inode->ei_qid;
 
        if (attr->ia_valid & ATTR_UID)
-               qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+               qid.q[QTYP_USR] = from_kuid(mnt_userns, attr->ia_uid);
 
        if (attr->ia_valid & ATTR_GID)
-               qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+               qid.q[QTYP_GRP] = from_kgid(mnt_userns, attr->ia_gid);
 
        ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
                                     KEY_TYPE_QUOTA_PREALLOC);
@@ -779,18 +799,19 @@ static int bch2_setattr(struct user_namespace *mnt_userns,
 }
 
 static int bch2_tmpfile(struct user_namespace *mnt_userns,
-                       struct inode *vdir, struct dentry *dentry, umode_t mode)
+                       struct inode *vdir, struct file *file, umode_t mode)
 {
        struct bch_inode_info *inode =
-               __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
+               __bch2_create(mnt_userns, to_bch_ei(vdir),
+                             file->f_path.dentry, mode, 0,
                              (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 
        if (IS_ERR(inode))
                return bch2_err_class(PTR_ERR(inode));
 
-       d_mark_tmpfile(dentry, &inode->v);
-       d_instantiate(dentry, &inode->v);
-       return 0;
+       d_mark_tmpfile(file, &inode->v);
+       d_instantiate(file->f_path.dentry, &inode->v);
+       return finish_open_simple(file, 0);
 }
 
 static int bch2_fill_extent(struct bch_fs *c,
@@ -810,6 +831,9 @@ static int bch2_fill_extent(struct bch_fs *c,
                        int flags2 = 0;
                        u64 offset = p.ptr.offset;
 
+                       if (p.ptr.unwritten)
+                               flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
                        if (p.crc.compression_type)
                                flags2 |= FIEMAP_EXTENT_ENCODED;
                        else
@@ -1345,6 +1369,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
                inode->v.i_op   = &bch_special_inode_operations;
                break;
        }
+
+       mapping_set_large_folios(inode->v.i_mapping);
 }
 
 static struct inode *bch2_alloc_inode(struct super_block *sb)
@@ -1358,6 +1384,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
        inode_init_once(&inode->v);
        mutex_init(&inode->ei_update_lock);
        two_state_lock_init(&inode->ei_pagecache_lock);
+       INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
        mutex_init(&inode->ei_quota_lock);
 
        return &inode->v;
@@ -1422,53 +1449,78 @@ static void bch2_evict_inode(struct inode *vinode)
                                KEY_TYPE_QUOTA_WARN);
                bch2_inode_rm(c, inode_inum(inode));
        }
+
+       mutex_lock(&c->vfs_inodes_lock);
+       list_del_init(&inode->ei_vfs_inode_list);
+       mutex_unlock(&c->vfs_inodes_lock);
 }
 
-void bch2_evict_subvolume_inodes(struct bch_fs *c,
-                                snapshot_id_list *s)
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
 {
-       struct super_block *sb = c->vfs_sb;
-       struct inode *inode;
+       struct bch_inode_info *inode, **i;
+       DARRAY(struct bch_inode_info *) grabbed;
+       bool clean_pass = false, this_pass_clean;
 
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
-                   (inode->i_state & I_FREEING))
-                       continue;
+       /*
+        * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+        * be pruned with d_mark_dontcache().
+        *
+        * Once we've had a clean pass where we didn't find any inodes without
+        * I_DONTCACHE, we wait for them to be freed:
+        */
 
-               d_mark_dontcache(inode);
-               d_prune_aliases(inode);
-       }
-       spin_unlock(&sb->s_inode_list_lock);
+       darray_init(&grabbed);
+       darray_make_room(&grabbed, 1024);
 again:
        cond_resched();
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
-                   (inode->i_state & I_FREEING))
+       this_pass_clean = true;
+
+       mutex_lock(&c->vfs_inodes_lock);
+       list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+               if (!snapshot_list_has_id(s, inode->ei_subvol))
                        continue;
 
-               if (!(inode->i_state & I_DONTCACHE)) {
-                       d_mark_dontcache(inode);
-                       d_prune_aliases(inode);
-               }
+               if (!(inode->v.i_state & I_DONTCACHE) &&
+                   !(inode->v.i_state & I_FREEING)) {
+                       this_pass_clean = false;
+
+                       d_mark_dontcache(&inode->v);
+                       d_prune_aliases(&inode->v);
+
+                       /*
+                        * If i_count was zero, we have to take and release a
+                        * ref in order for I_DONTCACHE to be noticed and the
+                        * inode to be dropped:
+                        */
+
+                       if (!atomic_read(&inode->v.i_count) &&
+                           igrab(&inode->v) &&
+                           darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN))
+                               break;
+               } else if (clean_pass && this_pass_clean) {
+                       wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+                       DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
 
-               spin_lock(&inode->i_lock);
-               if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
-                   !(inode->i_state & I_FREEING)) {
-                       wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
-                       DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
                        prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-                       spin_unlock(&inode->i_lock);
-                       spin_unlock(&sb->s_inode_list_lock);
+                       mutex_unlock(&c->vfs_inodes_lock);
+
                        schedule();
                        finish_wait(wq, &wait.wq_entry);
                        goto again;
                }
+       }
+       mutex_unlock(&c->vfs_inodes_lock);
+
+       darray_for_each(grabbed, i)
+               iput(&(*i)->v);
+       grabbed.nr = 0;
 
-               spin_unlock(&inode->i_lock);
+       if (!clean_pass || !this_pass_clean) {
+               clean_pass = this_pass_clean;
+               goto again;
        }
-       spin_unlock(&sb->s_inode_list_lock);
+
+       darray_exit(&grabbed);
 }
 
 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1766,8 +1818,11 @@ got_sb:
        kfree(devs[0]);
        kfree(devs);
 
-       if (IS_ERR(sb))
-               return ERR_CAST(sb);
+       if (IS_ERR(sb)) {
+               ret = PTR_ERR(sb);
+               ret = bch2_err_class(ret);
+               return ERR_PTR(ret);
+       }
 
        c = sb->s_fs_info;
 
@@ -1841,7 +1896,7 @@ out:
 
 err_put_super:
        deactivate_locked_super(sb);
-       return ERR_PTR(ret);
+       return ERR_PTR(bch2_err_class(ret));
 }
 
 static void bch2_kill_sb(struct super_block *sb)