if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
+ bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
+ "inode %u:%llu not found when updating",
+ inode_inum(inode).subvol,
+ inode_inum(inode).inum);
+
bch2_trans_exit(&trans);
return ret < 0 ? ret : 0;
}
return ERR_PTR(ret);
}
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+
unlock_new_inode(&inode->v);
return &inode->v;
}
struct bch_inode_info *
-__bch2_create(struct user_namespace *mnt_userns,
+__bch2_create(struct mnt_idmap *idmap,
struct bch_inode_info *dir, struct dentry *dentry,
umode_t mode, dev_t rdev, subvol_inum snapshot_src,
unsigned flags)
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
- from_kuid(mnt_userns, current_fsuid()),
- from_kgid(mnt_userns, current_fsgid()),
+ from_kuid(i_user_ns(&dir->v), current_fsuid()),
+ from_kgid(i_user_ns(&dir->v), current_fsgid()),
mode, rdev,
default_acl, acl, snapshot_src, flags) ?:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
inode = old;
} else {
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
return d_splice_alias(vinode, dentry);
}
-static int bch2_mknod(struct user_namespace *mnt_userns,
+static int bch2_mknod(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
struct bch_inode_info *inode =
- __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev,
+ __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
(subvol_inum) { 0 }, 0);
if (IS_ERR(inode))
return 0;
}
/*
 * Create a regular file: forwards to bch2_mknod() with S_IFREG or'd into
 * @mode and an rdev of 0, and returns whatever bch2_mknod() returns.
 * @excl is not consulted in this function.
 *
 * The only change in this hunk is the first parameter, which becomes a
 * struct mnt_idmap pointer instead of a struct user_namespace pointer and
 * is passed straight through.
 */
-static int bch2_create(struct user_namespace *mnt_userns,
+static int bch2_create(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFREG, 0);
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
}
static int __bch2_link(struct bch_fs *c,
bch2_trans_init(&trans, c, 4, 1024);
ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- bch2_unlink_trans(&trans,
- inode_inum(dir), &dir_u,
- &inode_u, &dentry->d_name,
- deleting_snapshot));
+ BTREE_INSERT_NOFAIL,
+ bch2_unlink_trans(&trans,
+ inode_inum(dir), &dir_u,
+ &inode_u, &dentry->d_name,
+ deleting_snapshot));
+ if (unlikely(ret))
+ goto err;
- if (likely(!ret)) {
- bch2_inode_update_after_write(&trans, dir, &dir_u,
- ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, inode, &inode_u,
- ATTR_MTIME);
- }
+ bch2_inode_update_after_write(&trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ bch2_inode_update_after_write(&trans, inode, &inode_u,
+ ATTR_MTIME);
+ if (inode_u.bi_subvol) {
+ /*
+ * Subvolume deletion is asynchronous, but we still want to tell
+ * the VFS that it's been deleted here:
+ */
+ set_nlink(&inode->v, 0);
+ }
+err:
bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
return __bch2_unlink(vdir, dentry, false);
}
-static int bch2_symlink(struct user_namespace *mnt_userns,
+static int bch2_symlink(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
const char *symname)
{
struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
int ret;
- inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
+ inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode));
return ret;
}
/*
 * Create a directory: forwards to bch2_mknod() with S_IFDIR or'd into
 * @mode and an rdev of 0, and returns whatever bch2_mknod() returns.
 *
 * The only change in this hunk is the first parameter, which becomes a
 * struct mnt_idmap pointer instead of a struct user_namespace pointer and
 * is passed straight through.
 */
-static int bch2_mkdir(struct user_namespace *mnt_userns,
+static int bch2_mkdir(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry, umode_t mode)
{
- return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFDIR, 0);
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
}
-static int bch2_rename2(struct user_namespace *mnt_userns,
+static int bch2_rename2(struct mnt_idmap *idmap,
struct inode *src_vdir, struct dentry *src_dentry,
struct inode *dst_vdir, struct dentry *dst_dentry,
unsigned flags)
return ret;
}
-static void bch2_setattr_copy(struct user_namespace *mnt_userns,
+static void bch2_setattr_copy(struct mnt_idmap *idmap,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
struct iattr *attr)
unsigned int ia_valid = attr->ia_valid;
if (ia_valid & ATTR_UID)
- bi->bi_uid = from_kuid(mnt_userns, attr->ia_uid);
+ bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
if (ia_valid & ATTR_GID)
- bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid);
+ bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
if (ia_valid & ATTR_SIZE)
bi->bi_size = attr->ia_size;
: inode->v.i_gid;
if (!in_group_p(gid) &&
- !capable_wrt_inode_uidgid(mnt_userns, &inode->v, CAP_FSETID))
+ !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
mode &= ~S_ISGID;
bi->bi_mode = mode;
}
}
-int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
+int bch2_setattr_nonsize(struct mnt_idmap *idmap,
struct bch_inode_info *inode,
struct iattr *attr)
{
qid = inode->ei_qid;
if (attr->ia_valid & ATTR_UID)
- qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+ qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
if (attr->ia_valid & ATTR_GID)
- qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+ qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);
if (ret)
goto btree_err;
- bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
+ bch2_setattr_copy(idmap, inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
return bch2_err_class(ret);
}
-static int bch2_getattr(struct user_namespace *mnt_userns,
+static int bch2_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned query_flags)
{
return 0;
}
/*
 * Apply the attribute changes described by @iattr to the inode behind
 * @dentry.
 *
 * Asserts (via lockdep) that the inode's i_rwsem is held, runs
 * setattr_prepare() and returns its error if it fails, then dispatches:
 * requests with ATTR_SIZE set go to bch2_truncate(), everything else goes
 * to bch2_setattr_nonsize().  The return value is that of whichever
 * helper ran.
 *
 * This hunk replaces the user-namespace argument with a struct mnt_idmap
 * pointer in the signature and in all three calls.
 *
 * NOTE(review): the declaration of `ret` is not visible in this hunk;
 * presumably it sits in context the patch elides.
 */
-static int bch2_setattr(struct user_namespace *mnt_userns,
+static int bch2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
lockdep_assert_held(&inode->v.i_rwsem);
- ret = setattr_prepare(mnt_userns, dentry, iattr);
+ ret = setattr_prepare(idmap, dentry, iattr);
if (ret)
return ret;
/* A size change is handled by the truncate path; all other attributes by the non-size path. */
return iattr->ia_valid & ATTR_SIZE
- ? bch2_truncate(mnt_userns, inode, iattr)
- : bch2_setattr_nonsize(mnt_userns, inode, iattr);
+ ? bch2_truncate(idmap, inode, iattr)
+ : bch2_setattr_nonsize(idmap, inode, iattr);
}
/*
 * Create an unnamed temporary file in directory @vdir.
 *
 * The inode is created through __bch2_create() with BCH_CREATE_TMPFILE,
 * an rdev of 0 and a zeroed snapshot source.  If that returns an error
 * pointer, the error is mapped through bch2_err_class() and returned.
 *
 * Besides the user-namespace to mnt_idmap parameter swap, this hunk
 * changes the third parameter from a dentry to a struct file: the dentry
 * is now taken from file->f_path.dentry, d_mark_tmpfile() is handed the
 * file, and the function returns the result of finish_open_simple(file, 0)
 * where it previously returned 0 unconditionally.
 */
-static int bch2_tmpfile(struct user_namespace *mnt_userns,
- struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_tmpfile(struct mnt_idmap *idmap,
+ struct inode *vdir, struct file *file, umode_t mode)
{
struct bch_inode_info *inode =
- __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
+ __bch2_create(idmap, to_bch_ei(vdir),
+ file->f_path.dentry, mode, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode));
/* Mark the new inode as a tmpfile and attach it to the dentry before completing the open. */
- d_mark_tmpfile(dentry, &inode->v);
- d_instantiate(dentry, &inode->v);
- return 0;
+ d_mark_tmpfile(file, &inode->v);
+ d_instantiate(file->f_path.dentry, &inode->v);
+ return finish_open_simple(file, 0);
}
static int bch2_fill_extent(struct bch_fs *c,
cur.k->k.p.offset += cur.k->k.size;
if (have_extent) {
+ bch2_trans_unlock(&trans);
ret = bch2_fill_extent(c, info,
bkey_i_to_s_c(prev.k), 0);
if (ret)
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- if (!ret && have_extent)
+ if (!ret && have_extent) {
+ bch2_trans_unlock(&trans);
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
FIEMAP_EXTENT_LAST);
+ }
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&cur, c);
.mmap = bch2_mmap,
.open = generic_file_open,
.fsync = bch2_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = bch2_fallocate_dispatch,
.unlocked_ioctl = bch2_fs_file_ioctl,
goto err;
if (k.k->type != KEY_TYPE_dirent) {
- ret = -ENOENT;
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
goto err;
}
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
if (ret > 0)
- ret = -ENOENT;
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
if (ret)
goto err;
inode->v.i_op = &bch_special_inode_operations;
break;
}
+
+ mapping_set_large_folios(inode->v.i_mapping);
}
static struct inode *bch2_alloc_inode(struct super_block *sb)
inode_init_once(&inode->v);
mutex_init(&inode->ei_update_lock);
two_state_lock_init(&inode->ei_pagecache_lock);
+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
mutex_init(&inode->ei_quota_lock);
return &inode->v;
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode_inum(inode));
}
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_del_init(&inode->ei_vfs_inode_list);
+ mutex_unlock(&c->vfs_inodes_lock);
}
/*
 * Push every in-memory inode whose ei_subvol appears in @s out of the
 * cache, and do not return until a scan of the list finds none left.
 *
 * What this hunk changes:
 *  - The removed code walked sb->s_inodes under the s_inode_list_lock
 *    spinlock and called d_mark_dontcache()/d_prune_aliases() while
 *    holding it.
 *  - The added code walks c->vfs_inodes_list under the c->vfs_inodes_lock
 *    mutex, pins candidates with igrab() into the `grabbed` array, and
 *    only marks/prunes/releases them after the mutex has been dropped.
 *
 * Pass structure of the added code:
 *  - this_pass_clean is reset to true at the top of each pass and cleared
 *    as soon as one inode is grabbed.
 *  - clean_pass records whether the previous pass was clean.
 *  - The function falls through to darray_exit() only when both are true
 *    and the scan completed without entering the wait branch.
 */
-void bch2_evict_subvolume_inodes(struct bch_fs *c,
- snapshot_id_list *s)
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
- struct super_block *sb = c->vfs_sb;
- struct inode *inode;
+ struct bch_inode_info *inode, **i;
+ DARRAY(struct bch_inode_info *) grabbed;
+ bool clean_pass = false, this_pass_clean;
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
- continue;
+ /*
+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+ * be pruned with d_mark_dontcache().
+ *
+ * Once we've had a clean pass where we didn't find any inodes without
+ * I_DONTCACHE, we wait for them to be freed:
+ */
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
- spin_unlock(&sb->s_inode_list_lock);
+ darray_init(&grabbed);
/*
 * NOTE(review): the result of darray_make_room() is not checked here.
 * Presumably this is a best-effort preallocation and a shortfall shows up
 * later as a darray_push_gfp() failure -- confirm.
 */
+ darray_make_room(&grabbed, 1024);
again:
cond_resched();
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
+ this_pass_clean = true;
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+ if (!snapshot_list_has_id(s, inode->ei_subvol))
continue;
- if (!(inode->i_state & I_DONTCACHE)) {
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
/*
 * Candidate for pruning: not yet marked I_DONTCACHE, not being freed, and
 * igrab() managed to take a reference.  Finding one makes this pass dirty.
 */
+ if (!(inode->v.i_state & I_DONTCACHE) &&
+ !(inode->v.i_state & I_FREEING) &&
+ igrab(&inode->v)) {
+ this_pass_clean = false;
+
/*
 * The push uses GFP_ATOMIC|__GFP_NOWARN.  If it fails, the reference just
 * taken is dropped and the scan stops early; the inodes already collected
 * are still processed below, and the dirty pass forces another scan.
 *
 * NOTE(review): this iput() runs with vfs_inodes_lock held, and the evict
 * path elsewhere in this patch takes the same mutex before list_del_init().
 * Confirm this can never be the final reference.
 */
+ if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
+ iput(&inode->v);
+ break;
+ }
/*
 * Matching inode that was not grabbed (already I_DONTCACHE, being freed,
 * or igrab() failed).  Once the previous pass was clean and this one still
 * is, sleep on the __I_NEW bit waitqueue of i_state, dropping the mutex
 * first, then restart the scan from the top.
 * NOTE(review): presumably the wakeup arrives when the inode is torn
 * down -- confirm against the VFS eviction path.
 */
+ } else if (clean_pass && this_pass_clean) {
+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
- spin_lock(&inode->i_lock);
- if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
- !(inode->i_state & I_FREEING)) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
+ mutex_unlock(&c->vfs_inodes_lock);
+
schedule();
finish_wait(wq, &wait.wq_entry);
goto again;
}
+ }
+ mutex_unlock(&c->vfs_inodes_lock);
+
/*
 * With the mutex released, mark each grabbed inode dontcache, prune its
 * dentry aliases, and drop the reference taken by igrab().  The array is
 * then emptied (nr reset to 0) so it can be reused by the next pass.
 */
+ darray_for_each(grabbed, i) {
+ inode = *i;
+ d_mark_dontcache(&inode->v);
+ d_prune_aliases(&inode->v);
+ iput(&inode->v);
+ }
+ grabbed.nr = 0;
- spin_unlock(&inode->i_lock);
/*
 * Keep scanning until two consecutive passes grab nothing: the first
 * clean pass only arms clean_pass, the second is the one in which the
 * wait branch above becomes active.
 */
+ if (!clean_pass || !this_pass_clean) {
+ clean_pass = this_pass_clean;
+ goto again;
}
- spin_unlock(&sb->s_inode_list_lock);
+
+ darray_exit(&grabbed);
}
static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
kfree(devs[0]);
kfree(devs);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
+ if (IS_ERR(sb)) {
+ ret = PTR_ERR(sb);
+ ret = bch2_err_class(ret);
+ return ERR_PTR(ret);
+ }
c = sb->s_fs_info;
err_put_super:
deactivate_locked_super(sb);
- return ERR_PTR(ret);
+ return ERR_PTR(bch2_err_class(ret));
}
static void bch2_kill_sb(struct super_block *sb)
{
int ret = -ENOMEM;
- bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
if (!bch2_inode_cache)
goto err;