#include "buckets.h"
#include "chardev.h"
#include "dirent.h"
+#include "errcode.h"
#include "extents.h"
#include "fs.h"
#include "fs-common.h"
struct bch_inode_unpacked *,
struct bch_subvolume *);
-static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
-{
- BUG_ON(atomic_long_read(&lock->v) == 0);
-
- if (atomic_long_sub_return_release(i, &lock->v) == 0)
- wake_up_all(&lock->wait);
-}
-
-static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
-{
- long v = atomic_long_read(&lock->v), old;
-
- do {
- old = v;
-
- if (i > 0 ? v < 0 : v > 0)
- return false;
- } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
- old, old + i)) != old);
- return true;
-}
-
-static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
-{
- wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
-}
-
-void bch2_pagecache_add_put(struct pagecache_lock *lock)
-{
- __pagecache_lock_put(lock, 1);
-}
-
-bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
-{
- return __pagecache_lock_tryget(lock, 1);
-}
-
-void bch2_pagecache_add_get(struct pagecache_lock *lock)
-{
- __pagecache_lock_get(lock, 1);
-}
-
-void bch2_pagecache_block_put(struct pagecache_lock *lock)
-{
- __pagecache_lock_put(lock, -1);
-}
-
-void bch2_pagecache_block_get(struct pagecache_lock *lock)
-{
- __pagecache_lock_get(lock, -1);
-}
-
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
+ bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
+ "inode %u:%llu not found when updating",
+ inode_inum(inode).subvol,
+ inode_inum(inode).inum);
+
bch2_trans_exit(&trans);
return ret < 0 ? ret : 0;
}
return ERR_PTR(ret);
}
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+
unlock_new_inode(&inode->v);
return &inode->v;
}
struct bch_inode_info *
-__bch2_create(struct user_namespace *mnt_userns,
+__bch2_create(struct mnt_idmap *idmap,
struct bch_inode_info *dir, struct dentry *dentry,
umode_t mode, dev_t rdev, subvol_inum snapshot_src,
unsigned flags)
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
- from_kuid(mnt_userns, current_fsuid()),
- from_kgid(mnt_userns, current_fsgid()),
+ from_kuid(i_user_ns(&dir->v), current_fsuid()),
+ from_kgid(i_user_ns(&dir->v), current_fsgid()),
mode, rdev,
default_acl, acl, snapshot_src, flags) ?:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
err_before_quota:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
goto err_trans;
}
inode = old;
} else {
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
return d_splice_alias(vinode, dentry);
}
-static int bch2_mknod(struct user_namespace *mnt_userns,
+static int bch2_mknod(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
struct bch_inode_info *inode =
- __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev,
+ __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
(subvol_inum) { 0 }, 0);
if (IS_ERR(inode))
- return PTR_ERR(inode);
+ return bch2_err_class(PTR_ERR(inode));
d_instantiate(dentry, &inode->v);
return 0;
}
-static int bch2_create(struct user_namespace *mnt_userns,
+static int bch2_create(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFREG, 0);
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
}
static int __bch2_link(struct bch_fs *c,
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024);
- ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+ ret = commit_do(&trans, NULL, NULL, 0,
bch2_link_trans(&trans,
inode_inum(dir), &dir_u,
inode_inum(inode), &inode_u,
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_init(&trans, c, 4, 1024);
- ret = __bch2_trans_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- bch2_unlink_trans(&trans,
- inode_inum(dir), &dir_u,
- &inode_u, &dentry->d_name,
- deleting_snapshot));
+ ret = commit_do(&trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL,
+ bch2_unlink_trans(&trans,
+ inode_inum(dir), &dir_u,
+ &inode_u, &dentry->d_name,
+ deleting_snapshot));
+ if (unlikely(ret))
+ goto err;
- if (likely(!ret)) {
- bch2_inode_update_after_write(&trans, dir, &dir_u,
- ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, inode, &inode_u,
- ATTR_MTIME);
- }
+ bch2_inode_update_after_write(&trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ bch2_inode_update_after_write(&trans, inode, &inode_u,
+ ATTR_MTIME);
+ if (inode_u.bi_subvol) {
+ /*
+ * Subvolume deletion is asynchronous, but we still want to tell
+ * the VFS that it's been deleted here:
+ */
+ set_nlink(&inode->v, 0);
+ }
+err:
bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
return __bch2_unlink(vdir, dentry, false);
}
-static int bch2_symlink(struct user_namespace *mnt_userns,
+static int bch2_symlink(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry,
const char *symname)
{
struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
int ret;
- inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
+ inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
- if (unlikely(IS_ERR(inode)))
- return PTR_ERR(inode);
+ if (IS_ERR(inode))
+ return bch2_err_class(PTR_ERR(inode));
inode_lock(&inode->v);
ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
return ret;
}
-static int bch2_mkdir(struct user_namespace *mnt_userns,
+static int bch2_mkdir(struct mnt_idmap *idmap,
struct inode *vdir, struct dentry *dentry, umode_t mode)
{
- return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFDIR, 0);
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
}
-static int bch2_rename2(struct user_namespace *mnt_userns,
+static int bch2_rename2(struct mnt_idmap *idmap,
struct inode *src_vdir, struct dentry *src_dentry,
struct inode *dst_vdir, struct dentry *dst_dentry,
unsigned flags)
goto err;
}
- ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+ ret = commit_do(&trans, NULL, NULL, 0,
bch2_rename_trans(&trans,
inode_inum(src_dir), &src_dir_u,
inode_inum(dst_dir), &dst_dir_u,
return ret;
}
-static void bch2_setattr_copy(struct user_namespace *mnt_userns,
+static void bch2_setattr_copy(struct mnt_idmap *idmap,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
struct iattr *attr)
unsigned int ia_valid = attr->ia_valid;
if (ia_valid & ATTR_UID)
- bi->bi_uid = from_kuid(mnt_userns, attr->ia_uid);
+ bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
if (ia_valid & ATTR_GID)
- bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid);
+ bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
if (ia_valid & ATTR_SIZE)
bi->bi_size = attr->ia_size;
: inode->v.i_gid;
if (!in_group_p(gid) &&
- !capable_wrt_inode_uidgid(mnt_userns, &inode->v, CAP_FSETID))
+ !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
mode &= ~S_ISGID;
bi->bi_mode = mode;
}
}
-int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
+int bch2_setattr_nonsize(struct mnt_idmap *idmap,
struct bch_inode_info *inode,
struct iattr *attr)
{
qid = inode->ei_qid;
if (attr->ia_valid & ATTR_UID)
- qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+ qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
if (attr->ia_valid & ATTR_GID)
- qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+ qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);
if (ret)
goto btree_err;
- bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
+ bch2_setattr_copy(idmap, inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err_trans;
err:
mutex_unlock(&inode->ei_update_lock);
- return ret;
+ return bch2_err_class(ret);
}
-static int bch2_getattr(struct user_namespace *mnt_userns,
+static int bch2_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned query_flags)
{
return 0;
}
-static int bch2_setattr(struct user_namespace *mnt_userns,
+static int bch2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
lockdep_assert_held(&inode->v.i_rwsem);
- ret = setattr_prepare(mnt_userns, dentry, iattr);
+ ret = setattr_prepare(idmap, dentry, iattr);
if (ret)
return ret;
return iattr->ia_valid & ATTR_SIZE
- ? bch2_truncate(mnt_userns, inode, iattr)
- : bch2_setattr_nonsize(mnt_userns, inode, iattr);
+ ? bch2_truncate(idmap, inode, iattr)
+ : bch2_setattr_nonsize(idmap, inode, iattr);
}
-static int bch2_tmpfile(struct user_namespace *mnt_userns,
- struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_tmpfile(struct mnt_idmap *idmap,
+ struct inode *vdir, struct file *file, umode_t mode)
{
struct bch_inode_info *inode =
- __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
+ __bch2_create(idmap, to_bch_ei(vdir),
+ file->f_path.dentry, mode, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
if (IS_ERR(inode))
- return PTR_ERR(inode);
+ return bch2_err_class(PTR_ERR(inode));
- d_mark_tmpfile(dentry, &inode->v);
- d_instantiate(dentry, &inode->v);
- return 0;
+ d_mark_tmpfile(file, &inode->v);
+ d_instantiate(file->f_path.dentry, &inode->v);
+ return finish_open_simple(file, 0);
}
static int bch2_fill_extent(struct bch_fs *c,
int flags2 = 0;
u64 offset = p.ptr.offset;
+ if (p.ptr.unwritten)
+ flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
if (p.crc.compression_type)
flags2 |= FIEMAP_EXTENT_ENCODED;
else
start = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret && have_extent)
};
static const struct address_space_operations bch_address_space_operations = {
- .writepage = bch2_writepage,
- .readpage = bch2_readpage,
+ .read_folio = bch2_read_folio,
.writepages = bch2_writepages,
.readahead = bch2_readahead,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.write_begin = bch2_write_begin,
.write_end = bch2_write_end,
- .invalidatepage = bch2_invalidatepage,
- .releasepage = bch2_releasepage,
+ .invalidate_folio = bch2_invalidate_folio,
+ .release_folio = bch2_release_folio,
.direct_IO = noop_direct_IO,
#ifdef CONFIG_MIGRATION
- .migratepage = bch2_migrate_page,
+ .migrate_folio = filemap_migrate_folio,
#endif
.error_remove_page = generic_error_remove_page,
};
goto err;
if (k.k->type != KEY_TYPE_dirent) {
- ret = -ENOENT;
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
goto err;
}
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
if (ret > 0)
- ret = -ENOENT;
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
if (ret)
goto err;
memcpy(name, d.v->d_name, name_len);
name[name_len] = '\0';
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter1);
inode->v.i_op = &bch_special_inode_operations;
break;
}
+
+ mapping_set_large_folios(inode->v.i_mapping);
}
static struct inode *bch2_alloc_inode(struct super_block *sb)
inode_init_once(&inode->v);
mutex_init(&inode->ei_update_lock);
- pagecache_lock_init(&inode->ei_pagecache_lock);
+ two_state_lock_init(&inode->ei_pagecache_lock);
+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
mutex_init(&inode->ei_quota_lock);
return &inode->v;
ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
- return ret;
+ return bch2_err_class(ret);
}
static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode_inum(inode));
}
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_del_init(&inode->ei_vfs_inode_list);
+ mutex_unlock(&c->vfs_inodes_lock);
}
-void bch2_evict_subvolume_inodes(struct bch_fs *c,
- snapshot_id_list *s)
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
- struct super_block *sb = c->vfs_sb;
- struct inode *inode;
+ struct bch_inode_info *inode, **i;
+ DARRAY(struct bch_inode_info *) grabbed;
+ bool clean_pass = false, this_pass_clean;
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
- continue;
+ /*
+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+ * be pruned with d_mark_dontcache().
+ *
+ * Once we've had a clean pass where we didn't find any inodes without
+ * I_DONTCACHE, we wait for them to be freed:
+ */
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
- spin_unlock(&sb->s_inode_list_lock);
+ darray_init(&grabbed);
+ darray_make_room(&grabbed, 1024);
again:
cond_resched();
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
+ this_pass_clean = true;
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+ if (!snapshot_list_has_id(s, inode->ei_subvol))
continue;
- if (!(inode->i_state & I_DONTCACHE)) {
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
+ if (!(inode->v.i_state & I_DONTCACHE) &&
+ !(inode->v.i_state & I_FREEING)) {
+ this_pass_clean = false;
+
+ d_mark_dontcache(&inode->v);
+ d_prune_aliases(&inode->v);
+
+ /*
+ * If i_count was zero, we have to take and release a
+ * ref in order for I_DONTCACHE to be noticed and the
+ * inode to be dropped;
+ */
+
+ if (!atomic_read(&inode->v.i_count) &&
+ igrab(&inode->v) &&
+ darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN))
+ break;
+ } else if (clean_pass && this_pass_clean) {
+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
- spin_lock(&inode->i_lock);
- if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
- !(inode->i_state & I_FREEING)) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
+ mutex_unlock(&c->vfs_inodes_lock);
+
schedule();
finish_wait(wq, &wait.wq_entry);
goto again;
}
+ }
+ mutex_unlock(&c->vfs_inodes_lock);
+
+ darray_for_each(grabbed, i)
+ iput(&(*i)->v);
+ grabbed.nr = 0;
- spin_unlock(&inode->i_lock);
+ if (!clean_pass || !this_pass_clean) {
+ clean_pass = this_pass_clean;
+ goto again;
}
- spin_unlock(&sb->s_inode_list_lock);
+
+ darray_exit(&grabbed);
}
static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
static int bch2_sync_fs(struct super_block *sb, int wait)
{
struct bch_fs *c = sb->s_fs_info;
+ int ret;
if (c->opts.journal_flush_disabled)
return 0;
return 0;
}
- return bch2_journal_flush(&c->journal);
+ ret = bch2_journal_flush(&c->journal);
+ return bch2_err_class(ret);
}
static struct bch_fs *bch2_path_to_fs(const char *path)
ret = bch2_parse_mount_opts(c, &opts, data);
if (ret)
- return ret;
+ goto err;
if (opts.read_only != c->opts.read_only) {
down_write(&c->state_lock);
if (ret) {
bch_err(c, "error going rw: %i", ret);
up_write(&c->state_lock);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
sb->s_flags &= ~SB_RDONLY;
if (opts.errors >= 0)
c->opts.errors = opts.errors;
-
- return ret;
+err:
+ return bch2_err_class(ret);
}
static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
kfree(devs[0]);
kfree(devs);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
+ if (IS_ERR(sb)) {
+ ret = PTR_ERR(sb);
+ ret = bch2_err_class(ret);
+ return ERR_PTR(ret);
+ }
c = sb->s_fs_info;
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
c->vfs_sb = sb;
- strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+ strscpy(sb->s_id, c->name, sizeof(sb->s_id));
ret = super_setup_bdi(sb);
if (ret)
sb->s_shrink.seeks = 0;
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
- if (IS_ERR(vinode)) {
- bch_err(c, "error mounting: error getting root inode %i",
- (int) PTR_ERR(vinode));
- ret = PTR_ERR(vinode);
+ ret = PTR_ERR_OR_ZERO(vinode);
+ if (ret) {
+ bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
goto err_put_super;
}
err_put_super:
deactivate_locked_super(sb);
- return ERR_PTR(ret);
+ return ERR_PTR(bch2_err_class(ret));
}
static void bch2_kill_sb(struct super_block *sb)
void bch2_vfs_exit(void)
{
unregister_filesystem(&bcache_fs_type);
- if (bch2_inode_cache)
- kmem_cache_destroy(bch2_inode_cache);
+ kmem_cache_destroy(bch2_inode_cache);
}
int __init bch2_vfs_init(void)