struct bch_inode_unpacked *,
struct bch_subvolume *);
-static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
-{
- BUG_ON(atomic_long_read(&lock->v) == 0);
-
- if (atomic_long_sub_return_release(i, &lock->v) == 0)
- wake_up_all(&lock->wait);
-}
-
-static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
-{
- long v = atomic_long_read(&lock->v), old;
-
- do {
- old = v;
-
- if (i > 0 ? v < 0 : v > 0)
- return false;
- } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
- old, old + i)) != old);
- return true;
-}
-
-static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
-{
- wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
-}
-
-void bch2_pagecache_add_put(struct pagecache_lock *lock)
-{
- __pagecache_lock_put(lock, 1);
-}
-
-bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
-{
- return __pagecache_lock_tryget(lock, 1);
-}
-
-void bch2_pagecache_add_get(struct pagecache_lock *lock)
-{
- __pagecache_lock_get(lock, 1);
-}
-
-void bch2_pagecache_block_put(struct pagecache_lock *lock)
-{
- __pagecache_lock_put(lock, -1);
-}
-
-void bch2_pagecache_block_get(struct pagecache_lock *lock)
-{
- __pagecache_lock_get(lock, -1);
-}
-
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
+ bch2_fs_fatal_err_on(ret == -ENOENT, c,
+ "inode %u:%llu not found when updating",
+ inode_inum(inode).subvol,
+ inode_inum(inode).inum);
+
bch2_trans_exit(&trans);
return ret < 0 ? ret : 0;
}
return ERR_PTR(ret);
}
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+
unlock_new_inode(&inode->v);
return &inode->v;
inode = old;
} else {
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
(subvol_inum) { 0 }, 0);
if (IS_ERR(inode))
- return PTR_ERR(inode);
+ return bch2_err_class(PTR_ERR(inode));
d_instantiate(dentry, &inode->v);
return 0;
bch2_trans_init(&trans, c, 4, 1024);
ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- bch2_unlink_trans(&trans,
- inode_inum(dir), &dir_u,
- &inode_u, &dentry->d_name,
- deleting_snapshot));
+ BTREE_INSERT_NOFAIL,
+ bch2_unlink_trans(&trans,
+ inode_inum(dir), &dir_u,
+ &inode_u, &dentry->d_name,
+ deleting_snapshot));
+ if (unlikely(ret))
+ goto err;
- if (likely(!ret)) {
- bch2_inode_update_after_write(&trans, dir, &dir_u,
- ATTR_MTIME|ATTR_CTIME);
- bch2_inode_update_after_write(&trans, inode, &inode_u,
- ATTR_MTIME);
- }
+ bch2_inode_update_after_write(&trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ bch2_inode_update_after_write(&trans, inode, &inode_u,
+ ATTR_MTIME);
+ if (inode_u.bi_subvol) {
+ /*
+ * Subvolume deletion is asynchronous, but we still want to tell
+ * the VFS that it's been deleted here:
+ */
+ set_nlink(&inode->v, 0);
+ }
+err:
bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
- if (unlikely(IS_ERR(inode)))
- return PTR_ERR(inode);
+ if (IS_ERR(inode))
+ return bch2_err_class(PTR_ERR(inode));
inode_lock(&inode->v);
ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
qid = inode->ei_qid;
if (attr->ia_valid & ATTR_UID)
- qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+ qid.q[QTYP_USR] = from_kuid(mnt_userns, attr->ia_uid);
if (attr->ia_valid & ATTR_GID)
- qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+ qid.q[QTYP_GRP] = from_kgid(mnt_userns, attr->ia_gid);
ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);
}
static int bch2_tmpfile(struct user_namespace *mnt_userns,
- struct inode *vdir, struct dentry *dentry, umode_t mode)
+ struct inode *vdir, struct file *file, umode_t mode)
{
struct bch_inode_info *inode =
- __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
+ __bch2_create(mnt_userns, to_bch_ei(vdir),
+ file->f_path.dentry, mode, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
if (IS_ERR(inode))
- return PTR_ERR(inode);
+ return bch2_err_class(PTR_ERR(inode));
- d_mark_tmpfile(dentry, &inode->v);
- d_instantiate(dentry, &inode->v);
- return 0;
+ d_mark_tmpfile(file, &inode->v);
+ d_instantiate(file->f_path.dentry, &inode->v);
+ return finish_open_simple(file, 0);
}
static int bch2_fill_extent(struct bch_fs *c,
int flags2 = 0;
u64 offset = p.ptr.offset;
+ if (p.ptr.unwritten)
+ flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
if (p.crc.compression_type)
flags2 |= FIEMAP_EXTENT_ENCODED;
else
.release_folio = bch2_release_folio,
.direct_IO = noop_direct_IO,
#ifdef CONFIG_MIGRATION
- .migratepage = bch2_migrate_page,
+ .migrate_folio = filemap_migrate_folio,
#endif
.error_remove_page = generic_error_remove_page,
};
inode->v.i_op = &bch_special_inode_operations;
break;
}
+
+ mapping_set_large_folios(inode->v.i_mapping);
}
static struct inode *bch2_alloc_inode(struct super_block *sb)
inode_init_once(&inode->v);
mutex_init(&inode->ei_update_lock);
- pagecache_lock_init(&inode->ei_pagecache_lock);
+ two_state_lock_init(&inode->ei_pagecache_lock);
+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
mutex_init(&inode->ei_quota_lock);
return &inode->v;
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode_inum(inode));
}
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_del_init(&inode->ei_vfs_inode_list);
+ mutex_unlock(&c->vfs_inodes_lock);
}
-void bch2_evict_subvolume_inodes(struct bch_fs *c,
- snapshot_id_list *s)
+/*
+ * Push the cached VFS inodes belonging to any subvolume listed in @s out of
+ * the inode cache, and wait until they are gone.
+ *
+ * Walks c->vfs_inodes_list under c->vfs_inodes_lock. References taken with
+ * igrab() are only released after the lock has been dropped.
+ */
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
- struct super_block *sb = c->vfs_sb;
- struct inode *inode;
+ struct bch_inode_info *inode, *unrecorded = NULL, **i;
+ DARRAY(struct bch_inode_info *) grabbed;
+ bool clean_pass = false, this_pass_clean;
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
- continue;
+ /*
+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+ * be pruned with d_mark_dontcache().
+ *
+ * Once we've had a clean pass where we didn't find any inodes without
+ * I_DONTCACHE, we wait for them to be freed:
+ */
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
- spin_unlock(&sb->s_inode_list_lock);
+ darray_init(&grabbed);
+ darray_make_room(&grabbed, 1024);
again:
cond_resched();
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
- (inode->i_state & I_FREEING))
+ this_pass_clean = true;
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+ if (!snapshot_list_has_id(s, inode->ei_subvol))
continue;
- if (!(inode->i_state & I_DONTCACHE)) {
- d_mark_dontcache(inode);
- d_prune_aliases(inode);
- }
+ if (!(inode->v.i_state & I_DONTCACHE) &&
+ !(inode->v.i_state & I_FREEING)) {
+ this_pass_clean = false;
+
+ d_mark_dontcache(&inode->v);
+ d_prune_aliases(&inode->v);
+
+ /*
+ * If i_count was zero, we have to take and release a
+ * ref in order for I_DONTCACHE to be noticed and the
+ * inode to be dropped:
+ */
+
+ if (!atomic_read(&inode->v.i_count) &&
+ igrab(&inode->v) &&
+ darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
+ /*
+ * We hold a ref that could not be recorded in
+ * @grabbed; remember it so that it is still
+ * released once the lock is dropped, instead
+ * of being leaked:
+ */
+ unrecorded = inode;
+ break;
+ }
+ } else if (clean_pass && this_pass_clean) {
+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
- spin_lock(&inode->i_lock);
- if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
- !(inode->i_state & I_FREEING)) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
+ mutex_unlock(&c->vfs_inodes_lock);
+
schedule();
finish_wait(wq, &wait.wq_entry);
goto again;
}
+ }
+ mutex_unlock(&c->vfs_inodes_lock);
+
+ darray_for_each(grabbed, i)
+ iput(&(*i)->v);
+ grabbed.nr = 0;
+
+ if (unrecorded) {
+ iput(&unrecorded->v);
+ unrecorded = NULL;
+ }
- spin_unlock(&inode->i_lock);
+ if (!clean_pass || !this_pass_clean) {
+ clean_pass = this_pass_clean;
+ goto again;
}
- spin_unlock(&sb->s_inode_list_lock);
+
+ darray_exit(&grabbed);
}
static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
kfree(devs[0]);
kfree(devs);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
+ if (IS_ERR(sb)) {
+ ret = PTR_ERR(sb);
+ ret = bch2_err_class(ret);
+ return ERR_PTR(ret);
+ }
c = sb->s_fs_info;
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
c->vfs_sb = sb;
- strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+ strscpy(sb->s_id, c->name, sizeof(sb->s_id));
ret = super_setup_bdi(sb);
if (ret)
err_put_super:
deactivate_locked_super(sb);
- return ERR_PTR(ret);
+ return ERR_PTR(bch2_err_class(ret));
}
static void bch2_kill_sb(struct super_block *sb)
void bch2_vfs_exit(void)
{
unregister_filesystem(&bcache_fs_type);
- if (bch2_inode_cache)
- kmem_cache_destroy(bch2_inode_cache);
+ kmem_cache_destroy(bch2_inode_cache);
}
int __init bch2_vfs_init(void)