X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Ffs.c;h=a324278b6f43d1df3dcf0d67664c2b52d0458ec3;hb=7e3538959966b80be3d4cfcb7076c8ab40a4be11;hp=43688cd34de9a992a3c7833be717fa08488a998f;hpb=ea83a3985d28372d56ec7cea6e73907551869f63;p=bcachefs-tools-debian diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 43688cd..a324278 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -12,8 +12,10 @@ #include "fs-ioctl.h" #include "fsck.h" #include "inode.h" +#include "io.h" #include "journal.h" #include "keylist.h" +#include "quota.h" #include "super.h" #include "xattr.h" @@ -32,6 +34,19 @@ static void bch2_vfs_inode_init(struct bch_fs *, struct bch_inode_info *, struct bch_inode_unpacked *); +static void journal_seq_copy(struct bch_inode_info *dst, + u64 journal_seq) +{ + u64 old, v = READ_ONCE(dst->ei_journal_seq); + + do { + old = v; + + if (old >= journal_seq) + break; + } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); +} + /* * I_SIZE_DIRTY requires special handling: * @@ -60,117 +75,181 @@ static void bch2_vfs_inode_init(struct bch_fs *, * be set explicitly. */ -int __must_check __bch2_write_inode(struct bch_fs *c, - struct bch_inode_info *inode, - inode_set_fn set, - void *p) +void bch2_inode_update_after_write(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + unsigned fields) { - struct btree_iter iter; - struct bch_inode_unpacked inode_u; - struct bkey_inode_buf inode_p; - u64 inum = inode->v.i_ino; - unsigned i_nlink = READ_ONCE(inode->v.i_nlink); - int ret; - - /* - * We can't write an inode with i_nlink == 0 because it's stored biased; - * however, we don't need to because if i_nlink is 0 the inode is - * getting deleted when it's evicted. - */ - if (!i_nlink) - return 0; + set_nlink(&inode->v, bi->bi_flags & BCH_INODE_UNLINKED + ? 0 + : bi->bi_nlink + nlink_bias(inode->v.i_mode)); + i_uid_write(&inode->v, bi->bi_uid); + i_gid_write(&inode->v, bi->bi_gid); + inode->v.i_mode = bi->bi_mode; - lockdep_assert_held(&inode->ei_update_lock); + if (fields & ATTR_ATIME) + inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); + if (fields & ATTR_MTIME) + inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); + if (fields & ATTR_CTIME) + inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); - bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0), - BTREE_ITER_INTENT); + inode->ei_inode = *bi; - do { - struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter); + bch2_inode_flags_to_vfs(inode); +} - if ((ret = btree_iter_err(k))) - goto out; +int __must_check bch2_write_inode_trans(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *inode_u, + inode_set_fn set, + void *p) +{ + struct bch_fs *c = trans->c; + struct btree_iter *iter = NULL; + struct bkey_inode_buf *inode_p; + int ret; - if (WARN_ONCE(k.k->type != BCH_INODE_FS, - "inode %llu not found when updating", inum)) { - bch2_btree_iter_unlock(&iter); - return -ENOENT; - } + lockdep_assert_held(&inode->ei_update_lock); - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); - if (WARN_ONCE(ret, - "error %i unpacking inode %llu", ret, inum)) { - ret = -ENOENT; - break; - } + if (c->opts.new_inode_updates) { + /* XXX: Don't do this with btree locks held */ + if (!inode->ei_inode_update) + inode->ei_inode_update = + bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64); + } else { + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + POS(inode->v.i_ino, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + /* The btree node lock is our lock on the inode: */ + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; + } - if (set) { - ret = set(inode, &inode_u, p); - if (ret) - goto out; - } + *inode_u = inode->ei_inode; - BUG_ON(i_nlink < nlink_bias(inode->v.i_mode)); - - inode_u.bi_mode = inode->v.i_mode; - inode_u.bi_uid = i_uid_read(&inode->v); - inode_u.bi_gid = i_gid_read(&inode->v); - inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode); - inode_u.bi_dev = inode->v.i_rdev; - inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime); - inode_u.bi_mtime= timespec_to_bch2_time(c, inode->v.i_mtime); - inode_u.bi_ctime= timespec_to_bch2_time(c, inode->v.i_ctime); - - bch2_inode_pack(&inode_p, &inode_u); - - ret = bch2_btree_insert_at(c, NULL, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOFAIL, - BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i)); - } while (ret == -EINTR); - - if (!ret) { - inode->ei_size = inode_u.bi_size; - inode->ei_flags = inode_u.bi_flags; + if (set) { + ret = set(inode, inode_u, p); + if (ret) + return ret; } -out: - bch2_btree_iter_unlock(&iter); - return ret < 0 ? ret : 0; + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return PTR_ERR(inode_p); + + bch2_inode_pack(inode_p, inode_u); + + if (!inode->ei_inode_update) + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); + else + bch2_trans_update(trans, + BTREE_INSERT_DEFERRED(inode->ei_inode_update, + &inode_p->inode.k_i)); + + return 0; } int __must_check bch2_write_inode(struct bch_fs *c, - struct bch_inode_info *inode) + struct bch_inode_info *inode, + inode_set_fn set, + void *p, unsigned fields) { - return __bch2_write_inode(c, inode, NULL, NULL); + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + int ret; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_write_inode_trans(&trans, inode, &inode_u, set, p) ?: + bch2_trans_commit(&trans, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; + + /* + * the btree node lock protects inode->ei_inode, not ei_update_lock; + * this is important for inode updates via bchfs_write_index_update + */ + if (!ret) + bch2_inode_update_after_write(c, inode, &inode_u, fields); + + bch2_trans_exit(&trans); + return ret < 0 ? ret : 0; } -int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode) +int bch2_fs_quota_transfer(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_qid new_qid, + unsigned qtypes, + enum quota_acct_mode mode) { + unsigned i; int ret; - mutex_lock(&inode->ei_update_lock); - inc_nlink(&inode->v); - ret = bch2_write_inode(c, inode); - mutex_unlock(&inode->ei_update_lock); + qtypes &= enabled_qtypes(c); + + for (i = 0; i < QTYP_NR; i++) + if (new_qid.q[i] == inode->ei_qid.q[i]) + qtypes &= ~(1U << i); + + if (!qtypes) + return 0; + + mutex_lock(&inode->ei_quota_lock); + + ret = bch2_quota_transfer(c, qtypes, new_qid, + inode->ei_qid, + inode->v.i_blocks + + inode->ei_quota_reserved, + mode); + if (!ret) + for (i = 0; i < QTYP_NR; i++) + if (qtypes & (1 << i)) + inode->ei_qid.q[i] = new_qid.q[i]; + + mutex_unlock(&inode->ei_quota_lock); return ret; } -int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode) +int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) { - int ret = 0; + struct bch_inode_info *dir = p; + u64 src, dst; + unsigned id; + int ret = 1; - mutex_lock(&inode->ei_update_lock); - drop_nlink(&inode->v); - ret = bch2_write_inode(c, inode); - mutex_unlock(&inode->ei_update_lock); + for (id = 0; id < Inode_opt_nr; id++) { + if (bi->bi_fields_set & (1 << id)) + continue; + + src = bch2_inode_opt_get(&dir->ei_inode, id); + dst = bch2_inode_opt_get(bi, id); + + if (src == dst) + continue; + + bch2_inode_opt_set(bi, id, src); + ret = 0; + } return ret; } -static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) +struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; @@ -197,117 +276,178 @@ static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) return &inode->v; } -static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c, - struct bch_inode_info *dir, - umode_t mode, dev_t rdev) +static void bch2_inode_init_owner(struct bch_inode_unpacked *inode_u, + const struct inode *dir, umode_t mode) { - struct posix_acl *default_acl = NULL, *acl = NULL; - struct bch_inode_info *inode; + kuid_t uid = current_fsuid(); + kgid_t gid; + + if (dir && dir->i_mode & S_ISGID) { + gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + gid = current_fsgid(); + + inode_u->bi_uid = from_kuid(dir->i_sb->s_user_ns, uid); + inode_u->bi_gid = from_kgid(dir->i_sb->s_user_ns, gid); + inode_u->bi_mode = mode; +} + +static int inode_update_for_create_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_inode_unpacked *new_inode = p; + + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); + + if (S_ISDIR(new_inode->bi_mode)) + bi->bi_nlink++; + + return 0; +} + +static struct bch_inode_info * +__bch2_create(struct bch_inode_info *dir, struct dentry *dentry, + umode_t mode, dev_t rdev, bool tmpfile) +{ + struct bch_fs *c = dir->v.i_sb->s_fs_info; + struct btree_trans trans; + struct bch_inode_unpacked dir_u; + struct bch_inode_info *inode, *old; struct bch_inode_unpacked inode_u; + struct bch_hash_info hash_info; + struct posix_acl *default_acl = NULL, *acl = NULL; + u64 journal_seq = 0; int ret; - inode = to_bch_ei(new_inode(c->vfs_sb)); - if (unlikely(!inode)) - return ERR_PTR(-ENOMEM); + bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode); + bch2_inode_init_owner(&inode_u, &dir->v, mode); + + hash_info = bch2_hash_info_init(c, &inode_u); - inode_init_owner(&inode->v, &dir->v, mode); + if (tmpfile) + inode_u.bi_flags |= BCH_INODE_UNLINKED; + + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + return ERR_PTR(ret); #ifdef CONFIG_BCACHEFS_POSIX_ACL - ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl); - if (ret) { - make_bad_inode(&inode->v); + ret = posix_acl_create(&dir->v, &inode_u.bi_mode, &default_acl, &acl); + if (ret) goto err; - } #endif - bch2_inode_init(c, &inode_u, - i_uid_read(&inode->v), - i_gid_read(&inode->v), - inode->v.i_mode, rdev); - ret = bch2_inode_create(c, &inode_u, - BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint); - if (unlikely(ret)) { - /* - * indicate to bch_evict_inode that the inode was never actually - * created: - */ - make_bad_inode(&inode->v); + /* + * preallocate vfs inode before btree transaction, so that nothing can + * fail after the transaction succeeds: + */ + inode = to_bch_ei(new_inode(c->vfs_sb)); + if (unlikely(!inode)) { + ret = -ENOMEM; goto err; } - bch2_vfs_inode_init(c, inode, &inode_u); + if (!tmpfile) + mutex_lock(&dir->ei_update_lock); + + bch2_trans_init(&trans, c, 8, 1024); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_inode_create(&trans, &inode_u, + BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint) ?: + (default_acl + ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, + default_acl, ACL_TYPE_DEFAULT) + : 0) ?: + (acl + ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, + acl, ACL_TYPE_ACCESS) + : 0) ?: + (!tmpfile + ? __bch2_dirent_create(&trans, dir->v.i_ino, + &dir->ei_str_hash, + mode_to_type(mode), + &dentry->d_name, + inode_u.bi_inum, + BCH_HASH_SET_MUST_CREATE) + : 0) ?: + (!tmpfile + ? bch2_write_inode_trans(&trans, dir, &dir_u, + inode_update_for_create_fn, + &inode_u) + : 0) ?: + bch2_trans_commit(&trans, NULL, + &journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err_trans; - if (default_acl) { - ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT); - if (unlikely(ret)) - goto err; + if (!tmpfile) { + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(dir, journal_seq); + mutex_unlock(&dir->ei_update_lock); } - if (acl) { - ret = bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS); - if (unlikely(ret)) - goto err; + bch2_vfs_inode_init(c, inode, &inode_u); + journal_seq_copy(inode, journal_seq); + + set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); + set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); + + /* + * we must insert the new inode into the inode cache before calling + * bch2_trans_exit() and dropping locks, else we could race with another + * thread pulling the inode in and modifying it: + */ + + old = to_bch_ei(insert_inode_locked2(&inode->v)); + if (unlikely(old)) { + /* + * We raced, another process pulled the new inode into cache + * before us: + */ + old->ei_journal_seq = inode->ei_journal_seq; + make_bad_inode(&inode->v); + iput(&inode->v); + + inode = old; + } else { + /* + * we really don't want insert_inode_locked2() to be setting + * I_NEW... + */ + unlock_new_inode(&inode->v); } - insert_inode_hash(&inode->v); - atomic_long_inc(&c->nr_inodes); + bch2_trans_exit(&trans); out: posix_acl_release(default_acl); posix_acl_release(acl); return inode; -err: - clear_nlink(&inode->v); +err_trans: + if (!tmpfile) + mutex_unlock(&dir->ei_update_lock); + + bch2_trans_exit(&trans); + make_bad_inode(&inode->v); iput(&inode->v); +err: + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); inode = ERR_PTR(ret); goto out; } -static int bch2_vfs_dirent_create(struct bch_fs *c, - struct bch_inode_info *dir, - u8 type, const struct qstr *name, - u64 dst) -{ - int ret; - - ret = bch2_dirent_create(c, dir->v.i_ino, &dir->ei_str_hash, - type, name, dst, - &dir->ei_journal_seq, - BCH_HASH_SET_MUST_CREATE); - if (unlikely(ret)) - return ret; - - dir->v.i_mtime = dir->v.i_ctime = current_fs_time(c->vfs_sb); - mark_inode_dirty_sync(&dir->v); - return 0; -} - -static int __bch2_create(struct bch_inode_info *dir, struct dentry *dentry, - umode_t mode, dev_t rdev) -{ - struct bch_fs *c = dir->v.i_sb->s_fs_info; - struct bch_inode_info *inode; - int ret; - - inode = bch2_vfs_inode_create(c, dir, mode, rdev); - if (unlikely(IS_ERR(inode))) - return PTR_ERR(inode); - - ret = bch2_vfs_dirent_create(c, dir, mode_to_type(mode), - &dentry->d_name, inode->v.i_ino); - if (unlikely(ret)) { - clear_nlink(&inode->v); - iput(&inode->v); - return ret; - } - - if (dir->ei_journal_seq > inode->ei_journal_seq) - inode->ei_journal_seq = dir->ei_journal_seq; - - d_instantiate(dentry, &inode->v); - return 0; -} - /* methods */ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, @@ -331,7 +471,69 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, static int bch2_create(struct inode *vdir, struct dentry *dentry, umode_t mode, bool excl) { - return __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFREG, 0); + struct bch_inode_info *inode = + __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFREG, 0, false); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, &inode->v); + return 0; +} + +static int inode_update_for_link_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + bi->bi_ctime = bch2_current_time(c); + + if (bi->bi_flags & BCH_INODE_UNLINKED) + bi->bi_flags &= ~BCH_INODE_UNLINKED; + else + bi->bi_nlink++; + + return 0; +} + +static int __bch2_link(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_inode_info *dir, + struct dentry *dentry) +{ + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + int ret; + + mutex_lock(&inode->ei_update_lock); + bch2_trans_init(&trans, c, 4, 1024); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_dirent_create(&trans, dir->v.i_ino, + &dir->ei_str_hash, + mode_to_type(inode->v.i_mode), + &dentry->d_name, + inode->v.i_ino, + BCH_HASH_SET_MUST_CREATE) ?: + bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_link_fn, + NULL) ?: + bch2_trans_commit(&trans, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + + if (ret == -EINTR) + goto retry; + + if (likely(!ret)) + bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); + + bch2_trans_exit(&trans); + mutex_unlock(&inode->ei_update_lock); + return ret; } static int bch2_link(struct dentry *old_dentry, struct inode *vdir, @@ -344,23 +546,41 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, lockdep_assert_held(&inode->v.i_rwsem); - inode->v.i_ctime = current_fs_time(dir->v.i_sb); - - ret = bch2_inc_nlink(c, inode); - if (ret) + ret = __bch2_link(c, inode, dir, dentry); + if (unlikely(ret)) return ret; ihold(&inode->v); + d_instantiate(dentry, &inode->v); + return 0; +} - ret = bch2_vfs_dirent_create(c, dir, mode_to_type(inode->v.i_mode), - &dentry->d_name, inode->v.i_ino); - if (unlikely(ret)) { - bch2_dec_nlink(c, inode); - iput(&inode->v); - return ret; - } +static int inode_update_dir_for_unlink_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_inode_info *unlink_inode = p; + + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); + + bi->bi_nlink -= S_ISDIR(unlink_inode->v.i_mode); + + return 0; +} + +static int inode_update_for_unlink_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + bi->bi_ctime = bch2_current_time(c); + if (bi->bi_nlink) + bi->bi_nlink--; + else + bi->bi_flags |= BCH_INODE_UNLINKED; - d_instantiate(dentry, &inode->v); return 0; } @@ -369,28 +589,46 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_unpacked dir_u, inode_u; + struct btree_trans trans; int ret; - lockdep_assert_held(&inode->v.i_rwsem); - - ret = bch2_dirent_delete(c, dir->v.i_ino, &dir->ei_str_hash, - &dentry->d_name, &dir->ei_journal_seq); + bch2_lock_inodes(dir, inode); + bch2_trans_init(&trans, c, 4, 1024); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_dirent_delete(&trans, dir->v.i_ino, + &dir->ei_str_hash, + &dentry->d_name) ?: + bch2_write_inode_trans(&trans, dir, &dir_u, + inode_update_dir_for_unlink_fn, + inode) ?: + bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_unlink_fn, + NULL) ?: + bch2_trans_commit(&trans, NULL, + &dir->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; if (ret) - return ret; + goto err; if (dir->ei_journal_seq > inode->ei_journal_seq) inode->ei_journal_seq = dir->ei_journal_seq; - inode->v.i_ctime = dir->v.i_ctime; - - if (S_ISDIR(inode->v.i_mode)) { - bch2_dec_nlink(c, dir); - drop_nlink(&inode->v); - } - - bch2_dec_nlink(c, inode); + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + bch2_inode_update_after_write(c, inode, &inode_u, + ATTR_MTIME); +err: + bch2_trans_exit(&trans); + bch2_unlock_inodes(dir, inode); - return 0; + return ret; } static int bch2_symlink(struct inode *vdir, struct dentry *dentry, @@ -400,7 +638,7 @@ static int bch2_symlink(struct inode *vdir, struct dentry *dentry, struct bch_inode_info *dir = to_bch_ei(vdir), *inode; int ret; - inode = bch2_vfs_inode_create(c, dir, S_IFLNK|S_IRWXUGO, 0); + inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); @@ -415,37 +653,28 @@ static int bch2_symlink(struct inode *vdir, struct dentry *dentry, if (unlikely(ret)) goto err; - /* XXX: racy */ - if (dir->ei_journal_seq < inode->ei_journal_seq) - dir->ei_journal_seq = inode->ei_journal_seq; + journal_seq_copy(dir, inode->ei_journal_seq); - ret = bch2_vfs_dirent_create(c, dir, DT_LNK, &dentry->d_name, - inode->v.i_ino); + ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) goto err; d_instantiate(dentry, &inode->v); return 0; err: - clear_nlink(&inode->v); iput(&inode->v); return ret; } static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode) { - struct bch_fs *c = vdir->i_sb->s_fs_info; - struct bch_inode_info *dir = to_bch_ei(vdir); - int ret; - - lockdep_assert_held(&dir->v.i_rwsem); + struct bch_inode_info *inode = + __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFDIR, 0, false); - ret = __bch2_create(dir, dentry, mode|S_IFDIR, 0); - if (unlikely(ret)) - return ret; - - bch2_inc_nlink(c, dir); + if (IS_ERR(inode)) + return PTR_ERR(inode); + d_instantiate(dentry, &inode->v); return 0; } @@ -462,152 +691,355 @@ static int bch2_rmdir(struct inode *vdir, struct dentry *dentry) static int bch2_mknod(struct inode *vdir, struct dentry *dentry, umode_t mode, dev_t rdev) { - return __bch2_create(to_bch_ei(vdir), dentry, mode, rdev); + struct bch_inode_info *inode = + __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, &inode->v); + return 0; } -static int bch2_rename(struct bch_fs *c, - struct bch_inode_info *old_dir, - struct dentry *old_dentry, - struct bch_inode_info *new_dir, - struct dentry *new_dentry) +struct rename_info { + u64 now; + struct bch_inode_info *src_dir; + struct bch_inode_info *dst_dir; + struct bch_inode_info *src_inode; + struct bch_inode_info *dst_inode; + enum bch_rename_mode mode; +}; + +static int inode_update_for_rename_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) { - struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode); - struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode); - struct timespec now = current_fs_time(old_dir->v.i_sb); + struct rename_info *info = p; int ret; - lockdep_assert_held(&old_dir->v.i_rwsem); - lockdep_assert_held(&new_dir->v.i_rwsem); + if (inode == info->src_dir) { + bi->bi_nlink -= S_ISDIR(info->src_inode->v.i_mode); + bi->bi_nlink += info->dst_inode && + S_ISDIR(info->dst_inode->v.i_mode) && + info->mode == BCH_RENAME_EXCHANGE; + } + + if (inode == info->dst_dir) { + bi->bi_nlink += S_ISDIR(info->src_inode->v.i_mode); + bi->bi_nlink -= info->dst_inode && + S_ISDIR(info->dst_inode->v.i_mode); + } + + if (inode == info->src_inode) { + ret = bch2_reinherit_attrs_fn(inode, bi, info->dst_dir); + + BUG_ON(!ret && S_ISDIR(info->src_inode->v.i_mode)); + } + + if (inode == info->dst_inode && + info->mode == BCH_RENAME_EXCHANGE) { + ret = bch2_reinherit_attrs_fn(inode, bi, info->src_dir); + + BUG_ON(!ret && S_ISDIR(info->dst_inode->v.i_mode)); + } + + if (inode == info->dst_inode && + info->mode == BCH_RENAME_OVERWRITE) { + BUG_ON(bi->bi_nlink && + S_ISDIR(info->dst_inode->v.i_mode)); + + if (bi->bi_nlink) + bi->bi_nlink--; + else + bi->bi_flags |= BCH_INODE_UNLINKED; + } - if (new_inode) - filemap_write_and_wait_range(old_inode->v.i_mapping, - 0, LLONG_MAX); + if (inode == info->src_dir || + inode == info->dst_dir) + bi->bi_mtime = info->now; + bi->bi_ctime = info->now; - if (new_inode && S_ISDIR(old_inode->v.i_mode)) { - lockdep_assert_held(&new_inode->v.i_rwsem); + return 0; +} + +static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry, + struct inode *dst_vdir, struct dentry *dst_dentry, + unsigned flags) +{ + struct bch_fs *c = src_vdir->i_sb->s_fs_info; + struct rename_info i = { + .src_dir = to_bch_ei(src_vdir), + .dst_dir = to_bch_ei(dst_vdir), + .src_inode = to_bch_ei(src_dentry->d_inode), + .dst_inode = to_bch_ei(dst_dentry->d_inode), + .mode = flags & RENAME_EXCHANGE + ? BCH_RENAME_EXCHANGE + : dst_dentry->d_inode + ? BCH_RENAME_OVERWRITE : BCH_RENAME, + }; + struct btree_trans trans; + struct bch_inode_unpacked dst_dir_u, src_dir_u; + struct bch_inode_unpacked src_inode_u, dst_inode_u; + u64 journal_seq = 0; + int ret; - if (!S_ISDIR(new_inode->v.i_mode)) + if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) + return -EINVAL; + + if (i.mode == BCH_RENAME_OVERWRITE) { + if (S_ISDIR(i.src_inode->v.i_mode) != + S_ISDIR(i.dst_inode->v.i_mode)) return -ENOTDIR; - if (bch2_empty_dir(c, new_inode->v.i_ino)) + if (S_ISDIR(i.src_inode->v.i_mode) && + bch2_empty_dir(c, i.dst_inode->v.i_ino)) return -ENOTEMPTY; - ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE); - if (unlikely(ret)) + ret = filemap_write_and_wait_range(i.src_inode->v.i_mapping, + 0, LLONG_MAX); + if (ret) return ret; + } - clear_nlink(&new_inode->v); - bch2_dec_nlink(c, old_dir); - } else if (new_inode) { - lockdep_assert_held(&new_inode->v.i_rwsem); + bch2_trans_init(&trans, c, 8, 2048); - ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE); - if (unlikely(ret)) - return ret; + bch2_lock_inodes(i.src_dir, + i.dst_dir, + i.src_inode, + i.dst_inode); - new_inode->v.i_ctime = now; - bch2_dec_nlink(c, new_inode); - } else if (S_ISDIR(old_inode->v.i_mode)) { - ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &old_inode->ei_journal_seq, BCH_RENAME); - if (unlikely(ret)) - return ret; + if (S_ISDIR(i.src_inode->v.i_mode) && + inode_attrs_changing(i.dst_dir, i.src_inode)) { + ret = -EXDEV; + goto err; + } - bch2_inc_nlink(c, new_dir); - bch2_dec_nlink(c, old_dir); - } else { - ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &old_inode->ei_journal_seq, BCH_RENAME); - if (unlikely(ret)) - return ret; + if (i.mode == BCH_RENAME_EXCHANGE && + S_ISDIR(i.dst_inode->v.i_mode) && + inode_attrs_changing(i.src_dir, i.dst_inode)) { + ret = -EXDEV; + goto err; } - old_dir->v.i_ctime = old_dir->v.i_mtime = now; - new_dir->v.i_ctime = new_dir->v.i_mtime = now; - mark_inode_dirty_sync(&old_dir->v); - mark_inode_dirty_sync(&new_dir->v); + if (inode_attr_changing(i.dst_dir, i.src_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, i.src_inode, + i.dst_dir->ei_qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; + } + + if (i.mode == BCH_RENAME_EXCHANGE && + inode_attr_changing(i.src_dir, i.dst_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, i.dst_inode, + i.src_dir->ei_qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; + } + +retry: + bch2_trans_begin(&trans); + i.now = bch2_current_time(c); + + ret = bch2_dirent_rename(&trans, + i.src_dir, &src_dentry->d_name, + i.dst_dir, &dst_dentry->d_name, + i.mode) ?: + bch2_write_inode_trans(&trans, i.src_dir, &src_dir_u, + inode_update_for_rename_fn, &i) ?: + (i.src_dir != i.dst_dir + ? bch2_write_inode_trans(&trans, i.dst_dir, &dst_dir_u, + inode_update_for_rename_fn, &i) + : 0 ) ?: + bch2_write_inode_trans(&trans, i.src_inode, &src_inode_u, + inode_update_for_rename_fn, &i) ?: + (i.dst_inode + ? bch2_write_inode_trans(&trans, i.dst_inode, &dst_inode_u, + inode_update_for_rename_fn, &i) + : 0 ) ?: + bch2_trans_commit(&trans, NULL, + &journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err; - old_inode->v.i_ctime = now; - mark_inode_dirty_sync(&old_inode->v); + bch2_inode_update_after_write(c, i.src_dir, &src_dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(i.src_dir, journal_seq); + + if (i.src_dir != i.dst_dir) { + bch2_inode_update_after_write(c, i.dst_dir, &dst_dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(i.dst_dir, journal_seq); + } + + journal_seq_copy(i.src_inode, journal_seq); + if (i.dst_inode) + journal_seq_copy(i.dst_inode, journal_seq); + + bch2_inode_update_after_write(c, i.src_inode, &src_inode_u, + ATTR_CTIME); + if (i.dst_inode) + bch2_inode_update_after_write(c, i.dst_inode, &dst_inode_u, + ATTR_CTIME); +err: + bch2_trans_exit(&trans); + + bch2_fs_quota_transfer(c, i.src_inode, + bch_qid(&i.src_inode->ei_inode), + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_NOCHECK); + if (i.dst_inode) + bch2_fs_quota_transfer(c, i.dst_inode, + bch_qid(&i.dst_inode->ei_inode), + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_NOCHECK); + + bch2_unlock_inodes(i.src_dir, + i.dst_dir, + i.src_inode, + i.dst_inode); + + return ret; +} + +static int inode_update_for_setattr_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct iattr *attr = p; + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_UID) + bi->bi_uid = from_kuid(inode->v.i_sb->s_user_ns, attr->ia_uid); + if (ia_valid & ATTR_GID) + bi->bi_gid = from_kgid(inode->v.i_sb->s_user_ns, attr->ia_gid); + + if (ia_valid & ATTR_ATIME) + bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); + if (ia_valid & ATTR_MTIME) + bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); + if (ia_valid & ATTR_CTIME) + bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); + + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + kgid_t gid = ia_valid & ATTR_GID + ? attr->ia_gid + : inode->v.i_gid; + + if (!in_group_p(gid) && + !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID)) + mode &= ~S_ISGID; + bi->bi_mode = mode; + } return 0; } -static int bch2_rename_exchange(struct bch_fs *c, - struct bch_inode_info *old_dir, - struct dentry *old_dentry, - struct bch_inode_info *new_dir, - struct dentry *new_dentry) +static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iattr) { - struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode); - struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode); - struct timespec now = current_fs_time(old_dir->v.i_sb); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_qid qid; + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + struct posix_acl *acl = NULL; int ret; - ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &old_inode->ei_journal_seq, BCH_RENAME_EXCHANGE); - if (unlikely(ret)) - return ret; + mutex_lock(&inode->ei_update_lock); - if (S_ISDIR(old_inode->v.i_mode) != - S_ISDIR(new_inode->v.i_mode)) { - if (S_ISDIR(old_inode->v.i_mode)) { - bch2_inc_nlink(c, new_dir); - bch2_dec_nlink(c, old_dir); - } else { - bch2_dec_nlink(c, new_dir); - bch2_inc_nlink(c, old_dir); - } - } + qid = inode->ei_qid; - old_dir->v.i_ctime = old_dir->v.i_mtime = now; - new_dir->v.i_ctime = new_dir->v.i_mtime = now; - mark_inode_dirty_sync(&old_dir->v); - mark_inode_dirty_sync(&new_dir->v); + if (iattr->ia_valid & ATTR_UID) + qid.q[QTYP_USR] = from_kuid(&init_user_ns, iattr->ia_uid); - old_inode->v.i_ctime = now; - new_inode->v.i_ctime = now; - mark_inode_dirty_sync(&old_inode->v); - mark_inode_dirty_sync(&new_inode->v); + if (iattr->ia_valid & ATTR_GID) + qid.q[QTYP_GRP] = from_kgid(&init_user_ns, iattr->ia_gid); - return 0; + ret = bch2_fs_quota_transfer(c, inode, qid, ~0, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + kfree(acl); + acl = NULL; + + ret = bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_setattr_fn, iattr) ?: + (iattr->ia_valid & ATTR_MODE + ? bch2_acl_chmod(&trans, inode, iattr->ia_mode, &acl) + : 0) ?: + bch2_trans_commit(&trans, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err_trans; + + bch2_inode_update_after_write(c, inode, &inode_u, iattr->ia_valid); + + if (acl) + set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); +err_trans: + bch2_trans_exit(&trans); +err: + mutex_unlock(&inode->ei_update_lock); + + return ret; } -static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry, - struct inode *new_vdir, struct dentry *new_dentry, - unsigned flags) +static int bch2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned query_flags) { - struct bch_fs *c = old_vdir->i_sb->s_fs_info; - struct bch_inode_info *old_dir = to_bch_ei(old_vdir); - struct bch_inode_info *new_dir = to_bch_ei(new_vdir); + struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; - if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) - return -EINVAL; + stat->dev = inode->v.i_sb->s_dev; + stat->ino = inode->v.i_ino; + stat->mode = inode->v.i_mode; + stat->nlink = inode->v.i_nlink; + stat->uid = inode->v.i_uid; + stat->gid = inode->v.i_gid; + stat->rdev = inode->v.i_rdev; + stat->size = i_size_read(&inode->v); + stat->atime = inode->v.i_atime; + stat->mtime = inode->v.i_mtime; + stat->ctime = inode->v.i_ctime; + stat->blksize = block_bytes(c); + stat->blocks = inode->v.i_blocks; + + if (request_mask & STATX_BTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); + } - if (flags & RENAME_EXCHANGE) - return bch2_rename_exchange(c, old_dir, old_dentry, - new_dir, new_dentry); + if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; - return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry); + return 0; } static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = 0; + int ret; lockdep_assert_held(&inode->v.i_rwsem); @@ -615,36 +1047,21 @@ static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) if (ret) return ret; - if (iattr->ia_valid & ATTR_SIZE) { - ret = bch2_truncate(inode, iattr); - } else { - mutex_lock(&inode->ei_update_lock); - setattr_copy(&inode->v, iattr); - ret = bch2_write_inode(c, inode); - mutex_unlock(&inode->ei_update_lock); - } - - if (unlikely(ret)) - return ret; - - if (iattr->ia_valid & ATTR_MODE) - ret = posix_acl_chmod(&inode->v, inode->v.i_mode); - - return ret; + return iattr->ia_valid & ATTR_SIZE + ? bch2_truncate(inode, iattr) + : bch2_setattr_nonsize(inode, iattr); } static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode) { - struct bch_fs *c = vdir->i_sb->s_fs_info; - struct bch_inode_info *dir = to_bch_ei(vdir); - struct bch_inode_info *inode; + struct bch_inode_info *inode = + __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true); - /* XXX: i_nlink should be 0? */ - inode = bch2_vfs_inode_create(c, dir, mode, 0); - if (unlikely(IS_ERR(inode))) + if (IS_ERR(inode)) return PTR_ERR(inode); - d_tmpfile(dentry, &inode->v); + d_mark_tmpfile(dentry, &inode->v); + d_instantiate(dentry, &inode->v); return 0; } @@ -653,33 +1070,33 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, { if (bkey_extent_is_data(&k->k)) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; int ret; - extent_for_each_ptr_crc(e, ptr, crc) { + extent_for_each_ptr_decode(e, p, entry) { int flags2 = 0; - u64 offset = ptr->offset; + u64 offset = p.ptr.offset; - if (crc.compression_type) + if (p.crc.compression_type) flags2 |= FIEMAP_EXTENT_ENCODED; else - offset += crc.offset; + offset += p.crc.offset; if ((offset & (PAGE_SECTORS - 1)) || (e.k->size & (PAGE_SECTORS - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; ret = fiemap_fill_next_extent(info, - bkey_start_offset(e.k) << 9, - offset << 9, - e.k->size << 9, flags|flags2); + bkey_start_offset(e.k) << 9, + offset << 9, + e.k->size << 9, flags|flags2); if (ret) return ret; } return 0; - } else if (k->k.type == BCH_RESERVATION) { + } else if (k->k.type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, bkey_start_offset(&k->k) << 9, 0, k->k.size << 9, @@ -696,7 +1113,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) tmp; bool have_extent = false; @@ -705,10 +1123,12 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), 0, k) + bch2_trans_init(&trans, c, 0, 0); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + POS(ei->v.i_ino, start >> 9), 0, k, ret) if (bkey_extent_is_data(k.k) || - k.k->type == BCH_RESERVATION) { + k.k->type == KEY_TYPE_reservation) { if (bkey_cmp(bkey_start_pos(k.k), POS(ei->v.i_ino, (start + len) >> 9)) >= 0) break; @@ -716,17 +1136,17 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (have_extent) { ret = bch2_fill_extent(info, &tmp.k, 0); if (ret) - goto out; + break; } bkey_reassemble(&tmp.k, k); have_extent = true; } - if (have_extent) + if (!ret && have_extent) ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); -out: - bch2_btree_iter_unlock(&iter); + + ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? ret : 0; } @@ -776,6 +1196,7 @@ static const struct file_operations bch_file_operations = { }; static const struct inode_operations bch_file_inode_operations = { + .getattr = bch2_getattr, .setattr = bch2_setattr, .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, @@ -795,6 +1216,7 @@ static const struct inode_operations bch_dir_inode_operations = { .rmdir = bch2_rmdir, .mknod = bch2_mknod, .rename = bch2_rename2, + .getattr = bch2_getattr, .setattr = bch2_setattr, .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, @@ -817,6 +1239,7 @@ static const struct file_operations bch_dir_file_operations = { static const struct inode_operations bch_symlink_inode_operations = { .get_link = page_get_link, + .getattr = bch2_getattr, .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL @@ -826,6 +1249,7 @@ static const struct inode_operations bch_symlink_inode_operations = { }; static const struct inode_operations bch_special_inode_operations = { + .getattr = bch2_getattr, .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL @@ -895,26 +1319,18 @@ static void bch2_vfs_inode_init(struct bch_fs *c, struct bch_inode_info *inode, struct bch_inode_unpacked *bi) { - inode->v.i_mode = bi->bi_mode; - i_uid_write(&inode->v, bi->bi_uid); - i_gid_write(&inode->v, bi->bi_gid); + bch2_inode_update_after_write(c, inode, bi, ~0); + inode->v.i_blocks = bi->bi_sectors; inode->v.i_ino = bi->bi_inum; - set_nlink(&inode->v, bi->bi_nlink + nlink_bias(inode->v.i_mode)); inode->v.i_rdev = bi->bi_dev; inode->v.i_generation = bi->bi_generation; inode->v.i_size = bi->bi_size; - inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); - inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); - inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); inode->ei_journal_seq = 0; - inode->ei_size = bi->bi_size; - inode->ei_flags = bi->bi_flags; - atomic64_set(&inode->ei_sectors, bi->bi_sectors); + inode->ei_quota_reserved = 0; inode->ei_str_hash = bch2_hash_info_init(c, bi); - - bch2_inode_flags_to_vfs(inode); + inode->ei_qid = bch_qid(bi); inode->v.i_mapping->a_ops = &bch_address_space_operations; @@ -948,9 +1364,9 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) inode_init_once(&inode->v); mutex_init(&inode->ei_update_lock); + mutex_init(&inode->ei_quota_lock); + inode->ei_inode_update = NULL; inode->ei_journal_seq = 0; - atomic_long_set(&inode->ei_size_dirty_count, 0); - atomic_long_set(&inode->ei_sectors_dirty_count, 0); return &inode->v; } @@ -968,6 +1384,19 @@ static void bch2_destroy_inode(struct inode *vinode) call_rcu(&vinode->i_rcu, bch2_i_callback); } +static int inode_update_times_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); + bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime); + bi->bi_ctime = timespec_to_bch2_time(c, inode->v.i_ctime); + + return 0; +} + static int bch2_vfs_write_inode(struct inode *vinode, struct writeback_control *wbc) { @@ -976,7 +1405,8 @@ static int bch2_vfs_write_inode(struct inode *vinode, int ret; mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode); + ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); if (c->opts.journal_flush_disabled) @@ -995,17 +1425,20 @@ static void bch2_evict_inode(struct inode *vinode) truncate_inode_pages_final(&inode->v.i_data); - if (!bch2_journal_error(&c->journal) && !is_bad_inode(&inode->v)) { - /* XXX - we want to check this stuff iff there weren't IO errors: */ - BUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count)); - BUG_ON(atomic64_read(&inode->ei_sectors) != inode->v.i_blocks); - } - clear_inode(&inode->v); + BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); + + if (inode->ei_inode_update) + bch2_deferred_update_free(c, inode->ei_inode_update); + inode->ei_inode_update = NULL; + if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { + bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), + KEY_TYPE_QUOTA_WARN); + bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, + KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); - atomic_long_dec(&c->nr_inodes); } } @@ -1013,21 +1446,23 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct bch_fs *c = sb->s_fs_info; + struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); + unsigned shift = sb->s_blocksize_bits - 9; u64 fsid; buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT; - buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c)) >> PAGE_SECTOR_SHIFT; + buf->f_blocks = usage.capacity >> shift; + buf->f_bfree = (usage.capacity - usage.used) >> shift; buf->f_bavail = buf->f_bfree; - buf->f_files = atomic_long_read(&c->nr_inodes); + buf->f_files = usage.nr_inodes; buf->f_ffree = U64_MAX; fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; - buf->f_namelen = NAME_MAX; + buf->f_namelen = BCH_NAME_MAX; return 0; } @@ -1044,81 +1479,100 @@ static int bch2_sync_fs(struct super_block *sb, int wait) return bch2_journal_flush(&c->journal); } -static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, - struct bch_opts opts) +static struct bch_fs *bch2_path_to_fs(const char *dev) { - size_t nr_devs = 0, i = 0; - char *dev_name, *s, **devs; - struct bch_fs *c = NULL; - const char *err = "cannot allocate memory"; + struct bch_fs *c; + struct block_device *bdev = lookup_bdev(dev); - dev_name = kstrdup(_dev_name, GFP_KERNEL); - if (!dev_name) - return NULL; + if (IS_ERR(bdev)) + return ERR_CAST(bdev); - for (s = dev_name; s; s = strchr(s + 1, ':')) - nr_devs++; + c = bch2_bdev_to_fs(bdev); + bdput(bdev); + return c ?: ERR_PTR(-ENOENT); +} - devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); - if (!devs) - goto err; +static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, + unsigned nr_devs, struct bch_opts opts) +{ + struct bch_fs *c, *c1, *c2; + size_t i; - for (i = 0, s = dev_name; - s; - (s = strchr(s, ':')) && (*s++ = '\0')) - devs[i++] = s; + if (!nr_devs) + return ERR_PTR(-EINVAL); + + c = bch2_fs_open(devs, nr_devs, opts); - err = bch2_fs_open(devs, nr_devs, opts, &c); - if (err) { + if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { /* * Already open? * Look up each block device, make sure they all belong to a * filesystem and they all belong to the _same_ filesystem */ - for (i = 0; i < nr_devs; i++) { - struct block_device *bdev = lookup_bdev(devs[i]); - struct bch_fs *c2; - - if (IS_ERR(bdev)) - goto err; + c1 = bch2_path_to_fs(devs[0]); + if (IS_ERR(c1)) + return c; - c2 = bch2_bdev_to_fs(bdev); - bdput(bdev); - - if (!c) - c = c2; - else if (c2) + for (i = 1; i < nr_devs; i++) { + c2 = bch2_path_to_fs(devs[i]); + if (!IS_ERR(c2)) closure_put(&c2->cl); - if (!c) - goto err; - if (c != c2) { - closure_put(&c->cl); - goto err; + if (c1 != c2) { + closure_put(&c1->cl); + return c; } } - mutex_lock(&c->state_lock); + c = c1; + } - if (!bch2_fs_running(c)) { - mutex_unlock(&c->state_lock); - closure_put(&c->cl); - err = "incomplete filesystem"; - c = NULL; - goto err; - } + if (IS_ERR(c)) + return c; + + mutex_lock(&c->state_lock); + if (!test_bit(BCH_FS_STARTED, &c->flags)) { mutex_unlock(&c->state_lock); + closure_put(&c->cl); + pr_err("err mounting %s: incomplete filesystem", dev_name); + return ERR_PTR(-EINVAL); } + mutex_unlock(&c->state_lock); + set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); + return c; +} + +static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, + struct bch_opts opts) +{ + char *dev_name = NULL, **devs = NULL, *s; + struct bch_fs *c = ERR_PTR(-ENOMEM); + size_t i, nr_devs = 0; + + dev_name = kstrdup(_dev_name, GFP_KERNEL); + if (!dev_name) + goto err; + + for (s = dev_name; s; s = strchr(s + 1, ':')) + nr_devs++; + + devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); + if (!devs) + goto err; + + for (i = 0, s = dev_name; + s; + (s = strchr(s, ':')) && (*s++ = '\0')) + devs[i++] = s; + + c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); err: kfree(devs); kfree(dev_name); - - if (!c) - pr_err("bch_fs_open err %s", err); return c; } @@ -1128,29 +1582,28 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) struct bch_opts opts = bch2_opts_empty(); int ret; - opt_set(opts, read_only, (*flags & MS_RDONLY) != 0); + opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); ret = bch2_parse_mount_opts(&opts, data); if (ret) return ret; if (opts.read_only != c->opts.read_only) { - const char *err = NULL; - mutex_lock(&c->state_lock); if (opts.read_only) { bch2_fs_read_only(c); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } else { - err = bch2_fs_read_write(c); - if (err) { - bch_err(c, "error going rw: %s", err); + ret = bch2_fs_read_write(c); + if (ret) { + bch_err(c, "error going rw: %i", ret); + mutex_unlock(&c->state_lock); return -EINVAL; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } c->opts.read_only = opts.read_only; @@ -1168,28 +1621,22 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; enum bch_opt_id i; + char buf[512]; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); - if (opt->mode < OPT_MOUNT) + if (!(opt->mode & OPT_MOUNT)) continue; if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) continue; - switch (opt->type) { - case BCH_OPT_BOOL: - seq_printf(seq, ",%s%s", v ? "" : "no", opt->attr.name); - break; - case BCH_OPT_UINT: - seq_printf(seq, ",%s=%llu", opt->attr.name, v); - break; - case BCH_OPT_STR: - seq_printf(seq, ",%s=%s", opt->attr.name, opt->choices[v]); - break; - } + bch2_opt_to_text(&PBUF(buf), c, opt, v, + OPT_SHOW_MOUNT_STYLE); + seq_putc(seq, ','); + seq_puts(seq, buf); } return 0; @@ -1234,17 +1681,17 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, unsigned i; int ret; - opt_set(opts, read_only, (flags & MS_RDONLY) != 0); + opt_set(opts, read_only, (flags & SB_RDONLY) != 0); ret = bch2_parse_mount_opts(&opts, data); if (ret) return ERR_PTR(ret); c = bch2_open_as_blockdevs(dev_name, opts); - if (!c) - return ERR_PTR(-ENOENT); + if (IS_ERR(c)) + return ERR_CAST(c); - sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c); + sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); if (IS_ERR(sb)) { closure_put(&c->cl); return ERR_CAST(sb); @@ -1255,7 +1702,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (sb->s_root) { closure_put(&c->cl); - if ((flags ^ sb->s_flags) & MS_RDONLY) { + if ((flags ^ sb->s_flags) & SB_RDONLY) { ret = -EBUSY; goto err_put_super; } @@ -1268,13 +1715,24 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &bch_super_operations; sb->s_export_op = &bch_export_ops; +#ifdef CONFIG_BCACHEFS_QUOTA + sb->s_qcop = &bch2_quotactl_operations; + sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; +#endif sb->s_xattr = bch2_xattr_handlers; sb->s_magic = BCACHEFS_STATFS_MAGIC; sb->s_time_gran = c->sb.time_precision; c->vfs_sb = sb; - sb->s_bdi = &c->bdi; strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); + ret = super_setup_bdi(sb); + if (ret) + goto err_put_super; + + sb->s_bdi->congested_fn = bch2_congested; + sb->s_bdi->congested_data = c; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + for_each_online_member(ca, c, i) { struct block_device *bdev = ca->disk_sb.bdev; @@ -1287,22 +1745,25 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, #ifdef CONFIG_BCACHEFS_POSIX_ACL if (c->opts.acl) - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= SB_POSIXACL; #endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); if (IS_ERR(vinode)) { + bch_err(c, "error mounting: error getting root inode %i", + (int) PTR_ERR(vinode)); ret = PTR_ERR(vinode); goto err_put_super; } sb->s_root = d_make_root(vinode); if (!sb->s_root) { + bch_err(c, "error mounting: error allocating root dentry"); ret = -ENOMEM; goto err_put_super; } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; out: return dget(sb->s_root);