+// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS
#include "bcachefs.h"
#include "chardev.h"
+#include "dirent.h"
#include "fs.h"
+#include "fs-common.h"
#include "fs-ioctl.h"
#include "quota.h"
#include <linux/compat.h>
+#include <linux/fsnotify.h>
#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include <linux/writeback.h>
#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
+#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-/* Inode flags: */
+struct flags_set { /* update request handed to the inode update callbacks through their void pointer argument */
+ unsigned mask; /* bits of bi_flags this request may change; the ioctl handlers fill it from map_defined() */
+ unsigned flags; /* new values for the masked bits, as produced by map_flags_rev() in the handlers */
-/* bcachefs inode flags -> vfs inode flags: */
-static const unsigned bch_flags_to_vfs[] = {
- [__BCH_INODE_SYNC] = S_SYNC,
- [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
- [__BCH_INODE_APPEND] = S_APPEND,
- [__BCH_INODE_NOATIME] = S_NOATIME,
-};
-
-/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-static const unsigned bch_flags_to_uflags[] = {
- [__BCH_INODE_SYNC] = FS_SYNC_FL,
- [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
- [__BCH_INODE_APPEND] = FS_APPEND_FL,
- [__BCH_INODE_NODUMP] = FS_NODUMP_FL,
- [__BCH_INODE_NOATIME] = FS_NOATIME_FL,
-};
+ unsigned projid; /* project id to store in bi_project; bch2_ioc_fssetxattr passes fsx_projid + 1 */
-/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-static const unsigned bch_flags_to_xflags[] = {
- [__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
- [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
- [__BCH_INODE_APPEND] = FS_XFLAG_APPEND,
- [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP,
- [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME,
- //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
+ bool set_projinherit; /* when true, bch2_inode_flags_set() rewrites the Inode_opt_project bit of bi_fields_set */
+ bool projinherit; /* value written to that bit when set_projinherit is true */
};
-#define map_flags(_map, _in) \
-({ \
- unsigned _i, _out = 0; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & (1 << _i)) \
- (_out) |= _map[_i]; \
- (_out); \
-})
-
-#define map_flags_rev(_map, _in) \
-({ \
- unsigned _i, _out = 0; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & _map[_i]) { \
- (_out) |= 1 << _i; \
- (_in) &= ~_map[_i]; \
- } \
- (_out); \
-})
-
-#define set_flags(_map, _in, _out) \
-do { \
- unsigned _i; \
- \
- for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
- if ((_in) & (1 << _i)) \
- (_out) |= _map[_i]; \
- else \
- (_out) &= ~_map[_i]; \
-} while (0)
-
-/* Set VFS inode flags from bcachefs inode: */
-void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
-{
- set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
-}
-
/*
 * Inode update callback: applies the request in the struct flags_set passed
 * through @p to the unpacked inode @bi.
 *
 * Returns 0 on success, -EPERM when the append or immutable bit would change
 * and the caller lacks CAP_LINUX_IMMUTABLE, and -EINVAL when flags other than
 * nodump and noatime are requested on an inode that is neither a regular file
 * nor a directory.
 */
static int bch2_inode_flags_set(struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
/*
* We're relying on btree locking here for exclusion with other ioctl
* calls - use the flags in the btree (@bi), not inode->i_flags:
*/
- unsigned newflags = *((unsigned *) p);
- unsigned oldflags = bi->bi_flags;
+ struct flags_set *s = p;
+ unsigned newflags = s->flags;
+ unsigned oldflags = bi->bi_flags & s->mask; /* compare only the bits this request covers */
if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
- if (!S_ISREG(inode->v.i_mode) &&
- !S_ISDIR(inode->v.i_mode) &&
+ if (!S_ISREG(bi->bi_mode) &&
+ !S_ISDIR(bi->bi_mode) &&
(newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
return -EINVAL;
- bi->bi_flags = newflags;
- inode->v.i_ctime = current_time(&inode->v);
+ if (s->set_projinherit) {
+ bi->bi_fields_set &= ~(1 << Inode_opt_project);
+ bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
+ }
+ /* Replace only the masked bits; every other bit of bi_flags is kept. */
+ bi->bi_flags &= ~s->mask;
+ bi->bi_flags |= newflags;
+ /* The change time is recorded in the unpacked inode @bi. */
+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
return 0;
}
struct bch_inode_info *inode,
void __user *arg)
{
- unsigned flags, uflags;
+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
+ unsigned uflags;
int ret;
if (get_user(uflags, (int __user *) arg))
return -EFAULT;
- flags = map_flags_rev(bch_flags_to_uflags, uflags);
+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
if (uflags)
return -EOPNOTSUPP;
return ret;
inode_lock(&inode->v);
- if (!inode_owner_or_capable(&inode->v)) {
+ if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto setflags_out;
}
mutex_lock(&inode->ei_update_lock);
- ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
-
- if (!ret)
- bch2_inode_flags_to_vfs(inode);
+ ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+ ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
setflags_out:
struct fsxattr fa = { 0 };
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+
+ if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+ fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
return copy_to_user(arg, &fa, sizeof(fa));
}
-static int bch2_set_projid(struct bch_fs *c,
- struct bch_inode_info *inode,
- u32 projid)
+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
{
/*
 * Inode update callback used by bch2_ioc_fssetxattr(): stores the requested
 * project id in @bi when it differs from the current one, then hands the same
 * request to bch2_inode_flags_set() and returns its result.
 */
- struct bch_qid qid = inode->ei_qid;
- int ret;
+ struct flags_set *s = p; /* request built by the ioctl handler */
- if (projid == inode->ei_qid.q[QTYP_PRJ])
- return 0;
-
- qid.q[QTYP_PRJ] = projid;
-
- ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
- inode->v.i_blocks);
- if (ret)
- return ret;
+ if (s->projid != bi->bi_project) {
+ bi->bi_fields_set |= 1U << Inode_opt_project; /* NOTE(review): bch2_inode_flags_set() rewrites this bit when s->set_projinherit is true - confirm the intended result */
+ bi->bi_project = s->projid;
+ }
- inode->ei_qid.q[QTYP_PRJ] = projid;
- return 0;
+ return bch2_inode_flags_set(inode, bi, p);
}
/*
 * FS_IOC_FSSETXATTR handler: copies a struct fsxattr from user space, turns
 * its xflags and project id into a struct flags_set and writes the inode
 * through fssetxattr_inode_update_fn() with ATTR_CTIME.
 *
 * Visible error returns: -EFAULT when the copy fails, -EOPNOTSUPP when xflags
 * bits remain after translation, -EINVAL when fsx_projid >= U32_MAX, -EACCES
 * when inode_owner_or_capable() fails, otherwise the result of the later
 * calls.
 *
 * NOTE(review): the body uses file, which is not in the parameter list shown
 * here, and there is an if (ret) test with no visible assignment before it;
 * presumably some diff context is missing - confirm against the full file.
 */
static int bch2_ioc_fssetxattr(struct bch_fs *c,
struct bch_inode_info *inode,
struct fsxattr __user *arg)
{
+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
struct fsxattr fa;
- unsigned flags;
int ret;
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
- flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
+ s.set_projinherit = true;
+ s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
+ fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; /* handled above, so drop it before the table translation */
+ /* presumably map_flags_rev() still clears the bits it recognises, as the macro removed from this file did, leaving only unsupported bits behind */
+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
if (fa.fsx_xflags)
return -EOPNOTSUPP;
+ if (fa.fsx_projid >= U32_MAX)
+ return -EINVAL;
+ /* presumably U32_MAX is refused because the +1 bias below would wrap it to 0 - TODO confirm */
+ /*
+ * inode fields accessible via the xattr interface are stored with a +1
+ * bias, so that 0 means unset:
+ */
+ s.projid = fa.fsx_projid + 1;
+
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(&inode->v);
- if (!inode_owner_or_capable(&inode->v)) {
+ if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto err;
}
if (ret)
goto err_unlock;
- ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
- if (!ret)
- bch2_inode_flags_to_vfs(inode);
+ ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+ ATTR_CTIME);
err_unlock:
mutex_unlock(&inode->ei_update_lock);
err:
return ret;
}
+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_inode_info *dir = p;
+ /*
+ * Inode update callback handed to bch2_write_inode() by
+ * bch2_ioc_reinherit_attrs(), which passes the source directory as @p.
+ * The result of bch2_reinherit_attrs() is logically negated, so this
+ * returns 1 when that helper returns 0 and 0 otherwise. @inode is unused.
+ */
+ return !bch2_reinherit_attrs(bi, &dir->ei_inode);
+}
+/*
+ * BCHFS_IOC_REINHERIT_ATTRS handler: looks up the entry @name in directory
+ * @src and rewrites that inode through bch2_reinherit_attrs_fn(), passing
+ * @src as the inode to inherit from.
+ *
+ * Returns a negative error code on failure. A non-negative result of the
+ * update path is inverted before returning (0 becomes 1, nonzero becomes 0),
+ * which the comment in the body describes as returning true if work was done.
+ */
+static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
+ struct file *file,
+ struct bch_inode_info *src,
+ const char __user *name)
+{
+ struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
+ struct bch_inode_info *dst;
+ struct inode *vinode = NULL;
+ char *kname = NULL;
+ struct qstr qstr;
+ int ret = 0;
+ subvol_inum inum;
+
+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
+ if (!kname)
+ return -ENOMEM;
+
+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX);
+ if (unlikely(ret < 0))
+ goto err1;
+ /* The non-negative return value of the copy is used as the name length. */
+ qstr.len = ret;
+ qstr.name = kname;
+
+ ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
+ if (ret)
+ goto err1;
+
+ vinode = bch2_vfs_inode_get(c, inum);
+ ret = PTR_ERR_OR_ZERO(vinode);
+ if (ret)
+ goto err1;
+
+ dst = to_bch_ei(vinode);
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ goto err2;
+
+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
+ /*
+ * When inode_attr_changing() reports a change for Inode_opt_project,
+ * bch2_fs_quota_transfer() is called for the project quota type before
+ * the inode is written; a failure there skips the write.
+ */
+ if (inode_attr_changing(src, dst, Inode_opt_project)) {
+ ret = bch2_fs_quota_transfer(c, dst,
+ src->ei_qid,
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (ret)
+ goto err3;
+ }
+
+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
+err3:
+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+ /* return true if we did work */
+ if (ret >= 0)
+ ret = !ret;
+ /* Reached on success and via err3; write access was taken before the inodes were locked. */
+ mnt_drop_write_file(file);
+err2:
+ iput(vinode);
+err1:
+ kfree(kname);
+
+ return ret;
+}
+/*
+ * FS_IOC_GOINGDOWN handler: reads a u32 mode from user space and switches
+ * the filesystem to read only while holding s_umount for writing.
+ *
+ * Returns -EPERM without CAP_SYS_ADMIN, -EFAULT when the mode cannot be
+ * read, -EINVAL for an unknown mode, the freeze_bdev() error in the default
+ * mode, and 0 otherwise.
+ *
+ * NOTE(review): the notice is logged before the mode is validated, and the
+ * result of bch2_journal_flush() is not checked.
+ */
+static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
+{
+ u32 flags;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, arg))
+ return -EFAULT;
+
+ bch_notice(c, "shutdown by ioctl type %u", flags);
+
+ down_write(&c->vfs_sb->s_umount);
+
+ switch (flags) {
+ case FSOP_GOING_FLAGS_DEFAULT:
+ ret = freeze_bdev(c->vfs_sb->s_bdev);
+ if (ret)
+ goto err;
+ /* Block device is frozen from here until thaw_bdev() below. */
+ bch2_journal_flush(&c->journal);
+ c->vfs_sb->s_flags |= SB_RDONLY;
+ bch2_fs_emergency_read_only(c);
+ thaw_bdev(c->vfs_sb->s_bdev);
+ break;
+
+ case FSOP_GOING_FLAGS_LOGFLUSH:
+ bch2_journal_flush(&c->journal);
+ fallthrough;
+ /* LOGFLUSH differs from NOLOGFLUSH only by the journal flush above. */
+ case FSOP_GOING_FLAGS_NOLOGFLUSH:
+ c->vfs_sb->s_flags |= SB_RDONLY;
+ bch2_fs_emergency_read_only(c);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+err:
+ up_write(&c->vfs_sb->s_umount);
+ return ret;
+}
+/*
+ * BCH_IOCTL_SUBVOLUME_CREATE handler: creates a directory inode through
+ * __bch2_create() with BCH_CREATE_SUBVOL, optionally as a snapshot of the
+ * subvolume named by arg.src_ptr.
+ *
+ * Returns 0 on success or a negative error. Errors visible here: -EINVAL
+ * for bad flag combinations, -EXDEV when a path is on another filesystem,
+ * -EEXIST when the destination already has an inode, -EOVERFLOW when the
+ * caller ids have no mapping in the superblock user namespace, plus
+ * whatever the path lookup, permission, security and create calls return.
+ */
+static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
+ struct bch_ioctl_subvolume arg)
+{
+ struct inode *dir;
+ struct bch_inode_info *inode;
+ struct user_namespace *s_user_ns;
+ struct dentry *dst_dentry;
+ struct path src_path, dst_path;
+ int how = LOOKUP_FOLLOW;
+ int error;
+ subvol_inum snapshot_src = { 0 };
+ unsigned lookup_flags = 0;
+ unsigned create_flags = BCH_CREATE_SUBVOL;
+
+ if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
+ BCH_SUBVOL_SNAPSHOT_RO))
+ return -EINVAL;
+ /* A source path and the read-only flag are refused unless BCH_SUBVOL_SNAPSHOT_CREATE is set. */
+ if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
+ (arg.src_ptr ||
+ (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
+ return -EINVAL;
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+ create_flags |= BCH_CREATE_SNAPSHOT;
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
+ create_flags |= BCH_CREATE_SNAPSHOT_RO;
+
+ /* why do we need this lock? */
+ down_read(&c->vfs_sb->s_umount);
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+ sync_inodes_sb(c->vfs_sb);
+retry:
+ if (arg.src_ptr) {
+ error = user_path_at(arg.dirfd,
+ (const char __user *)(unsigned long)arg.src_ptr,
+ how, &src_path);
+ if (error)
+ goto err1;
+
+ if (src_path.dentry->d_sb->s_fs_info != c) {
+ path_put(&src_path);
+ error = -EXDEV;
+ goto err1;
+ }
+
+ snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
+ }
+
+ dst_dentry = user_path_create(arg.dirfd,
+ (const char __user *)(unsigned long)arg.dst_ptr,
+ &dst_path, lookup_flags);
+ error = PTR_ERR_OR_ZERO(dst_dentry);
+ if (error)
+ goto err2;
+
+ if (dst_dentry->d_sb->s_fs_info != c) {
+ error = -EXDEV;
+ goto err3;
+ }
+
+ if (dst_dentry->d_inode) {
+ error = -EEXIST;
+ goto err3;
+ }
+ /* dir is the inode of dst_path; it is the parent handed to __bch2_create() and fsnotify_mkdir() below. */
+ dir = dst_path.dentry->d_inode;
+ if (IS_DEADDIR(dir)) {
+ error = -BCH_ERR_ENOENT_directory_dead;
+ goto err3;
+ }
+
+ s_user_ns = dir->i_sb->s_user_ns;
+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
+ !kgid_has_mapping(s_user_ns, current_fsgid())) {
+ error = -EOVERFLOW;
+ goto err3;
+ }
+
+ error = inode_permission(file_mnt_idmap(filp),
+ dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ goto err3;
+ /* The umask is applied here only when IS_POSIXACL() is false for dir. */
+ if (!IS_POSIXACL(dir))
+ arg.mode &= ~current_umask();
+
+ error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
+ if (error)
+ goto err3;
+ /* Snapshot requested without a source path: use the bi_subvol value of dir as the snapshot source. */
+ if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
+ !arg.src_ptr)
+ snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol;
+
+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
+ dst_dentry, arg.mode|S_IFDIR,
+ 0, snapshot_src, create_flags);
+ error = PTR_ERR_OR_ZERO(inode);
+ if (error)
+ goto err3;
+
+ d_instantiate(dst_dentry, &inode->v);
+ fsnotify_mkdir(dir, dst_dentry);
+err3:
+ done_path_create(&dst_path, dst_dentry);
+err2:
+ if (arg.src_ptr)
+ path_put(&src_path);
+ /* NOTE(review): how is never modified, so a retry repeats the source lookup with unchanged flags; only lookup_flags gains LOOKUP_REVAL. */
+ if (retry_estale(error, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+err1:
+ up_read(&c->vfs_sb->s_umount);
+
+ return error;
+}
+/*
+ * BCH_IOCTL_SUBVOLUME_DESTROY handler: resolves arg.dst_ptr relative to
+ * arg.dirfd and removes it with __bch2_unlink(), passing true as the third
+ * argument.
+ *
+ * Returns -EINVAL when any flag is set, -EXDEV when the path is on another
+ * filesystem, otherwise the result of the lookup or of __bch2_unlink().
+ *
+ * NOTE(review): no lock on the parent directory and no permission check are
+ * visible in this function, and @filp is unused; confirm whether
+ * __bch2_unlink() or the caller provides them.
+ */
+static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
+ struct bch_ioctl_subvolume arg)
+{
+ struct path path;
+ struct inode *dir;
+ int ret = 0;
+
+ if (arg.flags)
+ return -EINVAL;
+
+ ret = user_path_at(arg.dirfd,
+ (const char __user *)(unsigned long)arg.dst_ptr,
+ LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ret;
+
+ if (path.dentry->d_sb->s_fs_info != c) {
+ ret = -EXDEV;
+ goto err;
+ }
+ /* The parent inode is read from d_parent of the resolved dentry. */
+ dir = path.dentry->d_parent->d_inode;
+
+ ret = __bch2_unlink(dir, path.dentry, true);
+ if (ret)
+ goto err;
+ /* Only reached when the unlink succeeded. */
+ fsnotify_rmdir(dir, path.dentry);
+ d_delete(path.dentry);
+err:
+ path_put(&path);
+ return ret;
+}
+/*
+ * ioctl entry point for files: dispatches @cmd to the matching handler in
+ * this file, hands any other command to bch2_fs_ioctl(), and passes the
+ * result through bch2_err_class() before returning it.
+ */
long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct bch_inode_info *inode = file_bch_inode(file);
- struct super_block *sb = inode->v.i_sb;
- struct bch_fs *c = sb->s_fs_info;
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ long ret;
switch (cmd) {
case FS_IOC_GETFLAGS:
- return bch2_ioc_getflags(inode, (int __user *) arg);
+ ret = bch2_ioc_getflags(inode, (int __user *) arg);
+ break;
case FS_IOC_SETFLAGS:
- return bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+ ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+ break;
case FS_IOC_FSGETXATTR:
- return bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+ ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+ break;
+
case FS_IOC_FSSETXATTR:
- return bch2_ioc_fssetxattr(c, file, inode, (void __user *) arg);
+ ret = bch2_ioc_fssetxattr(c, file, inode,
+ (void __user *) arg);
+ break;
+
+ case BCHFS_IOC_REINHERIT_ATTRS:
+ ret = bch2_ioc_reinherit_attrs(c, file, inode,
+ (void __user *) arg);
+ break;
case FS_IOC_GETVERSION:
- return -ENOTTY;
+ ret = -ENOTTY;
+ break;
+ /* Both version ioctls are answered with -ENOTTY. */
case FS_IOC_SETVERSION:
- return -ENOTTY;
+ ret = -ENOTTY;
+ break;
case FS_IOC_GOINGDOWN:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ ret = bch2_ioc_goingdown(c, (u32 __user *) arg);
+ break;
- down_write(&sb->s_umount);
- sb->s_flags |= MS_RDONLY;
- bch2_fs_emergency_read_only(c);
- up_write(&sb->s_umount);
- return 0;
+ case BCH_IOCTL_SUBVOLUME_CREATE: {
+ struct bch_ioctl_subvolume i;
+ /* The argument struct is copied in first; a failed copy yields -EFAULT. */
+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+ ? -EFAULT
+ : bch2_ioctl_subvolume_create(c, file, i);
+ break;
+ }
+
+ case BCH_IOCTL_SUBVOLUME_DESTROY: {
+ struct bch_ioctl_subvolume i;
+
+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+ ? -EFAULT
+ : bch2_ioctl_subvolume_destroy(c, file, i);
+ break;
+ }
default:
- return bch2_fs_ioctl(c, cmd, (void __user *) arg);
+ ret = bch2_fs_ioctl(c, cmd, (void __user *) arg);
+ break;
}
+
+ return bch2_err_class(ret);
}
#ifdef CONFIG_COMPAT