git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/fs-ioctl.c
Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields
[bcachefs-tools-debian] / libbcachefs / fs-ioctl.c
index 6ae67f92dd348c3e689be7a57046526ef7dc36b8..dfa1bf73c8541187abbe87e7127213d337ee24e7 100644 (file)
+// SPDX-License-Identifier: GPL-2.0
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
 #include "chardev.h"
+#include "dirent.h"
 #include "fs.h"
+#include "fs-common.h"
 #include "fs-ioctl.h"
 #include "quota.h"
 
 #include <linux/compat.h>
+#include <linux/fsnotify.h>
 #include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include <linux/writeback.h>
 
 #define FS_IOC_GOINGDOWN            _IOR('X', 125, __u32)
+#define FSOP_GOING_FLAGS_DEFAULT       0x0     /* going down */
+#define FSOP_GOING_FLAGS_LOGFLUSH      0x1     /* flush log but not data */
+#define FSOP_GOING_FLAGS_NOLOGFLUSH    0x2     /* don't flush log nor data */
 
-/* Inode flags: */
+struct flags_set {              /* request block passed as void *p to the inode-update callbacks */
+       unsigned                mask;           /* bi_flags bits that are cleared before flags is OR-ed in */
+       unsigned                flags;          /* new flag values; also compared against the masked old flags */
 
-/* bcachefs inode flags -> vfs inode flags: */
-static const unsigned bch_flags_to_vfs[] = {
-       [__BCH_INODE_SYNC]      = S_SYNC,
-       [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
-       [__BCH_INODE_APPEND]    = S_APPEND,
-       [__BCH_INODE_NOATIME]   = S_NOATIME,
-};
-
-/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-static const unsigned bch_flags_to_uflags[] = {
-       [__BCH_INODE_SYNC]      = FS_SYNC_FL,
-       [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
-       [__BCH_INODE_APPEND]    = FS_APPEND_FL,
-       [__BCH_INODE_NODUMP]    = FS_NODUMP_FL,
-       [__BCH_INODE_NOATIME]   = FS_NOATIME_FL,
-};
+       unsigned                projid;         /* value stored in bi_project; bch2_ioc_fssetxattr() passes fsx_projid + 1 */
 
-/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-static const unsigned bch_flags_to_xflags[] = {
-       [__BCH_INODE_SYNC]      = FS_XFLAG_SYNC,
-       [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
-       [__BCH_INODE_APPEND]    = FS_XFLAG_APPEND,
-       [__BCH_INODE_NODUMP]    = FS_XFLAG_NODUMP,
-       [__BCH_INODE_NOATIME]   = FS_XFLAG_NOATIME,
-       //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
+       bool                    set_projinherit;        /* when true, the Inode_opt_project bit of bi_fields_set is rewritten */
+       bool                    projinherit;            /* value written to that bit when set_projinherit is true */
 };
 
-#define map_flags(_map, _in)                                           \
-({                                                                     \
-       unsigned _i, _out = 0;                                          \
-                                                                       \
-       for (_i = 0; _i < ARRAY_SIZE(_map); _i++)                       \
-               if ((_in) & (1 << _i))                                  \
-                       (_out) |= _map[_i];                             \
-       (_out);                                                         \
-})
-
-#define map_flags_rev(_map, _in)                                       \
-({                                                                     \
-       unsigned _i, _out = 0;                                          \
-                                                                       \
-       for (_i = 0; _i < ARRAY_SIZE(_map); _i++)                       \
-               if ((_in) & _map[_i]) {                                 \
-                       (_out) |= 1 << _i;                              \
-                       (_in) &= ~_map[_i];                             \
-               }                                                       \
-       (_out);                                                         \
-})
-
-#define set_flags(_map, _in, _out)                                     \
-do {                                                                   \
-       unsigned _i;                                                    \
-                                                                       \
-       for (_i = 0; _i < ARRAY_SIZE(_map); _i++)                       \
-               if ((_in) & (1 << _i))                                  \
-                       (_out) |= _map[_i];                             \
-               else                                                    \
-                       (_out) &= ~_map[_i];                            \
-} while (0)
-
-/* Set VFS inode flags from bcachefs inode: */
-void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
-{
-       set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
-}
-
 static int bch2_inode_flags_set(struct bch_inode_info *inode,
                                 struct bch_inode_unpacked *bi,
                                 void *p)
 {
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;    /* only needed for the ctime conversion at the end */
        /*
         * We're relying on btree locking here for exclusion with other ioctl
         * calls - use the flags in the btree (@bi), not inode->i_flags:
         */
-       unsigned newflags = *((unsigned *) p);
-       unsigned oldflags = bi->bi_flags;
+       struct flags_set *s = p;                        /* p is the caller's struct flags_set request */
+       unsigned newflags = s->flags;
+       unsigned oldflags = bi->bi_flags & s->mask;     /* only the masked bits take part in the change check */
 
        if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return -EPERM;
 
-       if (!S_ISREG(inode->v.i_mode) &&
-           !S_ISDIR(inode->v.i_mode) &&
+       if (!S_ISREG(bi->bi_mode) &&                    /* file type now comes from the btree copy of the inode */
+           !S_ISDIR(bi->bi_mode) &&
            (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
                return -EINVAL;
 
-       bi->bi_flags = newflags;
-       inode->v.i_ctime = current_time(&inode->v);
+       if (s->set_projinherit) {                       /* set by bch2_ioc_fssetxattr(); bch2_ioc_setflags() leaves it false */
+               bi->bi_fields_set &= ~(1 << Inode_opt_project);
+               bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
+       }
+       /* clear the masked bits, then OR in the requested values */
+       bi->bi_flags &= ~s->mask;
+       bi->bi_flags |= newflags;
+       /* the change time is recorded in the unpacked inode, not in the VFS inode */
+       bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
        return 0;
 }
 
@@ -116,13 +77,14 @@ static int bch2_ioc_setflags(struct bch_fs *c,
                             struct bch_inode_info *inode,
                             void __user *arg)
 {
-       unsigned flags, uflags;
+       struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
+       unsigned uflags;
        int ret;
 
        if (get_user(uflags, (int __user *) arg))
                return -EFAULT;
 
-       flags = map_flags_rev(bch_flags_to_uflags, uflags);
+       s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
        if (uflags)
                return -EOPNOTSUPP;
 
@@ -131,16 +93,14 @@ static int bch2_ioc_setflags(struct bch_fs *c,
                return ret;
 
        inode_lock(&inode->v);
-       if (!inode_owner_or_capable(&inode->v)) {
+       if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
                ret = -EACCES;
                goto setflags_out;
        }
 
        mutex_lock(&inode->ei_update_lock);
-       ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
-
-       if (!ret)
-               bch2_inode_flags_to_vfs(inode);
+       ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+                              ATTR_CTIME);
        mutex_unlock(&inode->ei_update_lock);
 
 setflags_out:
@@ -155,30 +115,27 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
        struct fsxattr fa = { 0 };
 
        fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+
+       if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+               fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
        fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
 
        return copy_to_user(arg, &fa, sizeof(fa));
 }
 
-static int bch2_set_projid(struct bch_fs *c,
-                          struct bch_inode_info *inode,
-                          u32 projid)
+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
+                                     struct bch_inode_unpacked *bi,
+                                     void *p)
 {
-       struct bch_qid qid = inode->ei_qid;
-       int ret;
+       struct flags_set *s = p;        /* inode-update callback used by bch2_ioc_fssetxattr(); p is its struct flags_set */
 
-       if (projid == inode->ei_qid.q[QTYP_PRJ])
-               return 0;
-
-       qid.q[QTYP_PRJ] = projid;
-
-       ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
-                                 inode->v.i_blocks);
-       if (ret)
-               return ret;
+       if (s->projid != bi->bi_project) {      /* only touch the project fields when the ID actually changes */
+               bi->bi_fields_set |= 1U << Inode_opt_project;   /* NOTE(review): bch2_inode_flags_set() rewrites this bit from s->projinherit when s->set_projinherit is true */
+               bi->bi_project = s->projid;
+       }
 
-       inode->ei_qid.q[QTYP_PRJ] = projid;
-       return 0;
+       return bch2_inode_flags_set(inode, bi, p);      /* flag checks and update are shared with the FS_IOC_SETFLAGS callback */
 }
 
 static int bch2_ioc_fssetxattr(struct bch_fs *c,
@@ -186,23 +143,36 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
                               struct bch_inode_info *inode,
                               struct fsxattr __user *arg)
 {
+       struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
        struct fsxattr fa;
-       unsigned flags;
        int ret;
 
        if (copy_from_user(&fa, arg, sizeof(fa)))
                return -EFAULT;
 
-       flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
+       s.set_projinherit = true;
+       s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
+       fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
+
+       s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
        if (fa.fsx_xflags)
                return -EOPNOTSUPP;
 
+       if (fa.fsx_projid >= U32_MAX)
+               return -EINVAL;
+
+       /*
+        * inode fields accessible via the xattr interface are stored with a +1
+        * bias, so that 0 means unset:
+        */
+       s.projid = fa.fsx_projid + 1;
+
        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
 
        inode_lock(&inode->v);
-       if (!inode_owner_or_capable(&inode->v)) {
+       if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
                ret = -EACCES;
                goto err;
        }
@@ -212,9 +182,8 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
        if (ret)
                goto err_unlock;
 
-       ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
-       if (!ret)
-               bch2_inode_flags_to_vfs(inode);
+       ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+                              ATTR_CTIME);
 err_unlock:
        mutex_unlock(&inode->ei_update_lock);
 err:
@@ -223,42 +192,347 @@ err:
        return ret;
 }
 
+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode,
+                                  struct bch_inode_unpacked *bi,
+                                  void *p)
+{
+       struct bch_inode_info *dir = p; /* p is the inode bch2_ioc_reinherit_attrs() passes as the attribute source */
+       /*
+        * Inode-update callback: applies bch2_reinherit_attrs() to @bi and
+        * returns the logical negation of its result, i.e. 0 when it reports
+        * nonzero and 1 when it reports zero. bch2_ioc_reinherit_attrs()
+        * inverts the value once more before returning it.
+        */
+       return !bch2_reinherit_attrs(bi, &dir->ei_inode);
+}
+
+static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
+                                   struct file *file,
+                                   struct bch_inode_info *src,
+                                   const char __user *name)
+{
+       struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
+       struct bch_inode_info *dst;
+       struct inode *vinode = NULL;
+       char *kname = NULL;
+       struct qstr qstr;
+       int ret = 0;
+       subvol_inum inum;
+       /*
+        * BCHFS_IOC_REINHERIT_ATTRS handler: looks @name up as a directory
+        * entry of @src and rewrites that inode through
+        * bch2_reinherit_attrs_fn(), with @src as the attribute source.
+        *
+        * Returns 1 if bch2_write_inode() returned 0, 0 if it returned a
+        * positive value, or a negative error code.
+        */
+       kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
+       if (!kname)
+               return -ENOMEM;
+
+       ret = strncpy_from_user(kname, name, BCH_NAME_MAX);     /* a non-negative result is used as the name length below */
+       if (unlikely(ret < 0))
+               goto err1;
+
+       qstr.len        = ret;
+       qstr.name       = kname;
+       /* resolve the name inside src; inum identifies the inode to update */
+       ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
+       if (ret)
+               goto err1;
+
+       vinode = bch2_vfs_inode_get(c, inum);
+       ret = PTR_ERR_OR_ZERO(vinode);
+       if (ret)
+               goto err1;
+
+       dst = to_bch_ei(vinode);
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               goto err2;
+
+       bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);  /* both inodes stay locked across the quota transfer and the inode write */
+       /* presumably true when dst's project would change; quota is then moved to src's qid first - confirm against inode_attr_changing() */
+       if (inode_attr_changing(src, dst, Inode_opt_project)) {
+               ret = bch2_fs_quota_transfer(c, dst,
+                                            src->ei_qid,
+                                            1 << QTYP_PRJ,
+                                            KEY_TYPE_QUOTA_PREALLOC);
+               if (ret)
+                       goto err3;
+       }
+
+       ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
+err3:  /* reached on success too: everything below is the common unwind path */
+       bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+       /* return true if we did work */
+       if (ret >= 0)
+               ret = !ret;
+
+       mnt_drop_write_file(file);      /* pairs with mnt_want_write_file() above */
+err2:
+       iput(vinode);                   /* releases the inode obtained from bch2_vfs_inode_get() */
+err1:
+       kfree(kname);
+
+       return ret;
+}
+
+static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
+{
+       u32 flags;
+       int ret = 0;
+       /*
+        * FS_IOC_GOINGDOWN handler: marks the superblock SB_RDONLY and calls
+        * bch2_fs_emergency_read_only(). @arg points to one of the
+        * FSOP_GOING_FLAGS_* values; any other value returns -EINVAL.
+        * Requires CAP_SYS_ADMIN (-EPERM otherwise); -EFAULT if @arg cannot
+        * be read.
+        */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (get_user(flags, arg))
+               return -EFAULT;
+
+       bch_notice(c, "shutdown by ioctl type %u", flags);      /* logged before flags is validated */
+
+       down_write(&c->vfs_sb->s_umount);       /* NOTE(review): held across freeze_bdev() below - confirm that path cannot take s_umount itself */
+
+       switch (flags) {
+       case FSOP_GOING_FLAGS_DEFAULT:          /* freeze the block device around the journal flush */
+               ret = freeze_bdev(c->vfs_sb->s_bdev);
+               if (ret)
+                       goto err;               /* nothing was frozen, so nothing to thaw */
+
+               bch2_journal_flush(&c->journal);
+               c->vfs_sb->s_flags |= SB_RDONLY;
+               bch2_fs_emergency_read_only(c);
+               thaw_bdev(c->vfs_sb->s_bdev);
+               break;
+
+       case FSOP_GOING_FLAGS_LOGFLUSH:         /* flush the journal, then share the read-only steps below */
+               bch2_journal_flush(&c->journal);
+               fallthrough;
+
+       case FSOP_GOING_FLAGS_NOLOGFLUSH:       /* go read-only without flushing the journal */
+               c->vfs_sb->s_flags |= SB_RDONLY;
+               bch2_fs_emergency_read_only(c);
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+err:
+       up_write(&c->vfs_sb->s_umount);
+       return ret;
+}
+
+static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
+                               struct bch_ioctl_subvolume arg)
+{
+       struct inode *dir;
+       struct bch_inode_info *inode;
+       struct user_namespace *s_user_ns;
+       struct dentry *dst_dentry;
+       struct path src_path, dst_path;
+       int how = LOOKUP_FOLLOW;
+       int error;
+       subvol_inum snapshot_src = { 0 };
+       unsigned lookup_flags = 0;
+       unsigned create_flags = BCH_CREATE_SUBVOL;
+       /*
+        * BCH_IOCTL_SUBVOLUME_CREATE handler: creates a directory at
+        * arg.dst_ptr (resolved relative to arg.dirfd) through __bch2_create()
+        * with BCH_CREATE_SUBVOL. With BCH_SUBVOL_SNAPSHOT_CREATE the new
+        * subvolume is a snapshot of arg.src_ptr, or of the subvolume that
+        * holds the destination's parent directory when no source is given.
+        *
+        * Returns 0 or a negative error code.
+        */
+       if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
+                         BCH_SUBVOL_SNAPSHOT_RO))
+               return -EINVAL;
+
+       if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&        /* a source path or the RO flag only makes sense for snapshots */
+           (arg.src_ptr ||
+            (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
+               return -EINVAL;
+
+       if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+               create_flags |= BCH_CREATE_SNAPSHOT;
+
+       if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
+               create_flags |= BCH_CREATE_SNAPSHOT_RO;
+
+       /* why do we need this lock? */
+       down_read(&c->vfs_sb->s_umount);
+
+       if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+               sync_inodes_sb(c->vfs_sb);      /* done once, before the retry loop, for snapshots only */
+retry:
+       if (arg.src_ptr) {
+               error = user_path_at(arg.dirfd,
+                               (const char __user *)(unsigned long)arg.src_ptr,
+                               how, &src_path);
+               if (error)
+                       goto err1;
+
+               if (src_path.dentry->d_sb->s_fs_info != c) {    /* the snapshot source must be on this filesystem */
+                       path_put(&src_path);
+                       error = -EXDEV;
+                       goto err1;
+               }
+
+               snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
+       }
+
+       dst_dentry = user_path_create(arg.dirfd,
+                       (const char __user *)(unsigned long)arg.dst_ptr,
+                       &dst_path, lookup_flags);
+       error = PTR_ERR_OR_ZERO(dst_dentry);
+       if (error)
+               goto err2;
+
+       if (dst_dentry->d_sb->s_fs_info != c) {         /* the destination must be on this filesystem as well */
+               error = -EXDEV;
+               goto err3;
+       }
+
+       if (dst_dentry->d_inode) {                      /* the destination name is already in use */
+               error = -EEXIST;
+               goto err3;
+       }
+
+       dir = dst_path.dentry->d_inode;                 /* parent directory that will receive the new entry */
+       if (IS_DEADDIR(dir)) {
+               error = -BCH_ERR_ENOENT_directory_dead;
+               goto err3;
+       }
+
+       s_user_ns = dir->i_sb->s_user_ns;
+       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||    /* caller's fsuid/fsgid must be representable in the sb's user namespace */
+           !kgid_has_mapping(s_user_ns, current_fsgid())) {
+               error = -EOVERFLOW;
+               goto err3;
+       }
+
+       error = inode_permission(file_mnt_idmap(filp),
+                                dir, MAY_WRITE | MAY_EXEC);
+       if (error)
+               goto err3;
+
+       if (!IS_POSIXACL(dir))
+               arg.mode &= ~current_umask();           /* umask is applied here only when dir does not use POSIX ACLs */
+
+       error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
+       if (error)
+               goto err3;
+
+       if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && /* snapshot without a source path: use the parent directory's subvolume */
+           !arg.src_ptr)
+               snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol;
+
+       inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
+                             dst_dentry, arg.mode|S_IFDIR,
+                             0, snapshot_src, create_flags);
+       error = PTR_ERR_OR_ZERO(inode);
+       if (error)
+               goto err3;
+
+       d_instantiate(dst_dentry, &inode->v);
+       fsnotify_mkdir(dir, dst_dentry);
+err3:  /* also reached on success: the labels below form the common unwind path */
+       done_path_create(&dst_path, dst_dentry);
+err2:
+       if (arg.src_ptr)
+               path_put(&src_path);
+
+       if (retry_estale(error, lookup_flags)) {        /* NOTE(review): only lookup_flags gains LOOKUP_REVAL; the source lookup keeps using 'how' unchanged */
+               lookup_flags |= LOOKUP_REVAL;
+               goto retry;
+       }
+err1:
+       up_read(&c->vfs_sb->s_umount);
+
+       return error;
+}
+
+static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
+                               struct bch_ioctl_subvolume arg)
+{
+       struct path path;
+       struct inode *dir;
+       int ret = 0;
+       /*
+        * BCH_IOCTL_SUBVOLUME_DESTROY handler: resolves arg.dst_ptr relative
+        * to arg.dirfd and removes that entry from its parent directory via
+        * __bch2_unlink(). No flags are accepted. Returns 0 or a negative
+        * error code.
+        *
+        * NOTE(review): no permission check or directory locking is visible
+        * in this function before __bch2_unlink() - confirm the callee (or
+        * the caller) provides them.
+        */
+       if (arg.flags)
+               return -EINVAL;
+
+       ret = user_path_at(arg.dirfd,
+                       (const char __user *)(unsigned long)arg.dst_ptr,
+                       LOOKUP_FOLLOW, &path);
+       if (ret)
+               return ret;
+
+       if (path.dentry->d_sb->s_fs_info != c) {        /* the target must be on this filesystem */
+               ret = -EXDEV;
+               goto err;
+       }
+
+       dir = path.dentry->d_parent->d_inode;           /* parent directory of the resolved entry */
+
+       ret = __bch2_unlink(dir, path.dentry, true);    /* third argument presumably selects subvolume deletion - confirm against __bch2_unlink() */
+       if (ret)
+               goto err;
+
+       fsnotify_rmdir(dir, path.dentry);
+       d_delete(path.dentry);
+err:   /* also reached on success */
+       path_put(&path);
+       return ret;
+}
+
 long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
        struct bch_inode_info *inode = file_bch_inode(file);
-       struct super_block *sb = inode->v.i_sb;
-       struct bch_fs *c = sb->s_fs_info;
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       long ret;       /* every case stores its result here so bch2_err_class() is applied once on the way out */
 
        switch (cmd) {
        case FS_IOC_GETFLAGS:
-               return bch2_ioc_getflags(inode, (int __user *) arg);
+               ret = bch2_ioc_getflags(inode, (int __user *) arg);
+               break;
 
        case FS_IOC_SETFLAGS:
-               return bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+               ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+               break;
 
        case FS_IOC_FSGETXATTR:
-               return bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+               ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+               break;
+
        case FS_IOC_FSSETXATTR:
-               return bch2_ioc_fssetxattr(c, file, inode, (void __user *) arg);
+               ret = bch2_ioc_fssetxattr(c, file, inode,
+                                         (void __user *) arg);
+               break;
+
+       case BCHFS_IOC_REINHERIT_ATTRS:         /* arg is a user pointer to the name of an entry inside this inode */
+               ret = bch2_ioc_reinherit_attrs(c, file, inode,
+                                              (void __user *) arg);
+               break;
 
        case FS_IOC_GETVERSION:
-               return -ENOTTY;
+               ret = -ENOTTY;
+               break;
+
        case FS_IOC_SETVERSION:
-               return -ENOTTY;
+               ret = -ENOTTY;
+               break;
 
        case FS_IOC_GOINGDOWN:
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
+               ret = bch2_ioc_goingdown(c, (u32 __user *) arg);        /* the capability check now lives inside the helper */
+               break;
 
-               down_write(&sb->s_umount);
-               sb->s_flags |= MS_RDONLY;
-               bch2_fs_emergency_read_only(c);
-               up_write(&sb->s_umount);
-               return 0;
+       case BCH_IOCTL_SUBVOLUME_CREATE: {      /* the argument struct is copied in here and passed to the helper by value */
+               struct bch_ioctl_subvolume i;
+
+               ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+                       ? -EFAULT
+                       : bch2_ioctl_subvolume_create(c, file, i);
+               break;
+       }
+
+       case BCH_IOCTL_SUBVOLUME_DESTROY: {     /* same copy-in pattern as the create case */
+               struct bch_ioctl_subvolume i;
+
+               ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+                       ? -EFAULT
+                       : bch2_ioctl_subvolume_destroy(c, file, i);
+               break;
+       }
 
        default:
-               return bch2_fs_ioctl(c, cmd, (void __user *) arg);
+               ret = bch2_fs_ioctl(c, cmd, (void __user *) arg);       /* anything not handled above goes to the filesystem-level ioctl handler */
+               break;
        }
+       /* presumably maps bcachefs-private error codes to standard errno values - confirm against bch2_err_class() */
+       return bch2_err_class(ret);
 }
 
 #ifdef CONFIG_COMPAT