1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_CHARDEV
5 #include "bcachefs_ioctl.h"
15 #include <linux/cdev.h>
16 #include <linux/device.h>
18 #include <linux/ioctl.h>
19 #include <linux/major.h>
20 #include <linux/sched/task.h>
21 #include <linux/slab.h>
22 #include <linux/thread_with_file.h>
23 #include <linux/uaccess.h>
/*
 * Thin wrapper around copy_to_user() that collapses its result into an
 * errno-style value: -EFAULT when copy_to_user() returns nonzero, 0 otherwise.
 */
26 static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
28 return copy_to_user(to, from, n) ? -EFAULT : 0;
/*
 * Resolve an ioctl's @dev argument to a struct bch_dev.
 *
 * With BCH_BY_INDEX set in @flags, @dev is a member index: it is range-checked
 * against c->sb.nr_devices and looked up in c->devs[]. Otherwise @dev is
 * treated as a user-space pointer to a path string (copied with a PATH_MAX
 * limit) and resolved through bch2_dev_lookup().
 *
 * Failures are reported as ERR_PTR() values (-EINVAL, or the strndup_user()
 * error forwarded with ERR_CAST()).
 */
31 /* returns with ref on ca->ref */
32 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
37 if (flags & BCH_BY_INDEX) {
38 if (dev >= c->sb.nr_devices)
39 return ERR_PTR(-EINVAL);
/* NOTE(review): rcu_dereference() presumably runs under an RCU read lock taken on a line not shown — confirm */
42 ca = rcu_dereference(c->devs[dev]);
/* this is the reference the header comment says the caller receives */
44 percpu_ref_get(&ca->ref);
48 return ERR_PTR(-EINVAL);
/* by-path lookup: @dev carries a user pointer encoded in a u64 */
52 path = strndup_user((const char __user *)
53 (unsigned long) dev, PATH_MAX);
55 return ERR_CAST(path);
57 ca = bch2_dev_lookup(c, path);
/*
 * BCH_IOCTL_ASSEMBLE handler: copy the argument struct and its array of
 * user-space path pointers, duplicate each path string into kernel memory,
 * and open the filesystem from that device list with empty mount options.
 */
65 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
67 struct bch_ioctl_assemble arg;
69 u64 *user_devs = NULL;
74 if (copy_from_user(&arg, user_arg, sizeof(arg)))
/* flags and pad are tested for nonzero (presumably rejected; the return is not visible here) */
77 if (arg.flags || arg.pad)
/* kernel copy of the u64-encoded user pointers */
80 user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
/*
 * NOTE(review): no NULL check on devs is visible between this allocation and
 * the loop that stores into devs[i]; also no kfree(user_devs) is visible in
 * this excerpt — confirm against the full source.
 */
84 devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
86 if (copy_from_user(user_devs, user_arg->devs,
87 sizeof(u64) * arg.nr_devs))
90 for (i = 0; i < arg.nr_devs; i++) {
91 devs[i] = strndup_user((const char __user *)(unsigned long)
/* strndup_user() reports failure as an ERR_PTR, converted to an errno here */
94 ret= PTR_ERR_OR_ZERO(devs[i]);
99 c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
100 ret = PTR_ERR_OR_ZERO(c);
/* cleanup walk over the path array (loop body not visible in this excerpt) */
105 for (i = 0; i < arg.nr_devs; i++)
/*
 * BCH_IOCTL_INCREMENTAL handler: copy the argument struct, duplicate the
 * user-supplied device path (PATH_MAX limit) and hand it to
 * bch2_fs_open_incremental(). The returned value is used as a "%s" message
 * in the error log below.
 */
111 static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
113 struct bch_ioctl_incremental arg;
117 if (copy_from_user(&arg, user_arg, sizeof(arg)))
/* flags and pad are tested for nonzero (presumably rejected; the return is not visible here) */
120 if (arg.flags || arg.pad)
123 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
124 ret = PTR_ERR_OR_ZERO(path);
128 err = bch2_fs_open_incremental(path);
132 pr_err("Could not register bcachefs devices: %s", err);
/*
 * Members of struct fsck_thread (the struct's opening line is outside this
 * excerpt; the name comes from the container_of() users in this file).
 */
/* embedded thread-with-stdio object; container_of() on it recovers the fsck_thread */
141 struct thread_with_stdio thr;
/* options for this fsck run, initialised with bch2_opts_empty() by the ioctl handlers */
145 struct bch_opts opts;
/*
 * Exit callback handed to run_thread_with_stdio(); also called directly by
 * the fsck ioctl handlers on their error paths. Recovers the enclosing
 * fsck_thread and walks its nr_devs device entries.
 * NOTE(review): the loop body is not visible here — presumably it frees
 * thr->devs[i]; confirm.
 */
148 static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
150 struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
152 for (size_t i = 0; i < thr->nr_devs; i++)
/*
 * Thread body for offline fsck: opens the filesystem from the device list and
 * options stored in the fsck_thread, and records the outcome in
 * thr->thr.thr.ret.
 */
158 static void bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
160 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
161 struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
/* bch2_fs_open() reports failure as an ERR_PTR; store it as an errno (0 on success) */
163 thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
/* success-only follow-up (statement not visible in this excerpt) */
164 if (!thr->thr.thr.ret)
/*
 * BCH_IOCTL_FSCK_OFFLINE handler: build a fsck_thread from the user-supplied
 * device paths and option string, then start it with run_thread_with_stdio().
 * Requires CAP_SYS_ADMIN.
 */
168 static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
170 struct bch_ioctl_fsck_offline arg;
171 struct fsck_thread *thr = NULL;
175 if (copy_from_user(&arg, user_arg, sizeof(arg)))
181 if (!capable(CAP_SYS_ADMIN))
/*
 * Three allocations chained with ||: devs (kernel copy of the user pointer
 * array), thr (the thread context) and thr->devs (duplicated path strings).
 * Any failure takes the branch.
 */
184 if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
185 !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
186 !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
191 thr->opts = bch2_opts_empty();
192 thr->nr_devs = arg.nr_devs;
/* array_size() computes element size times count for the copy length */
194 if (copy_from_user(devs, &user_arg->devs[0],
195 array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
200 for (size_t i = 0; i < arg.nr_devs; i++) {
201 thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
202 ret = PTR_ERR_OR_ZERO(thr->devs[i]);
/* option string from user space, limited to 1 << 16 bytes */
208 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
/* GNU ?: extension: parse only if the string copy succeeded */
210 ret = PTR_ERR_OR_ZERO(optstr) ?:
211 bch2_parse_mount_opts(NULL, &thr->opts, optstr);
/* pass the address of the thread's stdio object through the stdio option, encoded as a u64 */
218 opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
220 ret = run_thread_with_stdio(&thr->thr,
221 bch2_fsck_thread_exit,
222 bch2_fsck_offline_thread_fn);
/* error path: invoke the exit callback directly and log the error string */
226 bch2_fsck_thread_exit(&thr->thr);
227 pr_err("ret %s", bch2_err_str(ret));
/*
 * Dispatcher for ioctls that are not tied to a filesystem (called from
 * bch2_chardev_ioctl() when no bch_fs is found for the minor): assemble,
 * incremental and offline fsck.
 */
233 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
239 case BCH_IOCTL_ASSEMBLE:
240 return bch2_ioctl_assemble(arg);
241 case BCH_IOCTL_INCREMENTAL:
242 return bch2_ioctl_incremental(arg);
244 case BCH_IOCTL_FSCK_OFFLINE: {
245 ret = bch2_ioctl_fsck_offline(arg);
/* ret is passed through bch2_err_class() (presumably mapping private error codes to standard errnos — confirm) */
254 ret = bch2_err_class(ret);
/*
 * BCH_IOCTL_QUERY_UUID handler: copy c->sb.user_uuid into user_arg->uuid.
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
258 static long bch2_ioctl_query_uuid(struct bch_fs *c,
259 struct bch_ioctl_query_uuid __user *user_arg)
261 return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
262 sizeof(c->sb.user_uuid));
/*
 * BCH_IOCTL_START handler: after the CAP_SYS_ADMIN check and the
 * flags/pad check, returns the result of bch2_fs_start().
 */
266 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
268 if (!capable(CAP_SYS_ADMIN))
/* flags and pad are tested for nonzero (presumably rejected; the return is not visible here) */
271 if (arg.flags || arg.pad)
274 return bch2_fs_start(c);
/*
 * BCH_IOCTL_STOP handler. Only the CAP_SYS_ADMIN gate is visible in this
 * excerpt; the stop action itself is on lines not shown.
 */
277 static long bch2_ioctl_stop(struct bch_fs *c)
279 if (!capable(CAP_SYS_ADMIN))
/*
 * BCH_IOCTL_DISK_ADD handler: duplicate the user-supplied device path
 * (PATH_MAX limit) and pass it to bch2_dev_add(). Requires CAP_SYS_ADMIN.
 */
287 static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
292 if (!capable(CAP_SYS_ADMIN))
/* flags and pad are tested for nonzero (presumably rejected; the return is not visible here) */
295 if (arg.flags || arg.pad)
298 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
299 ret = PTR_ERR_OR_ZERO(path);
303 ret = bch2_dev_add(c, path);
/*
 * BCH_IOCTL_DISK_REMOVE handler: validate the flag bits, look the device up
 * with bch2_device_lookup() and return the result of bch2_dev_remove().
 * Requires CAP_SYS_ADMIN.
 *
 * NOTE(review): unlike the offline/set_state handlers, no percpu_ref_put() on
 * ca is visible here — presumably bch2_dev_remove() consumes the reference;
 * confirm.
 */
309 static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
313 if (!capable(CAP_SYS_ADMIN))
/* mask out the permitted flag bits; anything left over fails validation (mask is only partly visible) */
316 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
317 BCH_FORCE_IF_METADATA_LOST|
318 BCH_FORCE_IF_DEGRADED|
323 ca = bch2_device_lookup(c, arg.dev, arg.flags);
327 return bch2_dev_remove(c, ca, arg.flags);
/*
 * BCH_IOCTL_DISK_ONLINE handler: duplicate the user-supplied device path
 * (PATH_MAX limit) and pass it to bch2_dev_online(). Requires CAP_SYS_ADMIN.
 */
330 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
335 if (!capable(CAP_SYS_ADMIN))
/* flags and pad are tested for nonzero (presumably rejected; the return is not visible here) */
338 if (arg.flags || arg.pad)
341 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
342 ret = PTR_ERR_OR_ZERO(path);
346 ret = bch2_dev_online(c, path);
/*
 * BCH_IOCTL_DISK_OFFLINE handler: validate the flag bits, look the device up,
 * call bch2_dev_offline() and drop the device reference afterwards.
 * Requires CAP_SYS_ADMIN.
 */
351 static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
356 if (!capable(CAP_SYS_ADMIN))
/* mask out the permitted flag bits; anything left over fails validation (mask is only partly visible) */
359 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
360 BCH_FORCE_IF_METADATA_LOST|
361 BCH_FORCE_IF_DEGRADED|
366 ca = bch2_device_lookup(c, arg.dev, arg.flags);
370 ret = bch2_dev_offline(c, ca, arg.flags);
/* release the reference obtained from bch2_device_lookup() */
371 percpu_ref_put(&ca->ref);
/*
 * BCH_IOCTL_DISK_SET_STATE handler: validate flags, padding and the requested
 * state, look the device up, apply the state with bch2_dev_set_state(), log
 * on error and drop the device reference. Requires CAP_SYS_ADMIN.
 */
375 static long bch2_ioctl_disk_set_state(struct bch_fs *c,
376 struct bch_ioctl_disk_set_state arg)
381 if (!capable(CAP_SYS_ADMIN))
/* validation: unknown flag bits, nonzero padding, or new_state at/above BCH_MEMBER_STATE_NR */
384 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
385 BCH_FORCE_IF_METADATA_LOST|
386 BCH_FORCE_IF_DEGRADED|
388 arg.pad[0] || arg.pad[1] || arg.pad[2] ||
389 arg.new_state >= BCH_MEMBER_STATE_NR)
392 ca = bch2_device_lookup(c, arg.dev, arg.flags);
396 ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
398 bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
/* release the reference obtained from bch2_device_lookup() */
400 percpu_ref_put(&ca->ref);
/*
 * Context for a data job started by bch2_ioctl_data(); the file operations in
 * bcachefs_data_ops recover it from file->private_data via container_of().
 * (Some members, e.g. the bch_fs pointer read as ctx->c, are on lines not
 * shown in this excerpt.)
 */
404 struct bch_data_ctx {
/* embedded thread/file object; its ret field receives the job result */
405 struct thread_with_file thr;
/* copy of the ioctl argument, passed to bch2_data_job() */
408 struct bch_ioctl_data arg;
/* progress counters, read by bch2_data_job_read() */
409 struct bch_move_stats stats;
/*
 * Thread function for a data job: runs bch2_data_job() with the stored
 * argument and records its result in ctx->thr.ret.
 */
412 static int bch2_data_thread(void *arg)
414 struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
416 ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
/* NOTE(review): U8_MAX presumably serves as a "job finished" marker for readers of the progress event — confirm */
417 ctx->stats.data_type = U8_MAX;
/*
 * ->release for the data-job file: recovers the bch_data_ctx from
 * file->private_data and calls thread_with_file_exit() on its thread object.
 */
421 static int bch2_data_job_release(struct inode *inode, struct file *file)
423 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
425 thread_with_file_exit(&ctx->thr);
/*
 * ->read for the data-job file: builds one BCH_DATA_EVENT_PROGRESS event from
 * the job's current stats and copies it to the user buffer.
 * Returns sizeof(e) on success or -EFAULT if the copy fails.
 */
430 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
431 size_t len, loff_t *ppos)
433 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
434 struct bch_fs *c = ctx->c;
/* snapshot of the job's progress at the time of the read */
435 struct bch_ioctl_data_event e = {
436 .type = BCH_DATA_EVENT_PROGRESS,
437 .p.data_type = ctx->stats.data_type,
438 .p.btree_id = ctx->stats.pos.btree,
439 .p.pos = ctx->stats.pos.pos,
440 .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
/* the total is taken from the filesystem-wide "used" figure */
441 .p.sectors_total = bch2_fs_usage_read_short(c).used,
/* GNU ?: extension: yields the error code if nonzero, else the event size */
447 return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
/* File operations for the data-job file: release and read handlers (as visible here). */
450 static const struct file_operations bcachefs_data_ops = {
451 .release = bch2_data_job_release,
452 .read = bch2_data_job_read,
/*
 * BCH_IOCTL_DATA handler: validate the request, allocate a zeroed
 * bch_data_ctx and start it with run_thread_with_file().
 * Requires CAP_SYS_ADMIN.
 */
456 static long bch2_ioctl_data(struct bch_fs *c,
457 struct bch_ioctl_data arg)
459 struct bch_data_ctx *ctx;
462 if (!capable(CAP_SYS_ADMIN))
/* validation: op must be below BCH_DATA_OP_NR and flags must be zero */
465 if (arg.op >= BCH_DATA_OP_NR || arg.flags)
468 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
475 ret = run_thread_with_file(&ctx->thr,
/*
 * BCH_IOCTL_FS_USAGE handler: fill a bch_ioctl_fs_usage (fixed header plus a
 * variable-length list of replicas entries) and copy it back to user space.
 *
 * The caller states how many bytes of replicas entries its buffer can hold in
 * user_arg->replica_entries_bytes; on output the field is set to the number
 * of bytes actually written.
 */
483 static long bch2_ioctl_fs_usage(struct bch_fs *c,
484 struct bch_ioctl_fs_usage __user *user_arg)
486 struct bch_ioctl_fs_usage *arg = NULL;
487 struct bch_replicas_usage *dst_e, *dst_end;
488 struct bch_fs_usage_online *src;
489 u32 replica_entries_bytes;
/* only checked against a started filesystem (the failure return is not visible here) */
493 if (!test_bit(BCH_FS_started, &c->flags))
496 if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
/* size_add() is used for the header + entries size */
499 arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
/* NOTE(review): c->mark_lock is released further down but not taken in the visible lines — presumably acquired inside bch2_fs_usage_read(); confirm */
503 src = bch2_fs_usage_read(c);
509 arg->capacity = c->capacity;
510 arg->used = bch2_fs_sectors_used(c, src);
511 arg->online_reserved = src->online_reserved;
513 for (i = 0; i < BCH_REPLICAS_MAX; i++)
514 arg->persistent_reserved[i] = src->u.persistent_reserved[i];
/* dst_e walks the output entries; dst_end marks the end of the caller-sized area */
516 dst_e = arg->replicas;
517 dst_end = (void *) arg->replicas + replica_entries_bytes;
519 for (i = 0; i < c->replicas.nr; i++) {
520 struct bch_replicas_entry_v1 *src_e =
521 cpu_replicas_entry(&c->replicas, i);
523 /* check that we have enough space for one replicas entry */
524 if (dst_e + 1 > dst_end) {
529 dst_e->sectors = src->u.replicas[i];
532 /* recheck after setting nr_devs: */
533 if (replicas_usage_next(dst_e) > dst_end) {
538 memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
540 dst_e = replicas_usage_next(dst_e);
/* report back how many bytes of entries were produced */
543 arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
545 percpu_up_read(&c->mark_lock);
/* copy the header plus only the entries actually written */
551 ret = copy_to_user_errcode(user_arg, arg,
552 sizeof(*arg) + arg->replica_entries_bytes);
/*
 * BCH_IOCTL_DEV_USAGE handler: look up one device, read its usage with
 * bch2_dev_usage_read() and return state, bucket size, bucket count and the
 * per-data-type counters in a fixed-size struct.
 * Returns 0 or -EFAULT from the final copy (earlier failure returns are on
 * lines not shown).
 */
558 /* obsolete, didn't allow for new data types: */
559 static long bch2_ioctl_dev_usage(struct bch_fs *c,
560 struct bch_ioctl_dev_usage __user *user_arg)
562 struct bch_ioctl_dev_usage arg;
563 struct bch_dev_usage src;
567 if (!test_bit(BCH_FS_started, &c->flags))
570 if (copy_from_user(&arg, user_arg, sizeof(arg)))
/* only BCH_BY_INDEX is an accepted flag bit (rest of the condition not visible) */
573 if ((arg.flags & ~BCH_BY_INDEX) ||
579 ca = bch2_device_lookup(c, arg.dev, arg.flags);
583 src = bch2_dev_usage_read(ca);
585 arg.state = ca->mi.state;
586 arg.bucket_size = ca->mi.bucket_size;
/* bucket count excludes those before first_bucket */
587 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
589 for (i = 0; i < BCH_DATA_NR; i++) {
590 arg.d[i].buckets = src.d[i].buckets;
591 arg.d[i].sectors = src.d[i].sectors;
592 arg.d[i].fragmented = src.d[i].fragmented;
/* release the reference obtained from bch2_device_lookup() */
595 percpu_ref_put(&ca->ref);
597 return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
/*
 * BCH_IOCTL_DEV_USAGE_V2 handler: like bch2_ioctl_dev_usage(), but the number
 * of per-data-type entries is supplied by the caller in arg.nr_data_types and
 * clamped to BCH_DATA_NR. The fixed header is copied out first, then each
 * per-type entry individually into user_arg->d[i].
 */
600 static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
601 struct bch_ioctl_dev_usage_v2 __user *user_arg)
603 struct bch_ioctl_dev_usage_v2 arg;
604 struct bch_dev_usage src;
608 if (!test_bit(BCH_FS_started, &c->flags))
611 if (copy_from_user(&arg, user_arg, sizeof(arg)))
/* only BCH_BY_INDEX is an accepted flag bit (rest of the condition not visible) */
614 if ((arg.flags & ~BCH_BY_INDEX) ||
620 ca = bch2_device_lookup(c, arg.dev, arg.flags);
624 src = bch2_dev_usage_read(ca);
626 arg.state = ca->mi.state;
627 arg.bucket_size = ca->mi.bucket_size;
/* never report more types than exist in src.d[] */
628 arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
629 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
/* header first; it carries the clamped nr_data_types back to the caller */
631 ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
635 for (unsigned i = 0; i < arg.nr_data_types; i++) {
636 struct bch_ioctl_dev_usage_type t = {
637 .buckets = src.d[i].buckets,
638 .sectors = src.d[i].sectors,
639 .fragmented = src.d[i].fragmented,
642 ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
/* release the reference obtained from bch2_device_lookup() */
647 percpu_ref_put(&ca->ref);
/*
 * BCH_IOCTL_READ_SUPER handler: copy a superblock to the user buffer at
 * arg.sb while holding c->sb_lock. With BCH_READ_DEV set, a specific device
 * is looked up first (the selection of sb itself is on lines not shown).
 * Requires CAP_SYS_ADMIN.
 */
651 static long bch2_ioctl_read_super(struct bch_fs *c,
652 struct bch_ioctl_read_super arg)
654 struct bch_dev *ca = NULL;
658 if (!capable(CAP_SYS_ADMIN))
/* only BCH_BY_INDEX and BCH_READ_DEV are accepted flag bits (rest of the condition not visible) */
661 if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
665 mutex_lock(&c->sb_lock);
667 if (arg.flags & BCH_READ_DEV) {
668 ca = bch2_device_lookup(c, arg.dev, arg.flags);
/* the user buffer (arg.size bytes) must be able to hold the whole superblock */
680 if (vstruct_bytes(sb) > arg.size) {
685 ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
/* ca may be NULL (no BCH_READ_DEV) or an ERR_PTR (lookup failed); only a real device holds a ref */
688 if (!IS_ERR_OR_NULL(ca))
689 percpu_ref_put(&ca->ref);
690 mutex_unlock(&c->sb_lock);
/*
 * BCH_IOCTL_DISK_GET_IDX handler: decode arg.dev into a dev_t and search the
 * online members for a device with that dev_t.
 * Returns -BCH_ERR_ENOENT_dev_idx_not_found when no member matches (the
 * value returned on a match is on a line not shown). Requires CAP_SYS_ADMIN.
 */
694 static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
695 struct bch_ioctl_disk_get_idx arg)
697 dev_t dev = huge_decode_dev(arg.dev);
699 if (!capable(CAP_SYS_ADMIN))
705 for_each_online_member(c, ca)
706 if (ca->dev == dev) {
/* NOTE(review): leaving the iterator early — the io_ref put here presumably balances a ref held by for_each_online_member(); confirm */
707 percpu_ref_put(&ca->io_ref);
711 return -BCH_ERR_ENOENT_dev_idx_not_found;
/*
 * BCH_IOCTL_DISK_RESIZE handler: look the device up, call bch2_dev_resize()
 * with arg.nbuckets and drop the device reference. Requires CAP_SYS_ADMIN.
 */
714 static long bch2_ioctl_disk_resize(struct bch_fs *c,
715 struct bch_ioctl_disk_resize arg)
720 if (!capable(CAP_SYS_ADMIN))
/* only BCH_BY_INDEX is an accepted flag bit (rest of the condition not visible) */
723 if ((arg.flags & ~BCH_BY_INDEX) ||
727 ca = bch2_device_lookup(c, arg.dev, arg.flags);
731 ret = bch2_dev_resize(c, ca, arg.nbuckets);
/* release the reference obtained from bch2_device_lookup() */
733 percpu_ref_put(&ca->ref);
/*
 * BCH_IOCTL_DISK_RESIZE_JOURNAL handler: look the device up, call
 * bch2_set_nr_journal_buckets() with arg.nbuckets and drop the device
 * reference. Requires CAP_SYS_ADMIN.
 */
737 static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
738 struct bch_ioctl_disk_resize_journal arg)
743 if (!capable(CAP_SYS_ADMIN))
/* only BCH_BY_INDEX is an accepted flag bit (rest of the condition not visible) */
746 if ((arg.flags & ~BCH_BY_INDEX) ||
/* bucket counts above U32_MAX are checked separately (the return is not visible here) */
750 if (arg.nbuckets > U32_MAX)
753 ca = bch2_device_lookup(c, arg.dev, arg.flags);
757 ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
/* release the reference obtained from bch2_device_lookup() */
759 percpu_ref_put(&ca->ref);
/*
 * Thread body for online fsck: points the filesystem's stdio at this thread,
 * temporarily overrides c->opts.fix_errors, runs the online recovery passes
 * starting from check_alloc_info, then restores the saved state and releases
 * c->online_fsck_mutex.
 */
763 static void bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
765 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
766 struct bch_fs *c = thr->c;
/* stdio_filter is set to the current task and stdio to this thread's stdio object */
768 c->stdio_filter = current;
769 c->stdio = &thr->thr.stdio;
772 * XXX: can we figure out a way to do this without mucking with c->opts?
/* save the old setting; use the caller's fix_errors if given, FSCK_FIX_ask otherwise */
774 unsigned old_fix_errors = c->opts.fix_errors;
775 if (opt_defined(thr->opts, fix_errors))
776 c->opts.fix_errors = thr->opts.fix_errors;
778 c->opts.fix_errors = FSCK_FIX_ask;
781 set_bit(BCH_FS_fsck_running, &c->flags);
783 c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
784 int ret = bch2_run_online_recovery_passes(c);
786 clear_bit(BCH_FS_fsck_running, &c->flags);
/* undo the temporary overrides made above */
790 c->stdio_filter = NULL;
791 c->opts.fix_errors = old_fix_errors;
/* pairs with the down_trylock() in bch2_ioctl_fsck_online() */
793 up(&c->online_fsck_mutex);
/*
 * BCH_IOCTL_FSCK_ONLINE handler: take a ro ref on the filesystem and the
 * online_fsck_mutex (without blocking), parse the user-supplied option
 * string and start bch2_fsck_online_thread_fn via run_thread_with_stdio().
 * Requires CAP_SYS_ADMIN.
 */
797 static long bch2_ioctl_fsck_online(struct bch_fs *c,
798 struct bch_ioctl_fsck_online arg)
800 struct fsck_thread *thr = NULL;
806 if (!capable(CAP_SYS_ADMIN))
809 if (!bch2_ro_ref_tryget(c))
/* nonzero from down_trylock() means the semaphore could not be taken, i.e. it is already held */
812 if (down_trylock(&c->online_fsck_mutex)) {
817 thr = kzalloc(sizeof(*thr), GFP_KERNEL);
824 thr->opts = bch2_opts_empty();
/* option string from user space, limited to 1 << 16 bytes */
827 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
/* GNU ?: extension: parse only if the string copy succeeded */
829 ret = PTR_ERR_OR_ZERO(optstr) ?:
830 bch2_parse_mount_opts(c, &thr->opts, optstr);
837 ret = run_thread_with_stdio(&thr->thr,
838 bch2_fsck_thread_exit,
839 bch2_fsck_online_thread_fn);
/* error path: the thread function will not run, so clean up and release the semaphore here */
844 bch2_fsck_thread_exit(&thr->thr);
845 up(&c->online_fsck_mutex);
/*
 * Helper for bch2_fs_ioctl(): copies an argument struct of type _argtype from
 * the user pointer `arg` into a local `i`, then calls bch2_ioctl_<_name>(c, i)
 * with the struct passed by value and stores the result in `ret`.
 * Relies on `c`, `arg` and `ret` being in scope at the expansion site.
 */
851 #define BCH_IOCTL(_name, _argtype) \
855 if (copy_from_user(&i, arg, sizeof(i))) \
857 ret = bch2_ioctl_##_name(c, i); \
/*
 * Dispatcher for ioctls issued against a specific filesystem.
 *
 * Commands in the first group are handled before the BCH_FS_started test;
 * the second group is only reached after that test (its failure return is on
 * a line not shown). Handlers invoked through BCH_IOCTL() receive their
 * argument struct by value.
 */
861 long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
/* these handlers take the raw user pointer and do their own copying */
866 case BCH_IOCTL_QUERY_UUID:
867 return bch2_ioctl_query_uuid(c, arg);
868 case BCH_IOCTL_FS_USAGE:
869 return bch2_ioctl_fs_usage(c, arg);
870 case BCH_IOCTL_DEV_USAGE:
871 return bch2_ioctl_dev_usage(c, arg);
872 case BCH_IOCTL_DEV_USAGE_V2:
873 return bch2_ioctl_dev_usage_v2(c, arg);
875 case BCH_IOCTL_START:
876 BCH_IOCTL(start, struct bch_ioctl_start);
878 return bch2_ioctl_stop(c);
880 case BCH_IOCTL_READ_SUPER:
881 BCH_IOCTL(read_super, struct bch_ioctl_read_super);
882 case BCH_IOCTL_DISK_GET_IDX:
883 BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
/* everything dispatched after this point is gated on the filesystem having been started */
886 if (!test_bit(BCH_FS_started, &c->flags))
890 case BCH_IOCTL_DISK_ADD:
891 BCH_IOCTL(disk_add, struct bch_ioctl_disk);
892 case BCH_IOCTL_DISK_REMOVE:
893 BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
894 case BCH_IOCTL_DISK_ONLINE:
895 BCH_IOCTL(disk_online, struct bch_ioctl_disk);
896 case BCH_IOCTL_DISK_OFFLINE:
897 BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
898 case BCH_IOCTL_DISK_SET_STATE:
899 BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
901 BCH_IOCTL(data, struct bch_ioctl_data);
902 case BCH_IOCTL_DISK_RESIZE:
903 BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
904 case BCH_IOCTL_DISK_RESIZE_JOURNAL:
905 BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
906 case BCH_IOCTL_FSCK_ONLINE:
907 BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
/* ret is passed through bch2_err_class() (presumably mapping private error codes to standard errnos — confirm) */
913 ret = bch2_err_class(ret);
/* IDR mapping chardev minor numbers to their struct bch_fs; filled by bch2_fs_chardev_init(), queried by bch2_chardev_ioctl(). */
917 static DEFINE_IDR(bch_chardev_minor);
/*
 * ->unlocked_ioctl entry point for the bcachefs character devices.
 * Minors below U8_MAX are looked up in bch_chardev_minor to find the owning
 * filesystem; depending on the condition on the line not shown (presumably
 * whether a filesystem was found — confirm) the call goes to bch2_fs_ioctl()
 * or bch2_global_ioctl().
 */
919 static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
921 unsigned minor = iminor(file_inode(filp));
/* minor U8_MAX is the global control device created in bch2_chardev_init(), so it is never looked up */
922 struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
923 void __user *arg = (void __user *) v;
926 ? bch2_fs_ioctl(c, cmd, arg)
927 : bch2_global_ioctl(cmd, arg);
/* File operations registered for the bcachefs control character devices (see register_chrdev() in bch2_chardev_init()). */
930 static const struct file_operations bch_chardev_fops = {
931 .owner = THIS_MODULE,
932 .unlocked_ioctl = bch2_chardev_ioctl,
933 .open = nonseekable_open,
/* Module-wide chardev state, set up in bch2_chardev_init() and torn down in bch2_chardev_exit(). */
/* major number returned by register_chrdev() */
936 static int bch_chardev_major;
/* device class under which the control devices are created */
937 static struct class *bch_chardev_class;
/* the global control device, created at minor U8_MAX */
938 static struct device *bch_chardev;
/*
 * Tear down the per-filesystem control device: unregister the device if one
 * was created, then remove the filesystem's minor from bch_chardev_minor.
 */
940 void bch2_fs_chardev_exit(struct bch_fs *c)
/* c->chardev may be NULL (never created) or an ERR_PTR (device_create() failed) */
942 if (!IS_ERR_OR_NULL(c->chardev))
943 device_unregister(c->chardev);
945 idr_remove(&bch_chardev_minor, c->minor);
/*
 * Create the per-filesystem control device: allocate a minor from
 * bch_chardev_minor and create a "bcachefs<minor>-ctl" device for it.
 * Returns the device_create() error on failure (the success return and the
 * check of the idr_alloc() result are on lines not shown).
 *
 * NOTE(review): idr_alloc() is called with start 0 and end 0, i.e. without an
 * explicit upper bound, while bch2_chardev_ioctl() only resolves minors below
 * U8_MAX and U8_MAX itself is used for the global control device — confirm
 * that larger minors cannot be handed out or are handled elsewhere.
 */
948 int bch2_fs_chardev_init(struct bch_fs *c)
950 c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
954 c->chardev = device_create(bch_chardev_class, NULL,
955 MKDEV(bch_chardev_major, c->minor), c,
956 "bcachefs%u-ctl", c->minor);
957 if (IS_ERR(c->chardev))
958 return PTR_ERR(c->chardev);
/*
 * Module-level teardown of the control-device infrastructure: destroy the
 * global control device (minor U8_MAX), destroy the class, and unregister the
 * character device major. Each step is guarded so that it is skipped when the
 * corresponding setup step did not succeed.
 *
 * NOTE(review): the name passed to unregister_chrdev() below is "bcachefs",
 * whereas bch2_chardev_init() registers the major as "bcachefs-ctl" — the two
 * should presumably match. Also, device_destroy() is guarded by
 * bch_chardev_class rather than bch_chardev — confirm that is intended.
 */
963 void bch2_chardev_exit(void)
965 if (!IS_ERR_OR_NULL(bch_chardev_class))
966 device_destroy(bch_chardev_class,
967 MKDEV(bch_chardev_major, U8_MAX));
968 if (!IS_ERR_OR_NULL(bch_chardev_class))
969 class_destroy(bch_chardev_class);
/* register_chrdev() stores a negative errno here on failure, hence the > 0 test */
970 if (bch_chardev_major > 0)
971 unregister_chrdev(bch_chardev_major, "bcachefs");
/*
 * Module-level setup of the control-device infrastructure:
 *   1. register a dynamically allocated character device major,
 *   2. create the "bcachefs" device class,
 *   3. create the global "bcachefs-ctl" device at minor U8_MAX.
 * Returns the negative errno of the first step that fails.
 *
 * NOTE(review): the early returns do not undo earlier steps here — presumably
 * the caller invokes bch2_chardev_exit() on failure; confirm.
 */
974 int __init bch2_chardev_init(void)
/* major 0 asks register_chrdev() to pick a free major and return it */
976 bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
977 if (bch_chardev_major < 0)
978 return bch_chardev_major;
980 bch_chardev_class = class_create("bcachefs");
981 if (IS_ERR(bch_chardev_class))
982 return PTR_ERR(bch_chardev_class);
984 bch_chardev = device_create(bch_chardev_class, NULL,
985 MKDEV(bch_chardev_major, U8_MAX),
986 NULL, "bcachefs-ctl");
987 if (IS_ERR(bch_chardev))
988 return PTR_ERR(bch_chardev);
993 #endif /* NO_BCACHEFS_CHARDEV */