]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/chardev.c
Update bcachefs sources to 1a739db0b256 bcachefs; guard against overflow in btree...
[bcachefs-tools-debian] / libbcachefs / chardev.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_CHARDEV
3
4 #include "bcachefs.h"
5 #include "bcachefs_ioctl.h"
6 #include "buckets.h"
7 #include "chardev.h"
8 #include "journal.h"
9 #include "move.h"
10 #include "recovery.h"
11 #include "replicas.h"
12 #include "super.h"
13 #include "super-io.h"
14
15 #include <linux/anon_inodes.h>
16 #include <linux/cdev.h>
17 #include <linux/device.h>
18 #include <linux/file.h>
19 #include <linux/fs.h>
20 #include <linux/ioctl.h>
21 #include <linux/kthread.h>
22 #include <linux/major.h>
23 #include <linux/poll.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27
28 __must_check
29 static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
30 {
31         return copy_to_user(to, from, n) ? -EFAULT : 0;
32 }
33
34 struct thread_with_file {
35         struct task_struct      *task;
36         int                     ret;
37         bool                    done;
38 };
39
40 static void thread_with_file_exit(struct thread_with_file *thr)
41 {
42         if (thr->task) {
43                 kthread_stop(thr->task);
44                 put_task_struct(thr->task);
45         }
46 }
47
48 __printf(4, 0)
49 static int run_thread_with_file(struct thread_with_file *thr,
50                                 const struct file_operations *fops,
51                                 int (*fn)(void *), const char *fmt, ...)
52 {
53         va_list args;
54         struct file *file = NULL;
55         int ret, fd = -1;
56         struct printbuf name = PRINTBUF;
57         unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
58
59         va_start(args, fmt);
60         prt_vprintf(&name, fmt, args);
61         va_end(args);
62
63         thr->ret = 0;
64         thr->task = kthread_create(fn, thr, name.buf);
65         ret = PTR_ERR_OR_ZERO(thr->task);
66         if (ret)
67                 goto err;
68
69         ret = get_unused_fd_flags(fd_flags);
70         if (ret < 0)
71                 goto err_stop_task;
72         fd = ret;
73
74         file = anon_inode_getfile(name.buf, fops, thr, fd_flags);
75         ret = PTR_ERR_OR_ZERO(file);
76         if (ret)
77                 goto err_put_fd;
78
79         fd_install(fd, file);
80         get_task_struct(thr->task);
81         wake_up_process(thr->task);
82         printbuf_exit(&name);
83         return fd;
84 err_put_fd:
85         put_unused_fd(fd);
86 err_stop_task:
87         kthread_stop(thr->task);
88 err:
89         printbuf_exit(&name);
90         return ret;
91 }
92
93 /* returns with ref on ca->ref */
94 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
95                                           unsigned flags)
96 {
97         struct bch_dev *ca;
98
99         if (flags & BCH_BY_INDEX) {
100                 if (dev >= c->sb.nr_devices)
101                         return ERR_PTR(-EINVAL);
102
103                 rcu_read_lock();
104                 ca = rcu_dereference(c->devs[dev]);
105                 if (ca)
106                         percpu_ref_get(&ca->ref);
107                 rcu_read_unlock();
108
109                 if (!ca)
110                         return ERR_PTR(-EINVAL);
111         } else {
112                 char *path;
113
114                 path = strndup_user((const char __user *)
115                                     (unsigned long) dev, PATH_MAX);
116                 if (IS_ERR(path))
117                         return ERR_CAST(path);
118
119                 ca = bch2_dev_lookup(c, path);
120                 kfree(path);
121         }
122
123         return ca;
124 }
125
126 #if 0
127 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
128 {
129         struct bch_ioctl_assemble arg;
130         struct bch_fs *c;
131         u64 *user_devs = NULL;
132         char **devs = NULL;
133         unsigned i;
134         int ret = -EFAULT;
135
136         if (copy_from_user(&arg, user_arg, sizeof(arg)))
137                 return -EFAULT;
138
139         if (arg.flags || arg.pad)
140                 return -EINVAL;
141
142         user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
143         if (!user_devs)
144                 return -ENOMEM;
145
146         devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
147
148         if (copy_from_user(user_devs, user_arg->devs,
149                            sizeof(u64) * arg.nr_devs))
150                 goto err;
151
152         for (i = 0; i < arg.nr_devs; i++) {
153                 devs[i] = strndup_user((const char __user *)(unsigned long)
154                                        user_devs[i],
155                                        PATH_MAX);
156                 ret= PTR_ERR_OR_ZERO(devs[i]);
157                 if (ret)
158                         goto err;
159         }
160
161         c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
162         ret = PTR_ERR_OR_ZERO(c);
163         if (!ret)
164                 closure_put(&c->cl);
165 err:
166         if (devs)
167                 for (i = 0; i < arg.nr_devs; i++)
168                         kfree(devs[i]);
169         kfree(devs);
170         return ret;
171 }
172
173 static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
174 {
175         struct bch_ioctl_incremental arg;
176         const char *err;
177         char *path;
178
179         if (copy_from_user(&arg, user_arg, sizeof(arg)))
180                 return -EFAULT;
181
182         if (arg.flags || arg.pad)
183                 return -EINVAL;
184
185         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
186         ret = PTR_ERR_OR_ZERO(path);
187         if (ret)
188                 return ret;
189
190         err = bch2_fs_open_incremental(path);
191         kfree(path);
192
193         if (err) {
194                 pr_err("Could not register bcachefs devices: %s", err);
195                 return -EINVAL;
196         }
197
198         return 0;
199 }
200 #endif
201
202 struct fsck_thread {
203         struct thread_with_file thr;
204         struct printbuf         buf;
205         struct bch_fs           *c;
206         char                    **devs;
207         size_t                  nr_devs;
208         struct bch_opts         opts;
209
210         struct log_output       output;
211         DARRAY(char)            output2;
212 };
213
214 static void bch2_fsck_thread_free(struct fsck_thread *thr)
215 {
216         thread_with_file_exit(&thr->thr);
217         if (thr->devs)
218                 for (size_t i = 0; i < thr->nr_devs; i++)
219                         kfree(thr->devs[i]);
220         darray_exit(&thr->output2);
221         printbuf_exit(&thr->output.buf);
222         kfree(thr->devs);
223         kfree(thr);
224 }
225
226 static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
227 {
228         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
229
230         bch2_fsck_thread_free(thr);
231         return 0;
232 }
233
234 static bool fsck_thread_ready(struct fsck_thread *thr)
235 {
236         return thr->output.buf.pos ||
237                 thr->output2.nr ||
238                 thr->thr.done;
239 }
240
241 static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
242                                      size_t len, loff_t *ppos)
243 {
244         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
245         size_t copied = 0, b;
246         int ret = 0;
247
248         if ((file->f_flags & O_NONBLOCK) &&
249             !fsck_thread_ready(thr))
250                 return -EAGAIN;
251
252         ret = wait_event_interruptible(thr->output.wait,
253                         fsck_thread_ready(thr));
254         if (ret)
255                 return ret;
256
257         if (thr->thr.done)
258                 return 0;
259
260         while (len) {
261                 ret = darray_make_room(&thr->output2, thr->output.buf.pos);
262                 if (ret)
263                         break;
264
265                 spin_lock_irq(&thr->output.lock);
266                 b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
267
268                 memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
269                 memmove(thr->output.buf.buf,
270                         thr->output.buf.buf + b,
271                         thr->output.buf.pos - b);
272
273                 thr->output2.nr += b;
274                 thr->output.buf.pos -= b;
275                 spin_unlock_irq(&thr->output.lock);
276
277                 b = min(len, thr->output2.nr);
278                 if (!b)
279                         break;
280
281                 b -= copy_to_user(buf, thr->output2.data, b);
282                 if (!b) {
283                         ret = -EFAULT;
284                         break;
285                 }
286
287                 copied  += b;
288                 buf     += b;
289                 len     -= b;
290
291                 memmove(thr->output2.data,
292                         thr->output2.data + b,
293                         thr->output2.nr - b);
294                 thr->output2.nr -= b;
295         }
296
297         return copied ?: ret;
298 }
299
300 static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
301 {
302         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
303
304         poll_wait(file, &thr->output.wait, wait);
305
306         return fsck_thread_ready(thr)
307                 ? EPOLLIN|EPOLLHUP
308                 : 0;
309 }
310
311 static const struct file_operations fsck_thread_ops = {
312         .release        = bch2_fsck_thread_release,
313         .read           = bch2_fsck_thread_read,
314         .poll           = bch2_fsck_thread_poll,
315         .llseek         = no_llseek,
316 };
317
318 static int bch2_fsck_offline_thread_fn(void *arg)
319 {
320         struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
321         struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
322
323         thr->thr.ret = PTR_ERR_OR_ZERO(c);
324         if (!thr->thr.ret)
325                 bch2_fs_stop(c);
326
327         thr->thr.done = true;
328         wake_up(&thr->output.wait);
329         return 0;
330 }
331
332 static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
333 {
334         struct bch_ioctl_fsck_offline arg;
335         struct fsck_thread *thr = NULL;
336         u64 *devs = NULL;
337         long ret = 0;
338
339         if (copy_from_user(&arg, user_arg, sizeof(arg)))
340                 return -EFAULT;
341
342         if (arg.flags)
343                 return -EINVAL;
344
345         if (!capable(CAP_SYS_ADMIN))
346                 return -EPERM;
347
348         if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
349             !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
350             !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
351                 ret = -ENOMEM;
352                 goto err;
353         }
354
355         thr->opts = bch2_opts_empty();
356         thr->nr_devs = arg.nr_devs;
357         thr->output.buf = PRINTBUF;
358         thr->output.buf.atomic++;
359         spin_lock_init(&thr->output.lock);
360         init_waitqueue_head(&thr->output.wait);
361         darray_init(&thr->output2);
362
363         if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) {
364                 ret = -EINVAL;
365                 goto err;
366         }
367
368         for (size_t i = 0; i < arg.nr_devs; i++) {
369                 thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
370                 ret = PTR_ERR_OR_ZERO(thr->devs[i]);
371                 if (ret)
372                         goto err;
373         }
374
375         if (arg.opts) {
376                 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
377
378                 ret =   PTR_ERR_OR_ZERO(optstr) ?:
379                         bch2_parse_mount_opts(NULL, &thr->opts, optstr);
380                 kfree(optstr);
381
382                 if (ret)
383                         goto err;
384         }
385
386         opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
387
388         ret = run_thread_with_file(&thr->thr,
389                                    &fsck_thread_ops,
390                                    bch2_fsck_offline_thread_fn,
391                                    "bch-fsck");
392 err:
393         if (ret < 0) {
394                 if (thr)
395                         bch2_fsck_thread_free(thr);
396                 pr_err("ret %s", bch2_err_str(ret));
397         }
398         kfree(devs);
399         return ret;
400 }
401
402 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
403 {
404         long ret;
405
406         switch (cmd) {
407 #if 0
408         case BCH_IOCTL_ASSEMBLE:
409                 return bch2_ioctl_assemble(arg);
410         case BCH_IOCTL_INCREMENTAL:
411                 return bch2_ioctl_incremental(arg);
412 #endif
413         case BCH_IOCTL_FSCK_OFFLINE: {
414                 ret = bch2_ioctl_fsck_offline(arg);
415                 break;
416         }
417         default:
418                 ret = -ENOTTY;
419                 break;
420         }
421
422         if (ret < 0)
423                 ret = bch2_err_class(ret);
424         return ret;
425 }
426
427 static long bch2_ioctl_query_uuid(struct bch_fs *c,
428                         struct bch_ioctl_query_uuid __user *user_arg)
429 {
430         return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
431                                     sizeof(c->sb.user_uuid));
432 }
433
434 #if 0
435 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
436 {
437         if (!capable(CAP_SYS_ADMIN))
438                 return -EPERM;
439
440         if (arg.flags || arg.pad)
441                 return -EINVAL;
442
443         return bch2_fs_start(c);
444 }
445
446 static long bch2_ioctl_stop(struct bch_fs *c)
447 {
448         if (!capable(CAP_SYS_ADMIN))
449                 return -EPERM;
450
451         bch2_fs_stop(c);
452         return 0;
453 }
454 #endif
455
456 static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
457 {
458         char *path;
459         int ret;
460
461         if (!capable(CAP_SYS_ADMIN))
462                 return -EPERM;
463
464         if (arg.flags || arg.pad)
465                 return -EINVAL;
466
467         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
468         ret = PTR_ERR_OR_ZERO(path);
469         if (ret)
470                 return ret;
471
472         ret = bch2_dev_add(c, path);
473         kfree(path);
474
475         return ret;
476 }
477
478 static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
479 {
480         struct bch_dev *ca;
481
482         if (!capable(CAP_SYS_ADMIN))
483                 return -EPERM;
484
485         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
486                            BCH_FORCE_IF_METADATA_LOST|
487                            BCH_FORCE_IF_DEGRADED|
488                            BCH_BY_INDEX)) ||
489             arg.pad)
490                 return -EINVAL;
491
492         ca = bch2_device_lookup(c, arg.dev, arg.flags);
493         if (IS_ERR(ca))
494                 return PTR_ERR(ca);
495
496         return bch2_dev_remove(c, ca, arg.flags);
497 }
498
499 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
500 {
501         char *path;
502         int ret;
503
504         if (!capable(CAP_SYS_ADMIN))
505                 return -EPERM;
506
507         if (arg.flags || arg.pad)
508                 return -EINVAL;
509
510         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
511         ret = PTR_ERR_OR_ZERO(path);
512         if (ret)
513                 return ret;
514
515         ret = bch2_dev_online(c, path);
516         kfree(path);
517         return ret;
518 }
519
520 static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
521 {
522         struct bch_dev *ca;
523         int ret;
524
525         if (!capable(CAP_SYS_ADMIN))
526                 return -EPERM;
527
528         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
529                            BCH_FORCE_IF_METADATA_LOST|
530                            BCH_FORCE_IF_DEGRADED|
531                            BCH_BY_INDEX)) ||
532             arg.pad)
533                 return -EINVAL;
534
535         ca = bch2_device_lookup(c, arg.dev, arg.flags);
536         if (IS_ERR(ca))
537                 return PTR_ERR(ca);
538
539         ret = bch2_dev_offline(c, ca, arg.flags);
540         percpu_ref_put(&ca->ref);
541         return ret;
542 }
543
544 static long bch2_ioctl_disk_set_state(struct bch_fs *c,
545                         struct bch_ioctl_disk_set_state arg)
546 {
547         struct bch_dev *ca;
548         int ret;
549
550         if (!capable(CAP_SYS_ADMIN))
551                 return -EPERM;
552
553         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
554                            BCH_FORCE_IF_METADATA_LOST|
555                            BCH_FORCE_IF_DEGRADED|
556                            BCH_BY_INDEX)) ||
557             arg.pad[0] || arg.pad[1] || arg.pad[2] ||
558             arg.new_state >= BCH_MEMBER_STATE_NR)
559                 return -EINVAL;
560
561         ca = bch2_device_lookup(c, arg.dev, arg.flags);
562         if (IS_ERR(ca))
563                 return PTR_ERR(ca);
564
565         ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
566         if (ret)
567                 bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
568
569         percpu_ref_put(&ca->ref);
570         return ret;
571 }
572
573 struct bch_data_ctx {
574         struct thread_with_file         thr;
575
576         struct bch_fs                   *c;
577         struct bch_ioctl_data           arg;
578         struct bch_move_stats           stats;
579 };
580
581 static int bch2_data_thread(void *arg)
582 {
583         struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
584
585         ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
586         ctx->stats.data_type = U8_MAX;
587         return 0;
588 }
589
590 static int bch2_data_job_release(struct inode *inode, struct file *file)
591 {
592         struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
593
594         thread_with_file_exit(&ctx->thr);
595         kfree(ctx);
596         return 0;
597 }
598
599 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
600                                   size_t len, loff_t *ppos)
601 {
602         struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
603         struct bch_fs *c = ctx->c;
604         struct bch_ioctl_data_event e = {
605                 .type                   = BCH_DATA_EVENT_PROGRESS,
606                 .p.data_type            = ctx->stats.data_type,
607                 .p.btree_id             = ctx->stats.pos.btree,
608                 .p.pos                  = ctx->stats.pos.pos,
609                 .p.sectors_done         = atomic64_read(&ctx->stats.sectors_seen),
610                 .p.sectors_total        = bch2_fs_usage_read_short(c).used,
611         };
612
613         if (len < sizeof(e))
614                 return -EINVAL;
615
616         return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
617 }
618
619 static const struct file_operations bcachefs_data_ops = {
620         .release        = bch2_data_job_release,
621         .read           = bch2_data_job_read,
622         .llseek         = no_llseek,
623 };
624
625 static long bch2_ioctl_data(struct bch_fs *c,
626                             struct bch_ioctl_data arg)
627 {
628         struct bch_data_ctx *ctx;
629         int ret;
630
631         if (!capable(CAP_SYS_ADMIN))
632                 return -EPERM;
633
634         if (arg.op >= BCH_DATA_OP_NR || arg.flags)
635                 return -EINVAL;
636
637         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
638         if (!ctx)
639                 return -ENOMEM;
640
641         ctx->c = c;
642         ctx->arg = arg;
643
644         ret = run_thread_with_file(&ctx->thr,
645                                    &bcachefs_data_ops,
646                                    bch2_data_thread,
647                                    "bch-data/%s", c->name);
648         if (ret < 0)
649                 kfree(ctx);
650         return ret;
651 }
652
653 static long bch2_ioctl_fs_usage(struct bch_fs *c,
654                                 struct bch_ioctl_fs_usage __user *user_arg)
655 {
656         struct bch_ioctl_fs_usage *arg = NULL;
657         struct bch_replicas_usage *dst_e, *dst_end;
658         struct bch_fs_usage_online *src;
659         u32 replica_entries_bytes;
660         unsigned i;
661         int ret = 0;
662
663         if (!test_bit(BCH_FS_started, &c->flags))
664                 return -EINVAL;
665
666         if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
667                 return -EFAULT;
668
669         arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
670         if (!arg)
671                 return -ENOMEM;
672
673         src = bch2_fs_usage_read(c);
674         if (!src) {
675                 ret = -ENOMEM;
676                 goto err;
677         }
678
679         arg->capacity           = c->capacity;
680         arg->used               = bch2_fs_sectors_used(c, src);
681         arg->online_reserved    = src->online_reserved;
682
683         for (i = 0; i < BCH_REPLICAS_MAX; i++)
684                 arg->persistent_reserved[i] = src->u.persistent_reserved[i];
685
686         dst_e   = arg->replicas;
687         dst_end = (void *) arg->replicas + replica_entries_bytes;
688
689         for (i = 0; i < c->replicas.nr; i++) {
690                 struct bch_replicas_entry_v1 *src_e =
691                         cpu_replicas_entry(&c->replicas, i);
692
693                 /* check that we have enough space for one replicas entry */
694                 if (dst_e + 1 > dst_end) {
695                         ret = -ERANGE;
696                         break;
697                 }
698
699                 dst_e->sectors          = src->u.replicas[i];
700                 dst_e->r                = *src_e;
701
702                 /* recheck after setting nr_devs: */
703                 if (replicas_usage_next(dst_e) > dst_end) {
704                         ret = -ERANGE;
705                         break;
706                 }
707
708                 memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
709
710                 dst_e = replicas_usage_next(dst_e);
711         }
712
713         arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
714
715         percpu_up_read(&c->mark_lock);
716         kfree(src);
717
718         if (ret)
719                 goto err;
720
721         ret = copy_to_user_errcode(user_arg, arg,
722                         sizeof(*arg) + arg->replica_entries_bytes);
723 err:
724         kfree(arg);
725         return ret;
726 }
727
728 /* obsolete, didn't allow for new data types: */
729 static long bch2_ioctl_dev_usage(struct bch_fs *c,
730                                  struct bch_ioctl_dev_usage __user *user_arg)
731 {
732         struct bch_ioctl_dev_usage arg;
733         struct bch_dev_usage src;
734         struct bch_dev *ca;
735         unsigned i;
736
737         if (!test_bit(BCH_FS_started, &c->flags))
738                 return -EINVAL;
739
740         if (copy_from_user(&arg, user_arg, sizeof(arg)))
741                 return -EFAULT;
742
743         if ((arg.flags & ~BCH_BY_INDEX) ||
744             arg.pad[0] ||
745             arg.pad[1] ||
746             arg.pad[2])
747                 return -EINVAL;
748
749         ca = bch2_device_lookup(c, arg.dev, arg.flags);
750         if (IS_ERR(ca))
751                 return PTR_ERR(ca);
752
753         src = bch2_dev_usage_read(ca);
754
755         arg.state               = ca->mi.state;
756         arg.bucket_size         = ca->mi.bucket_size;
757         arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
758
759         for (i = 0; i < BCH_DATA_NR; i++) {
760                 arg.d[i].buckets        = src.d[i].buckets;
761                 arg.d[i].sectors        = src.d[i].sectors;
762                 arg.d[i].fragmented     = src.d[i].fragmented;
763         }
764
765         percpu_ref_put(&ca->ref);
766
767         return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
768 }
769
770 static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
771                                  struct bch_ioctl_dev_usage_v2 __user *user_arg)
772 {
773         struct bch_ioctl_dev_usage_v2 arg;
774         struct bch_dev_usage src;
775         struct bch_dev *ca;
776         int ret = 0;
777
778         if (!test_bit(BCH_FS_started, &c->flags))
779                 return -EINVAL;
780
781         if (copy_from_user(&arg, user_arg, sizeof(arg)))
782                 return -EFAULT;
783
784         if ((arg.flags & ~BCH_BY_INDEX) ||
785             arg.pad[0] ||
786             arg.pad[1] ||
787             arg.pad[2])
788                 return -EINVAL;
789
790         ca = bch2_device_lookup(c, arg.dev, arg.flags);
791         if (IS_ERR(ca))
792                 return PTR_ERR(ca);
793
794         src = bch2_dev_usage_read(ca);
795
796         arg.state               = ca->mi.state;
797         arg.bucket_size         = ca->mi.bucket_size;
798         arg.nr_data_types       = min(arg.nr_data_types, BCH_DATA_NR);
799         arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
800
801         ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
802         if (ret)
803                 goto err;
804
805         for (unsigned i = 0; i < arg.nr_data_types; i++) {
806                 struct bch_ioctl_dev_usage_type t = {
807                         .buckets        = src.d[i].buckets,
808                         .sectors        = src.d[i].sectors,
809                         .fragmented     = src.d[i].fragmented,
810                 };
811
812                 ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
813                 if (ret)
814                         goto err;
815         }
816 err:
817         percpu_ref_put(&ca->ref);
818         return ret;
819 }
820
821 static long bch2_ioctl_read_super(struct bch_fs *c,
822                                   struct bch_ioctl_read_super arg)
823 {
824         struct bch_dev *ca = NULL;
825         struct bch_sb *sb;
826         int ret = 0;
827
828         if (!capable(CAP_SYS_ADMIN))
829                 return -EPERM;
830
831         if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
832             arg.pad)
833                 return -EINVAL;
834
835         mutex_lock(&c->sb_lock);
836
837         if (arg.flags & BCH_READ_DEV) {
838                 ca = bch2_device_lookup(c, arg.dev, arg.flags);
839
840                 if (IS_ERR(ca)) {
841                         ret = PTR_ERR(ca);
842                         goto err;
843                 }
844
845                 sb = ca->disk_sb.sb;
846         } else {
847                 sb = c->disk_sb.sb;
848         }
849
850         if (vstruct_bytes(sb) > arg.size) {
851                 ret = -ERANGE;
852                 goto err;
853         }
854
855         ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
856                                    vstruct_bytes(sb));
857 err:
858         if (!IS_ERR_OR_NULL(ca))
859                 percpu_ref_put(&ca->ref);
860         mutex_unlock(&c->sb_lock);
861         return ret;
862 }
863
864 static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
865                                     struct bch_ioctl_disk_get_idx arg)
866 {
867         dev_t dev = huge_decode_dev(arg.dev);
868
869         if (!capable(CAP_SYS_ADMIN))
870                 return -EPERM;
871
872         if (!dev)
873                 return -EINVAL;
874
875         for_each_online_member(c, ca)
876                 if (ca->dev == dev) {
877                         percpu_ref_put(&ca->io_ref);
878                         return ca->dev_idx;
879                 }
880
881         return -BCH_ERR_ENOENT_dev_idx_not_found;
882 }
883
884 static long bch2_ioctl_disk_resize(struct bch_fs *c,
885                                    struct bch_ioctl_disk_resize arg)
886 {
887         struct bch_dev *ca;
888         int ret;
889
890         if (!capable(CAP_SYS_ADMIN))
891                 return -EPERM;
892
893         if ((arg.flags & ~BCH_BY_INDEX) ||
894             arg.pad)
895                 return -EINVAL;
896
897         ca = bch2_device_lookup(c, arg.dev, arg.flags);
898         if (IS_ERR(ca))
899                 return PTR_ERR(ca);
900
901         ret = bch2_dev_resize(c, ca, arg.nbuckets);
902
903         percpu_ref_put(&ca->ref);
904         return ret;
905 }
906
907 static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
908                                    struct bch_ioctl_disk_resize_journal arg)
909 {
910         struct bch_dev *ca;
911         int ret;
912
913         if (!capable(CAP_SYS_ADMIN))
914                 return -EPERM;
915
916         if ((arg.flags & ~BCH_BY_INDEX) ||
917             arg.pad)
918                 return -EINVAL;
919
920         if (arg.nbuckets > U32_MAX)
921                 return -EINVAL;
922
923         ca = bch2_device_lookup(c, arg.dev, arg.flags);
924         if (IS_ERR(ca))
925                 return PTR_ERR(ca);
926
927         ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
928
929         percpu_ref_put(&ca->ref);
930         return ret;
931 }
932
933 static int bch2_fsck_online_thread_fn(void *arg)
934 {
935         struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
936         struct bch_fs *c = thr->c;
937
938         c->output_filter = current;
939         c->output = &thr->output;
940
941         /*
942          * XXX: can we figure out a way to do this without mucking with c->opts?
943          */
944         if (opt_defined(thr->opts, fix_errors))
945                 c->opts.fix_errors = thr->opts.fix_errors;
946         c->opts.fsck = true;
947
948         c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
949         bch2_run_online_recovery_passes(c);
950
951         c->output = NULL;
952         c->output_filter = NULL;
953
954         thr->thr.done = true;
955         wake_up(&thr->output.wait);
956
957         up(&c->online_fsck_mutex);
958         bch2_ro_ref_put(c);
959         return 0;
960 }
961
962 static long bch2_ioctl_fsck_online(struct bch_fs *c,
963                                    struct bch_ioctl_fsck_online arg)
964 {
965         struct fsck_thread *thr = NULL;
966         long ret = 0;
967
968         if (arg.flags)
969                 return -EINVAL;
970
971         if (!capable(CAP_SYS_ADMIN))
972                 return -EPERM;
973
974         if (!bch2_ro_ref_tryget(c))
975                 return -EROFS;
976
977         if (down_trylock(&c->online_fsck_mutex)) {
978                 bch2_ro_ref_put(c);
979                 return -EAGAIN;
980         }
981
982         thr = kzalloc(sizeof(*thr), GFP_KERNEL);
983         if (!thr) {
984                 ret = -ENOMEM;
985                 goto err;
986         }
987
988         thr->c = c;
989         thr->opts = bch2_opts_empty();
990         thr->output.buf = PRINTBUF;
991         thr->output.buf.atomic++;
992         spin_lock_init(&thr->output.lock);
993         init_waitqueue_head(&thr->output.wait);
994         darray_init(&thr->output2);
995
996         if (arg.opts) {
997                 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
998
999                 ret =   PTR_ERR_OR_ZERO(optstr) ?:
1000                         bch2_parse_mount_opts(c, &thr->opts, optstr);
1001                 kfree(optstr);
1002
1003                 if (ret)
1004                         goto err;
1005         }
1006
1007         ret = run_thread_with_file(&thr->thr,
1008                                    &fsck_thread_ops,
1009                                    bch2_fsck_online_thread_fn,
1010                                    "bch-fsck");
1011 err:
1012         if (ret < 0) {
1013                 bch_err_fn(c, ret);
1014                 if (thr)
1015                         bch2_fsck_thread_free(thr);
1016                 up(&c->online_fsck_mutex);
1017                 bch2_ro_ref_put(c);
1018         }
1019         return ret;
1020 }
1021
/*
 * BCH_IOCTL(_name, _argtype) - copy an ioctl argument of type @_argtype in
 * from userspace and hand it, by value, to bch2_ioctl_<_name>().
 *
 * Not hygienic: expects 'c', 'arg' and 'ret' to be in scope and an 'out'
 * label to exist in the enclosing function.  Returns -EFAULT directly from
 * the enclosing function if the copy fails; otherwise stores the handler's
 * result in 'ret' and jumps to 'out'.
 */
#define BCH_IOCTL(_name, _argtype)                                      \
do {                                                                    \
        _argtype karg;                                                  \
                                                                        \
        if (copy_from_user(&karg, arg, sizeof(_argtype)))               \
                return -EFAULT;                                         \
        ret = bch2_ioctl_##_name(c, karg);                              \
        goto out;                                                       \
} while (0)
1031
1032 long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
1033 {
1034         long ret;
1035
1036         switch (cmd) {
1037         case BCH_IOCTL_QUERY_UUID:
1038                 return bch2_ioctl_query_uuid(c, arg);
1039         case BCH_IOCTL_FS_USAGE:
1040                 return bch2_ioctl_fs_usage(c, arg);
1041         case BCH_IOCTL_DEV_USAGE:
1042                 return bch2_ioctl_dev_usage(c, arg);
1043         case BCH_IOCTL_DEV_USAGE_V2:
1044                 return bch2_ioctl_dev_usage_v2(c, arg);
1045 #if 0
1046         case BCH_IOCTL_START:
1047                 BCH_IOCTL(start, struct bch_ioctl_start);
1048         case BCH_IOCTL_STOP:
1049                 return bch2_ioctl_stop(c);
1050 #endif
1051         case BCH_IOCTL_READ_SUPER:
1052                 BCH_IOCTL(read_super, struct bch_ioctl_read_super);
1053         case BCH_IOCTL_DISK_GET_IDX:
1054                 BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
1055         }
1056
1057         if (!test_bit(BCH_FS_started, &c->flags))
1058                 return -EINVAL;
1059
1060         switch (cmd) {
1061         case BCH_IOCTL_DISK_ADD:
1062                 BCH_IOCTL(disk_add, struct bch_ioctl_disk);
1063         case BCH_IOCTL_DISK_REMOVE:
1064                 BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
1065         case BCH_IOCTL_DISK_ONLINE:
1066                 BCH_IOCTL(disk_online, struct bch_ioctl_disk);
1067         case BCH_IOCTL_DISK_OFFLINE:
1068                 BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
1069         case BCH_IOCTL_DISK_SET_STATE:
1070                 BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
1071         case BCH_IOCTL_DATA:
1072                 BCH_IOCTL(data, struct bch_ioctl_data);
1073         case BCH_IOCTL_DISK_RESIZE:
1074                 BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
1075         case BCH_IOCTL_DISK_RESIZE_JOURNAL:
1076                 BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
1077         case BCH_IOCTL_FSCK_ONLINE:
1078                 BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
1079         default:
1080                 return -ENOTTY;
1081         }
1082 out:
1083         if (ret < 0)
1084                 ret = bch2_err_class(ret);
1085         return ret;
1086 }
1087
1088 static DEFINE_IDR(bch_chardev_minor);
1089
1090 static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
1091 {
1092         unsigned minor = iminor(file_inode(filp));
1093         struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
1094         void __user *arg = (void __user *) v;
1095
1096         return c
1097                 ? bch2_fs_ioctl(c, cmd, arg)
1098                 : bch2_global_ioctl(cmd, arg);
1099 }
1100
1101 static const struct file_operations bch_chardev_fops = {
1102         .owner          = THIS_MODULE,
1103         .unlocked_ioctl = bch2_chardev_ioctl,
1104         .open           = nonseekable_open,
1105 };
1106
1107 static int bch_chardev_major;
1108 static struct class *bch_chardev_class;
1109 static struct device *bch_chardev;
1110
1111 void bch2_fs_chardev_exit(struct bch_fs *c)
1112 {
1113         if (!IS_ERR_OR_NULL(c->chardev))
1114                 device_unregister(c->chardev);
1115         if (c->minor >= 0)
1116                 idr_remove(&bch_chardev_minor, c->minor);
1117 }
1118
1119 int bch2_fs_chardev_init(struct bch_fs *c)
1120 {
1121         c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
1122         if (c->minor < 0)
1123                 return c->minor;
1124
1125         c->chardev = device_create(bch_chardev_class, NULL,
1126                                    MKDEV(bch_chardev_major, c->minor), c,
1127                                    "bcachefs%u-ctl", c->minor);
1128         if (IS_ERR(c->chardev))
1129                 return PTR_ERR(c->chardev);
1130
1131         return 0;
1132 }
1133
1134 void bch2_chardev_exit(void)
1135 {
1136         if (!IS_ERR_OR_NULL(bch_chardev_class))
1137                 device_destroy(bch_chardev_class,
1138                                MKDEV(bch_chardev_major, U8_MAX));
1139         if (!IS_ERR_OR_NULL(bch_chardev_class))
1140                 class_destroy(bch_chardev_class);
1141         if (bch_chardev_major > 0)
1142                 unregister_chrdev(bch_chardev_major, "bcachefs");
1143 }
1144
1145 int __init bch2_chardev_init(void)
1146 {
1147         bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
1148         if (bch_chardev_major < 0)
1149                 return bch_chardev_major;
1150
1151         bch_chardev_class = class_create("bcachefs");
1152         if (IS_ERR(bch_chardev_class))
1153                 return PTR_ERR(bch_chardev_class);
1154
1155         bch_chardev = device_create(bch_chardev_class, NULL,
1156                                     MKDEV(bch_chardev_major, U8_MAX),
1157                                     NULL, "bcachefs-ctl");
1158         if (IS_ERR(bch_chardev))
1159                 return PTR_ERR(bch_chardev);
1160
1161         return 0;
1162 }
1163
1164 #endif /* NO_BCACHEFS_CHARDEV */