]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/chardev.c
Update bcachefs sources to 2a6125decb43 bcachefs: bch_sb_field_downgrade
[bcachefs-tools-debian] / libbcachefs / chardev.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_CHARDEV
3
4 #include "bcachefs.h"
5 #include "bcachefs_ioctl.h"
6 #include "buckets.h"
7 #include "chardev.h"
8 #include "journal.h"
9 #include "move.h"
10 #include "recovery.h"
11 #include "replicas.h"
12 #include "super.h"
13 #include "super-io.h"
14
15 #include <linux/anon_inodes.h>
16 #include <linux/cdev.h>
17 #include <linux/device.h>
18 #include <linux/file.h>
19 #include <linux/fs.h>
20 #include <linux/ioctl.h>
21 #include <linux/kthread.h>
22 #include <linux/major.h>
23 #include <linux/poll.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27
28 __must_check
29 static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
30 {
31         return copy_to_user(to, from, n) ? -EFAULT : 0;
32 }
33
/*
 * State shared between a kernel thread and the anonymous file that controls
 * its lifetime; set up by run_thread_with_file().
 */
struct thread_with_file {
	/* kthread created by run_thread_with_file(), stopped by thread_with_file_exit() */
	struct task_struct	*task;
	/* result recorded by the thread function */
	int			ret;
	/* set by the fsck thread functions once they have finished */
	bool			done;
};
39
40 static void thread_with_file_exit(struct thread_with_file *thr)
41 {
42         if (thr->task) {
43                 kthread_stop(thr->task);
44                 put_task_struct(thr->task);
45         }
46 }
47
48 __printf(4, 0)
49 static int run_thread_with_file(struct thread_with_file *thr,
50                                 const struct file_operations *fops,
51                                 int (*fn)(void *), const char *fmt, ...)
52 {
53         va_list args;
54         struct file *file = NULL;
55         int ret, fd = -1;
56         struct printbuf name = PRINTBUF;
57         unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
58
59         va_start(args, fmt);
60         prt_vprintf(&name, fmt, args);
61         va_end(args);
62
63         thr->ret = 0;
64         thr->task = kthread_create(fn, thr, name.buf);
65         ret = PTR_ERR_OR_ZERO(thr->task);
66         if (ret)
67                 goto err;
68
69         ret = get_unused_fd_flags(fd_flags);
70         if (ret < 0)
71                 goto err_stop_task;
72         fd = ret;
73
74         file = anon_inode_getfile(name.buf, fops, thr, fd_flags);
75         ret = PTR_ERR_OR_ZERO(file);
76         if (ret)
77                 goto err_put_fd;
78
79         fd_install(fd, file);
80         get_task_struct(thr->task);
81         wake_up_process(thr->task);
82         printbuf_exit(&name);
83         return fd;
84 err_put_fd:
85         put_unused_fd(fd);
86 err_stop_task:
87         kthread_stop(thr->task);
88 err:
89         printbuf_exit(&name);
90         return ret;
91 }
92
93 /* returns with ref on ca->ref */
94 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
95                                           unsigned flags)
96 {
97         struct bch_dev *ca;
98
99         if (flags & BCH_BY_INDEX) {
100                 if (dev >= c->sb.nr_devices)
101                         return ERR_PTR(-EINVAL);
102
103                 rcu_read_lock();
104                 ca = rcu_dereference(c->devs[dev]);
105                 if (ca)
106                         percpu_ref_get(&ca->ref);
107                 rcu_read_unlock();
108
109                 if (!ca)
110                         return ERR_PTR(-EINVAL);
111         } else {
112                 char *path;
113
114                 path = strndup_user((const char __user *)
115                                     (unsigned long) dev, PATH_MAX);
116                 if (IS_ERR(path))
117                         return ERR_CAST(path);
118
119                 ca = bch2_dev_lookup(c, path);
120                 kfree(path);
121         }
122
123         return ca;
124 }
125
126 #if 0
127 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
128 {
129         struct bch_ioctl_assemble arg;
130         struct bch_fs *c;
131         u64 *user_devs = NULL;
132         char **devs = NULL;
133         unsigned i;
134         int ret = -EFAULT;
135
136         if (copy_from_user(&arg, user_arg, sizeof(arg)))
137                 return -EFAULT;
138
139         if (arg.flags || arg.pad)
140                 return -EINVAL;
141
142         user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
143         if (!user_devs)
144                 return -ENOMEM;
145
146         devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
147
148         if (copy_from_user(user_devs, user_arg->devs,
149                            sizeof(u64) * arg.nr_devs))
150                 goto err;
151
152         for (i = 0; i < arg.nr_devs; i++) {
153                 devs[i] = strndup_user((const char __user *)(unsigned long)
154                                        user_devs[i],
155                                        PATH_MAX);
156                 ret= PTR_ERR_OR_ZERO(devs[i]);
157                 if (ret)
158                         goto err;
159         }
160
161         c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
162         ret = PTR_ERR_OR_ZERO(c);
163         if (!ret)
164                 closure_put(&c->cl);
165 err:
166         if (devs)
167                 for (i = 0; i < arg.nr_devs; i++)
168                         kfree(devs[i]);
169         kfree(devs);
170         return ret;
171 }
172
173 static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
174 {
175         struct bch_ioctl_incremental arg;
176         const char *err;
177         char *path;
178
179         if (copy_from_user(&arg, user_arg, sizeof(arg)))
180                 return -EFAULT;
181
182         if (arg.flags || arg.pad)
183                 return -EINVAL;
184
185         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
186         ret = PTR_ERR_OR_ZERO(path);
187         if (ret)
188                 return ret;
189
190         err = bch2_fs_open_incremental(path);
191         kfree(path);
192
193         if (err) {
194                 pr_err("Could not register bcachefs devices: %s", err);
195                 return -EINVAL;
196         }
197
198         return 0;
199 }
200 #endif
201
/*
 * Per-invocation state of an fsck run (offline or online) that is driven
 * through a file descriptor.
 */
struct fsck_thread {
	/* the file's private_data points here; recovered with container_of() */
	struct thread_with_file	thr;
	struct printbuf		buf;
	/* filesystem being checked (set for online fsck) */
	struct bch_fs		*c;
	/* device paths to open (offline fsck); each entry and the array are kfree()d on teardown */
	char			**devs;
	size_t			nr_devs;
	/* options parsed from the user-supplied option string */
	struct bch_opts		opts;

	/* log output handed to the filesystem; read() drains it */
	struct log_output	output;
	/* staging buffer between @output and userspace, used by read() */
	DARRAY(char)		output2;
};
213
214 static void bch2_fsck_thread_free(struct fsck_thread *thr)
215 {
216         thread_with_file_exit(&thr->thr);
217         if (thr->devs)
218                 for (size_t i = 0; i < thr->nr_devs; i++)
219                         kfree(thr->devs[i]);
220         darray_exit(&thr->output2);
221         printbuf_exit(&thr->output.buf);
222         kfree(thr->devs);
223         kfree(thr);
224 }
225
226 static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
227 {
228         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
229
230         bch2_fsck_thread_free(thr);
231         return 0;
232 }
233
234 static bool fsck_thread_ready(struct fsck_thread *thr)
235 {
236         return thr->output.buf.pos ||
237                 thr->output2.nr ||
238                 thr->thr.done;
239 }
240
241 static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
242                                      size_t len, loff_t *ppos)
243 {
244         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
245         size_t copied = 0, b;
246         int ret = 0;
247
248         if ((file->f_flags & O_NONBLOCK) &&
249             !fsck_thread_ready(thr))
250                 return -EAGAIN;
251
252         ret = wait_event_interruptible(thr->output.wait,
253                         fsck_thread_ready(thr));
254         if (ret)
255                 return ret;
256
257         if (thr->thr.done)
258                 return 0;
259
260         while (len) {
261                 ret = darray_make_room(&thr->output2, thr->output.buf.pos);
262                 if (ret)
263                         break;
264
265                 spin_lock_irq(&thr->output.lock);
266                 b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
267
268                 memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
269                 memmove(thr->output.buf.buf,
270                         thr->output.buf.buf + b,
271                         thr->output.buf.pos - b);
272
273                 thr->output2.nr += b;
274                 thr->output.buf.pos -= b;
275                 spin_unlock_irq(&thr->output.lock);
276
277                 b = min(len, thr->output2.nr);
278                 if (!b)
279                         break;
280
281                 b -= copy_to_user(buf, thr->output2.data, b);
282                 if (!b) {
283                         ret = -EFAULT;
284                         break;
285                 }
286
287                 copied  += b;
288                 buf     += b;
289                 len     -= b;
290
291                 memmove(thr->output2.data,
292                         thr->output2.data + b,
293                         thr->output2.nr - b);
294                 thr->output2.nr -= b;
295         }
296
297         return copied ?: ret;
298 }
299
300 static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
301 {
302         struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
303
304         poll_wait(file, &thr->output.wait, wait);
305
306         return fsck_thread_ready(thr)
307                 ? EPOLLIN|EPOLLHUP
308                 : 0;
309 }
310
/*
 * File operations of the fd returned by the fsck ioctls: read-only stream of
 * fsck output, pollable, not seekable; closing it tears the fsck thread down.
 */
static const struct file_operations fsck_thread_ops = {
	.release	= bch2_fsck_thread_release,
	.read		= bch2_fsck_thread_read,
	.poll		= bch2_fsck_thread_poll,
	.llseek		= no_llseek,
};
317
318 static int bch2_fsck_offline_thread_fn(void *arg)
319 {
320         struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
321         struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
322
323         thr->thr.ret = PTR_ERR_OR_ZERO(c);
324         if (!thr->thr.ret)
325                 bch2_fs_stop(c);
326
327         thr->thr.done = true;
328         wake_up(&thr->output.wait);
329         return 0;
330 }
331
332 static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
333 {
334         struct bch_ioctl_fsck_offline arg;
335         struct fsck_thread *thr = NULL;
336         u64 *devs = NULL;
337         long ret = 0;
338
339         if (copy_from_user(&arg, user_arg, sizeof(arg)))
340                 return -EFAULT;
341
342         if (arg.flags)
343                 return -EINVAL;
344
345         if (!capable(CAP_SYS_ADMIN))
346                 return -EPERM;
347
348         if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
349             !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
350             !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
351                 ret = -ENOMEM;
352                 goto err;
353         }
354
355         thr->opts = bch2_opts_empty();
356         thr->nr_devs = arg.nr_devs;
357         thr->output.buf = PRINTBUF;
358         thr->output.buf.atomic++;
359         spin_lock_init(&thr->output.lock);
360         init_waitqueue_head(&thr->output.wait);
361         darray_init(&thr->output2);
362
363         if (copy_from_user(devs, &user_arg->devs[0],
364                            array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
365                 ret = -EINVAL;
366                 goto err;
367         }
368
369         for (size_t i = 0; i < arg.nr_devs; i++) {
370                 thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
371                 ret = PTR_ERR_OR_ZERO(thr->devs[i]);
372                 if (ret)
373                         goto err;
374         }
375
376         if (arg.opts) {
377                 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
378
379                 ret =   PTR_ERR_OR_ZERO(optstr) ?:
380                         bch2_parse_mount_opts(NULL, &thr->opts, optstr);
381                 kfree(optstr);
382
383                 if (ret)
384                         goto err;
385         }
386
387         opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
388
389         ret = run_thread_with_file(&thr->thr,
390                                    &fsck_thread_ops,
391                                    bch2_fsck_offline_thread_fn,
392                                    "bch-fsck");
393 err:
394         if (ret < 0) {
395                 if (thr)
396                         bch2_fsck_thread_free(thr);
397                 pr_err("ret %s", bch2_err_str(ret));
398         }
399         kfree(devs);
400         return ret;
401 }
402
403 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
404 {
405         long ret;
406
407         switch (cmd) {
408 #if 0
409         case BCH_IOCTL_ASSEMBLE:
410                 return bch2_ioctl_assemble(arg);
411         case BCH_IOCTL_INCREMENTAL:
412                 return bch2_ioctl_incremental(arg);
413 #endif
414         case BCH_IOCTL_FSCK_OFFLINE: {
415                 ret = bch2_ioctl_fsck_offline(arg);
416                 break;
417         }
418         default:
419                 ret = -ENOTTY;
420                 break;
421         }
422
423         if (ret < 0)
424                 ret = bch2_err_class(ret);
425         return ret;
426 }
427
428 static long bch2_ioctl_query_uuid(struct bch_fs *c,
429                         struct bch_ioctl_query_uuid __user *user_arg)
430 {
431         return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
432                                     sizeof(c->sb.user_uuid));
433 }
434
435 #if 0
436 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
437 {
438         if (!capable(CAP_SYS_ADMIN))
439                 return -EPERM;
440
441         if (arg.flags || arg.pad)
442                 return -EINVAL;
443
444         return bch2_fs_start(c);
445 }
446
447 static long bch2_ioctl_stop(struct bch_fs *c)
448 {
449         if (!capable(CAP_SYS_ADMIN))
450                 return -EPERM;
451
452         bch2_fs_stop(c);
453         return 0;
454 }
455 #endif
456
457 static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
458 {
459         char *path;
460         int ret;
461
462         if (!capable(CAP_SYS_ADMIN))
463                 return -EPERM;
464
465         if (arg.flags || arg.pad)
466                 return -EINVAL;
467
468         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
469         ret = PTR_ERR_OR_ZERO(path);
470         if (ret)
471                 return ret;
472
473         ret = bch2_dev_add(c, path);
474         kfree(path);
475
476         return ret;
477 }
478
479 static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
480 {
481         struct bch_dev *ca;
482
483         if (!capable(CAP_SYS_ADMIN))
484                 return -EPERM;
485
486         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
487                            BCH_FORCE_IF_METADATA_LOST|
488                            BCH_FORCE_IF_DEGRADED|
489                            BCH_BY_INDEX)) ||
490             arg.pad)
491                 return -EINVAL;
492
493         ca = bch2_device_lookup(c, arg.dev, arg.flags);
494         if (IS_ERR(ca))
495                 return PTR_ERR(ca);
496
497         return bch2_dev_remove(c, ca, arg.flags);
498 }
499
500 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
501 {
502         char *path;
503         int ret;
504
505         if (!capable(CAP_SYS_ADMIN))
506                 return -EPERM;
507
508         if (arg.flags || arg.pad)
509                 return -EINVAL;
510
511         path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
512         ret = PTR_ERR_OR_ZERO(path);
513         if (ret)
514                 return ret;
515
516         ret = bch2_dev_online(c, path);
517         kfree(path);
518         return ret;
519 }
520
521 static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
522 {
523         struct bch_dev *ca;
524         int ret;
525
526         if (!capable(CAP_SYS_ADMIN))
527                 return -EPERM;
528
529         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
530                            BCH_FORCE_IF_METADATA_LOST|
531                            BCH_FORCE_IF_DEGRADED|
532                            BCH_BY_INDEX)) ||
533             arg.pad)
534                 return -EINVAL;
535
536         ca = bch2_device_lookup(c, arg.dev, arg.flags);
537         if (IS_ERR(ca))
538                 return PTR_ERR(ca);
539
540         ret = bch2_dev_offline(c, ca, arg.flags);
541         percpu_ref_put(&ca->ref);
542         return ret;
543 }
544
545 static long bch2_ioctl_disk_set_state(struct bch_fs *c,
546                         struct bch_ioctl_disk_set_state arg)
547 {
548         struct bch_dev *ca;
549         int ret;
550
551         if (!capable(CAP_SYS_ADMIN))
552                 return -EPERM;
553
554         if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
555                            BCH_FORCE_IF_METADATA_LOST|
556                            BCH_FORCE_IF_DEGRADED|
557                            BCH_BY_INDEX)) ||
558             arg.pad[0] || arg.pad[1] || arg.pad[2] ||
559             arg.new_state >= BCH_MEMBER_STATE_NR)
560                 return -EINVAL;
561
562         ca = bch2_device_lookup(c, arg.dev, arg.flags);
563         if (IS_ERR(ca))
564                 return PTR_ERR(ca);
565
566         ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
567         if (ret)
568                 bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
569
570         percpu_ref_put(&ca->ref);
571         return ret;
572 }
573
/* State of one data job started through bch2_ioctl_data(). */
struct bch_data_ctx {
	/* the file's private_data points here; recovered with container_of() */
	struct thread_with_file		thr;

	struct bch_fs			*c;
	/* copy of the ioctl argument describing the job */
	struct bch_ioctl_data		arg;
	/* progress counters, reported to userspace by bch2_data_job_read() */
	struct bch_move_stats		stats;
};
581
582 static int bch2_data_thread(void *arg)
583 {
584         struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
585
586         ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
587         ctx->stats.data_type = U8_MAX;
588         return 0;
589 }
590
591 static int bch2_data_job_release(struct inode *inode, struct file *file)
592 {
593         struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
594
595         thread_with_file_exit(&ctx->thr);
596         kfree(ctx);
597         return 0;
598 }
599
600 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
601                                   size_t len, loff_t *ppos)
602 {
603         struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
604         struct bch_fs *c = ctx->c;
605         struct bch_ioctl_data_event e = {
606                 .type                   = BCH_DATA_EVENT_PROGRESS,
607                 .p.data_type            = ctx->stats.data_type,
608                 .p.btree_id             = ctx->stats.pos.btree,
609                 .p.pos                  = ctx->stats.pos.pos,
610                 .p.sectors_done         = atomic64_read(&ctx->stats.sectors_seen),
611                 .p.sectors_total        = bch2_fs_usage_read_short(c).used,
612         };
613
614         if (len < sizeof(e))
615                 return -EINVAL;
616
617         return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
618 }
619
/*
 * File operations of the fd returned by bch2_ioctl_data(): reads return
 * progress events, the file is not seekable, and closing it stops the job.
 */
static const struct file_operations bcachefs_data_ops = {
	.release	= bch2_data_job_release,
	.read		= bch2_data_job_read,
	.llseek		= no_llseek,
};
625
626 static long bch2_ioctl_data(struct bch_fs *c,
627                             struct bch_ioctl_data arg)
628 {
629         struct bch_data_ctx *ctx;
630         int ret;
631
632         if (!capable(CAP_SYS_ADMIN))
633                 return -EPERM;
634
635         if (arg.op >= BCH_DATA_OP_NR || arg.flags)
636                 return -EINVAL;
637
638         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
639         if (!ctx)
640                 return -ENOMEM;
641
642         ctx->c = c;
643         ctx->arg = arg;
644
645         ret = run_thread_with_file(&ctx->thr,
646                                    &bcachefs_data_ops,
647                                    bch2_data_thread,
648                                    "bch-data/%s", c->name);
649         if (ret < 0)
650                 kfree(ctx);
651         return ret;
652 }
653
654 static long bch2_ioctl_fs_usage(struct bch_fs *c,
655                                 struct bch_ioctl_fs_usage __user *user_arg)
656 {
657         struct bch_ioctl_fs_usage *arg = NULL;
658         struct bch_replicas_usage *dst_e, *dst_end;
659         struct bch_fs_usage_online *src;
660         u32 replica_entries_bytes;
661         unsigned i;
662         int ret = 0;
663
664         if (!test_bit(BCH_FS_started, &c->flags))
665                 return -EINVAL;
666
667         if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
668                 return -EFAULT;
669
670         arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
671         if (!arg)
672                 return -ENOMEM;
673
674         src = bch2_fs_usage_read(c);
675         if (!src) {
676                 ret = -ENOMEM;
677                 goto err;
678         }
679
680         arg->capacity           = c->capacity;
681         arg->used               = bch2_fs_sectors_used(c, src);
682         arg->online_reserved    = src->online_reserved;
683
684         for (i = 0; i < BCH_REPLICAS_MAX; i++)
685                 arg->persistent_reserved[i] = src->u.persistent_reserved[i];
686
687         dst_e   = arg->replicas;
688         dst_end = (void *) arg->replicas + replica_entries_bytes;
689
690         for (i = 0; i < c->replicas.nr; i++) {
691                 struct bch_replicas_entry_v1 *src_e =
692                         cpu_replicas_entry(&c->replicas, i);
693
694                 /* check that we have enough space for one replicas entry */
695                 if (dst_e + 1 > dst_end) {
696                         ret = -ERANGE;
697                         break;
698                 }
699
700                 dst_e->sectors          = src->u.replicas[i];
701                 dst_e->r                = *src_e;
702
703                 /* recheck after setting nr_devs: */
704                 if (replicas_usage_next(dst_e) > dst_end) {
705                         ret = -ERANGE;
706                         break;
707                 }
708
709                 memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
710
711                 dst_e = replicas_usage_next(dst_e);
712         }
713
714         arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
715
716         percpu_up_read(&c->mark_lock);
717         kfree(src);
718
719         if (ret)
720                 goto err;
721
722         ret = copy_to_user_errcode(user_arg, arg,
723                         sizeof(*arg) + arg->replica_entries_bytes);
724 err:
725         kfree(arg);
726         return ret;
727 }
728
729 /* obsolete, didn't allow for new data types: */
730 static long bch2_ioctl_dev_usage(struct bch_fs *c,
731                                  struct bch_ioctl_dev_usage __user *user_arg)
732 {
733         struct bch_ioctl_dev_usage arg;
734         struct bch_dev_usage src;
735         struct bch_dev *ca;
736         unsigned i;
737
738         if (!test_bit(BCH_FS_started, &c->flags))
739                 return -EINVAL;
740
741         if (copy_from_user(&arg, user_arg, sizeof(arg)))
742                 return -EFAULT;
743
744         if ((arg.flags & ~BCH_BY_INDEX) ||
745             arg.pad[0] ||
746             arg.pad[1] ||
747             arg.pad[2])
748                 return -EINVAL;
749
750         ca = bch2_device_lookup(c, arg.dev, arg.flags);
751         if (IS_ERR(ca))
752                 return PTR_ERR(ca);
753
754         src = bch2_dev_usage_read(ca);
755
756         arg.state               = ca->mi.state;
757         arg.bucket_size         = ca->mi.bucket_size;
758         arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
759
760         for (i = 0; i < BCH_DATA_NR; i++) {
761                 arg.d[i].buckets        = src.d[i].buckets;
762                 arg.d[i].sectors        = src.d[i].sectors;
763                 arg.d[i].fragmented     = src.d[i].fragmented;
764         }
765
766         percpu_ref_put(&ca->ref);
767
768         return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
769 }
770
771 static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
772                                  struct bch_ioctl_dev_usage_v2 __user *user_arg)
773 {
774         struct bch_ioctl_dev_usage_v2 arg;
775         struct bch_dev_usage src;
776         struct bch_dev *ca;
777         int ret = 0;
778
779         if (!test_bit(BCH_FS_started, &c->flags))
780                 return -EINVAL;
781
782         if (copy_from_user(&arg, user_arg, sizeof(arg)))
783                 return -EFAULT;
784
785         if ((arg.flags & ~BCH_BY_INDEX) ||
786             arg.pad[0] ||
787             arg.pad[1] ||
788             arg.pad[2])
789                 return -EINVAL;
790
791         ca = bch2_device_lookup(c, arg.dev, arg.flags);
792         if (IS_ERR(ca))
793                 return PTR_ERR(ca);
794
795         src = bch2_dev_usage_read(ca);
796
797         arg.state               = ca->mi.state;
798         arg.bucket_size         = ca->mi.bucket_size;
799         arg.nr_data_types       = min(arg.nr_data_types, BCH_DATA_NR);
800         arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
801
802         ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
803         if (ret)
804                 goto err;
805
806         for (unsigned i = 0; i < arg.nr_data_types; i++) {
807                 struct bch_ioctl_dev_usage_type t = {
808                         .buckets        = src.d[i].buckets,
809                         .sectors        = src.d[i].sectors,
810                         .fragmented     = src.d[i].fragmented,
811                 };
812
813                 ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
814                 if (ret)
815                         goto err;
816         }
817 err:
818         percpu_ref_put(&ca->ref);
819         return ret;
820 }
821
822 static long bch2_ioctl_read_super(struct bch_fs *c,
823                                   struct bch_ioctl_read_super arg)
824 {
825         struct bch_dev *ca = NULL;
826         struct bch_sb *sb;
827         int ret = 0;
828
829         if (!capable(CAP_SYS_ADMIN))
830                 return -EPERM;
831
832         if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
833             arg.pad)
834                 return -EINVAL;
835
836         mutex_lock(&c->sb_lock);
837
838         if (arg.flags & BCH_READ_DEV) {
839                 ca = bch2_device_lookup(c, arg.dev, arg.flags);
840
841                 if (IS_ERR(ca)) {
842                         ret = PTR_ERR(ca);
843                         goto err;
844                 }
845
846                 sb = ca->disk_sb.sb;
847         } else {
848                 sb = c->disk_sb.sb;
849         }
850
851         if (vstruct_bytes(sb) > arg.size) {
852                 ret = -ERANGE;
853                 goto err;
854         }
855
856         ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
857                                    vstruct_bytes(sb));
858 err:
859         if (!IS_ERR_OR_NULL(ca))
860                 percpu_ref_put(&ca->ref);
861         mutex_unlock(&c->sb_lock);
862         return ret;
863 }
864
865 static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
866                                     struct bch_ioctl_disk_get_idx arg)
867 {
868         dev_t dev = huge_decode_dev(arg.dev);
869
870         if (!capable(CAP_SYS_ADMIN))
871                 return -EPERM;
872
873         if (!dev)
874                 return -EINVAL;
875
876         for_each_online_member(c, ca)
877                 if (ca->dev == dev) {
878                         percpu_ref_put(&ca->io_ref);
879                         return ca->dev_idx;
880                 }
881
882         return -BCH_ERR_ENOENT_dev_idx_not_found;
883 }
884
885 static long bch2_ioctl_disk_resize(struct bch_fs *c,
886                                    struct bch_ioctl_disk_resize arg)
887 {
888         struct bch_dev *ca;
889         int ret;
890
891         if (!capable(CAP_SYS_ADMIN))
892                 return -EPERM;
893
894         if ((arg.flags & ~BCH_BY_INDEX) ||
895             arg.pad)
896                 return -EINVAL;
897
898         ca = bch2_device_lookup(c, arg.dev, arg.flags);
899         if (IS_ERR(ca))
900                 return PTR_ERR(ca);
901
902         ret = bch2_dev_resize(c, ca, arg.nbuckets);
903
904         percpu_ref_put(&ca->ref);
905         return ret;
906 }
907
908 static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
909                                    struct bch_ioctl_disk_resize_journal arg)
910 {
911         struct bch_dev *ca;
912         int ret;
913
914         if (!capable(CAP_SYS_ADMIN))
915                 return -EPERM;
916
917         if ((arg.flags & ~BCH_BY_INDEX) ||
918             arg.pad)
919                 return -EINVAL;
920
921         if (arg.nbuckets > U32_MAX)
922                 return -EINVAL;
923
924         ca = bch2_device_lookup(c, arg.dev, arg.flags);
925         if (IS_ERR(ca))
926                 return PTR_ERR(ca);
927
928         ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
929
930         percpu_ref_put(&ca->ref);
931         return ret;
932 }
933
/*
 * Thread body for online fsck on a running filesystem.
 *
 * Redirects the filesystem's output to this fsck run for the duration of
 * the recovery passes, then marks the run done, wakes readers, and releases
 * the online_fsck_mutex and ro ref that bch2_ioctl_fsck_online() acquired
 * before starting the thread.
 */
static int bch2_fsck_online_thread_fn(void *arg)
{
	struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
	struct bch_fs *c = thr->c;

	/* NOTE(review): presumably restricts redirected output to this task - confirm */
	c->output_filter = current;
	c->output = &thr->output;

	/*
	 * XXX: can we figure out a way to do this without mucking with c->opts?
	 */
	if (opt_defined(thr->opts, fix_errors))
		c->opts.fix_errors = thr->opts.fix_errors;
	c->opts.fsck = true;

	/* start from the check_alloc_info pass */
	c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
	bch2_run_online_recovery_passes(c);

	/* undo the output redirection before signalling completion */
	c->output = NULL;
	c->output_filter = NULL;

	thr->thr.done = true;
	wake_up(&thr->output.wait);

	up(&c->online_fsck_mutex);
	bch2_ro_ref_put(c);
	return 0;
}
962
963 static long bch2_ioctl_fsck_online(struct bch_fs *c,
964                                    struct bch_ioctl_fsck_online arg)
965 {
966         struct fsck_thread *thr = NULL;
967         long ret = 0;
968
969         if (arg.flags)
970                 return -EINVAL;
971
972         if (!capable(CAP_SYS_ADMIN))
973                 return -EPERM;
974
975         if (!bch2_ro_ref_tryget(c))
976                 return -EROFS;
977
978         if (down_trylock(&c->online_fsck_mutex)) {
979                 bch2_ro_ref_put(c);
980                 return -EAGAIN;
981         }
982
983         thr = kzalloc(sizeof(*thr), GFP_KERNEL);
984         if (!thr) {
985                 ret = -ENOMEM;
986                 goto err;
987         }
988
989         thr->c = c;
990         thr->opts = bch2_opts_empty();
991         thr->output.buf = PRINTBUF;
992         thr->output.buf.atomic++;
993         spin_lock_init(&thr->output.lock);
994         init_waitqueue_head(&thr->output.wait);
995         darray_init(&thr->output2);
996
997         if (arg.opts) {
998                 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
999
1000                 ret =   PTR_ERR_OR_ZERO(optstr) ?:
1001                         bch2_parse_mount_opts(c, &thr->opts, optstr);
1002                 kfree(optstr);
1003
1004                 if (ret)
1005                         goto err;
1006         }
1007
1008         ret = run_thread_with_file(&thr->thr,
1009                                    &fsck_thread_ops,
1010                                    bch2_fsck_online_thread_fn,
1011                                    "bch-fsck");
1012 err:
1013         if (ret < 0) {
1014                 bch_err_fn(c, ret);
1015                 if (thr)
1016                         bch2_fsck_thread_free(thr);
1017                 up(&c->online_fsck_mutex);
1018                 bch2_ro_ref_put(c);
1019         }
1020         return ret;
1021 }
1022
/*
 * Helper for bch2_fs_ioctl(): copy an argument of type @_argtype in from
 * userspace and pass it by value to bch2_ioctl_<_name>().
 *
 * Relies on the caller's scope providing 'c', 'arg' (the user pointer),
 * 'ret' and an 'out' label. Returns -EFAULT straight from the enclosing
 * function if the copy fails; otherwise stores the handler's result in
 * 'ret' and jumps to 'out'.
 */
#define BCH_IOCTL(_name, _argtype)                                      \
do {                                                                    \
        _argtype karg;                                                  \
                                                                        \
        if (copy_from_user(&karg, arg, sizeof(karg)))                   \
                return -EFAULT;                                         \
        ret = bch2_ioctl_##_name(c, karg);                              \
        goto out;                                                       \
} while (0)
1032
1033 long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
1034 {
1035         long ret;
1036
1037         switch (cmd) {
1038         case BCH_IOCTL_QUERY_UUID:
1039                 return bch2_ioctl_query_uuid(c, arg);
1040         case BCH_IOCTL_FS_USAGE:
1041                 return bch2_ioctl_fs_usage(c, arg);
1042         case BCH_IOCTL_DEV_USAGE:
1043                 return bch2_ioctl_dev_usage(c, arg);
1044         case BCH_IOCTL_DEV_USAGE_V2:
1045                 return bch2_ioctl_dev_usage_v2(c, arg);
1046 #if 0
1047         case BCH_IOCTL_START:
1048                 BCH_IOCTL(start, struct bch_ioctl_start);
1049         case BCH_IOCTL_STOP:
1050                 return bch2_ioctl_stop(c);
1051 #endif
1052         case BCH_IOCTL_READ_SUPER:
1053                 BCH_IOCTL(read_super, struct bch_ioctl_read_super);
1054         case BCH_IOCTL_DISK_GET_IDX:
1055                 BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
1056         }
1057
1058         if (!test_bit(BCH_FS_started, &c->flags))
1059                 return -EINVAL;
1060
1061         switch (cmd) {
1062         case BCH_IOCTL_DISK_ADD:
1063                 BCH_IOCTL(disk_add, struct bch_ioctl_disk);
1064         case BCH_IOCTL_DISK_REMOVE:
1065                 BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
1066         case BCH_IOCTL_DISK_ONLINE:
1067                 BCH_IOCTL(disk_online, struct bch_ioctl_disk);
1068         case BCH_IOCTL_DISK_OFFLINE:
1069                 BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
1070         case BCH_IOCTL_DISK_SET_STATE:
1071                 BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
1072         case BCH_IOCTL_DATA:
1073                 BCH_IOCTL(data, struct bch_ioctl_data);
1074         case BCH_IOCTL_DISK_RESIZE:
1075                 BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
1076         case BCH_IOCTL_DISK_RESIZE_JOURNAL:
1077                 BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
1078         case BCH_IOCTL_FSCK_ONLINE:
1079                 BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
1080         default:
1081                 return -ENOTTY;
1082         }
1083 out:
1084         if (ret < 0)
1085                 ret = bch2_err_class(ret);
1086         return ret;
1087 }
1088
1089 static DEFINE_IDR(bch_chardev_minor);
1090
1091 static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
1092 {
1093         unsigned minor = iminor(file_inode(filp));
1094         struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
1095         void __user *arg = (void __user *) v;
1096
1097         return c
1098                 ? bch2_fs_ioctl(c, cmd, arg)
1099                 : bch2_global_ioctl(cmd, arg);
1100 }
1101
1102 static const struct file_operations bch_chardev_fops = {
1103         .owner          = THIS_MODULE,
1104         .unlocked_ioctl = bch2_chardev_ioctl,
1105         .open           = nonseekable_open,
1106 };
1107
/* Major number returned by register_chrdev() in bch2_chardev_init() */
static int bch_chardev_major;
/* Device class under which all bcachefs control devices are created */
static struct class *bch_chardev_class;
/* The global control device, created at minor U8_MAX */
static struct device *bch_chardev;
1111
1112 void bch2_fs_chardev_exit(struct bch_fs *c)
1113 {
1114         if (!IS_ERR_OR_NULL(c->chardev))
1115                 device_unregister(c->chardev);
1116         if (c->minor >= 0)
1117                 idr_remove(&bch_chardev_minor, c->minor);
1118 }
1119
1120 int bch2_fs_chardev_init(struct bch_fs *c)
1121 {
1122         c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
1123         if (c->minor < 0)
1124                 return c->minor;
1125
1126         c->chardev = device_create(bch_chardev_class, NULL,
1127                                    MKDEV(bch_chardev_major, c->minor), c,
1128                                    "bcachefs%u-ctl", c->minor);
1129         if (IS_ERR(c->chardev))
1130                 return PTR_ERR(c->chardev);
1131
1132         return 0;
1133 }
1134
1135 void bch2_chardev_exit(void)
1136 {
1137         if (!IS_ERR_OR_NULL(bch_chardev_class))
1138                 device_destroy(bch_chardev_class,
1139                                MKDEV(bch_chardev_major, U8_MAX));
1140         if (!IS_ERR_OR_NULL(bch_chardev_class))
1141                 class_destroy(bch_chardev_class);
1142         if (bch_chardev_major > 0)
1143                 unregister_chrdev(bch_chardev_major, "bcachefs");
1144 }
1145
1146 int __init bch2_chardev_init(void)
1147 {
1148         bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
1149         if (bch_chardev_major < 0)
1150                 return bch_chardev_major;
1151
1152         bch_chardev_class = class_create("bcachefs");
1153         if (IS_ERR(bch_chardev_class))
1154                 return PTR_ERR(bch_chardev_class);
1155
1156         bch_chardev = device_create(bch_chardev_class, NULL,
1157                                     MKDEV(bch_chardev_major, U8_MAX),
1158                                     NULL, "bcachefs-ctl");
1159         if (IS_ERR(bch_chardev))
1160                 return PTR_ERR(bch_chardev);
1161
1162         return 0;
1163 }
1164
1165 #endif /* NO_BCACHEFS_CHARDEV */