git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/sysfs.c
5f2bc933b0e93918043fddf3f554f6b97a3bad9c
[bcachefs-tools-debian] / libbcachefs / sysfs.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * bcache sysfs interfaces
4  *
5  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
6  * Copyright 2012 Google, Inc.
7  */
8
9 #ifndef NO_BCACHEFS_SYSFS
10
11 #include "bcachefs.h"
12 #include "alloc_background.h"
13 #include "sysfs.h"
14 #include "btree_cache.h"
15 #include "btree_io.h"
16 #include "btree_iter.h"
17 #include "btree_update.h"
18 #include "btree_update_interior.h"
19 #include "btree_gc.h"
20 #include "buckets.h"
21 #include "clock.h"
22 #include "disk_groups.h"
23 #include "ec.h"
24 #include "inode.h"
25 #include "journal.h"
26 #include "keylist.h"
27 #include "move.h"
28 #include "opts.h"
29 #include "rebalance.h"
30 #include "replicas.h"
31 #include "super-io.h"
32 #include "tests.h"
33
34 #include <linux/blkdev.h>
35 #include <linux/sort.h>
36 #include <linux/sched/clock.h>
37
38 #include "util.h"
39
/*
 * Define the struct sysfs_ops instance <type>_sysfs_ops whose .show and
 * .store members point at <type>_show and <type>_store (the handlers
 * declared with SHOW()/STORE() below).
 */
40 #define SYSFS_OPS(type)                                                 \
41 struct sysfs_ops type ## _sysfs_ops = {                                 \
42         .show   = type ## _show,                                        \
43         .store  = type ## _store                                        \
44 }
45
/*
 * Expands to the signature of the static show handler <fn>_show(); the
 * function body follows the macro invocation.  The parameter names kobj,
 * attr and buf are relied upon by the sysfs_print*() helper macros.
 */
46 #define SHOW(fn)                                                        \
47 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
48                            char *buf)                                   \
49
/*
 * Expands to the signature of the static store handler <fn>_store(); the
 * parameter names kobj, attr, buf and size are relied upon by the
 * sysfs_strtoul*() helper macros.
 */
50 #define STORE(fn)                                                       \
51 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
52                             const char *buf, size_t size)               \
53
/*
 * Declare a file-local struct attribute named sysfs_<_name> whose file name
 * is the stringified _name and whose permission bits are _mode.
 */
54 #define __sysfs_attribute(_name, _mode)                                 \
55         static struct attribute sysfs_##_name =                         \
56                 { .name = #_name, .mode = _mode }
57
/* Shorthands for the three permission combinations used in this file. */
58 #define write_attribute(n)      __sysfs_attribute(n, S_IWUSR)
59 #define read_attribute(n)       __sysfs_attribute(n, S_IRUGO)
60 #define rw_attribute(n)         __sysfs_attribute(n, S_IRUGO|S_IWUSR)
61
/*
 * Show-side helpers.  Each one compares the handler's `attr` argument with
 * &sysfs_<file>; on a match it writes into the handler's `buf` and RETURNS
 * from the enclosing function, otherwise it does nothing.
 */

/* printf-style output limited to PAGE_SIZE; a "\n" is appended to fmt. */
62 #define sysfs_printf(file, fmt, ...)                                    \
63 do {                                                                    \
64         if (attr == &sysfs_ ## file)                                    \
65                 return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
66 } while (0)
67
/* Output of a single value through snprint() (defined outside this file). */
68 #define sysfs_print(file, var)                                          \
69 do {                                                                    \
70         if (attr == &sysfs_ ## file)                                    \
71                 return snprint(buf, PAGE_SIZE, var);                    \
72 } while (0)
73
/*
 * Output of a value through bch2_hprint() followed by a newline; returns the
 * number of bytes placed in buf.
 * NOTE(review): bch2_hprint presumably prints human-readable units — confirm.
 */
74 #define sysfs_hprint(file, val)                                         \
75 do {                                                                    \
76         if (attr == &sysfs_ ## file) {                                  \
77                 struct printbuf out = _PBUF(buf, PAGE_SIZE);            \
78                 bch2_hprint(&out, val);                                 \
79                 pr_buf(&out, "\n");                                     \
80                 return out.pos - buf;                                   \
81         }                                                               \
82 } while (0)
83
/*
 * Variants that obtain the value from var(_var); the caller must define a
 * var() macro before using them.
 */
84 #define var_printf(_var, fmt)   sysfs_printf(_var, fmt, var(_var))
85 #define var_print(_var)         sysfs_print(_var, var(_var))
86 #define var_hprint(_var)        sysfs_hprint(_var, var(_var))
87
/*
 * Store-side helpers.  The sysfs_*() forms compare the handler's `attr`
 * argument with &sysfs_<file>; on a match they parse the handler's `buf`
 * into `var` and RETURN from the enclosing function: the parser's nonzero
 * result if there is one, otherwise `size`.
 */

/* Parse via strtoul_safe() (defined outside this file). */
88 #define sysfs_strtoul(file, var)                                        \
89 do {                                                                    \
90         if (attr == &sysfs_ ## file)                                    \
91                 return strtoul_safe(buf, var) ?: (ssize_t) size;        \
92 } while (0)
93
/* Parse via strtoul_safe_clamp() with the given min/max bounds. */
94 #define sysfs_strtoul_clamp(file, var, min, max)                        \
95 do {                                                                    \
96         if (attr == &sysfs_ ## file)                                    \
97                 return strtoul_safe_clamp(buf, var, min, max)           \
98                         ?: (ssize_t) size;                              \
99 } while (0)
100
/*
 * Statement expression: parse cp as a base-10 unsigned long with kstrtoul().
 * On failure the ENCLOSING FUNCTION returns the error code; otherwise the
 * expression evaluates to the parsed value.
 */
101 #define strtoul_or_return(cp)                                           \
102 ({                                                                      \
103         unsigned long _v;                                               \
104         int _r = kstrtoul(cp, 10, &_v);                                 \
105         if (_r)                                                         \
106                 return _r;                                              \
107         _v;                                                             \
108 })
109
/*
 * As strtoul_or_return(), but parsing goes through strtoul_safe_restrict()
 * with the given min/max.
 */
110 #define strtoul_restrict_or_return(cp, min, max)                        \
111 ({                                                                      \
112         unsigned long __v = 0;                                          \
113         int _r = strtoul_safe_restrict(cp, __v, min, max);              \
114         if (_r)                                                         \
115                 return _r;                                              \
116         __v;                                                            \
117 })
118
/*
 * As strtoul_or_return(), but parses a u64 with strtoi_h().
 * NOTE(review): strtoi_h presumably accepts human-readable size suffixes —
 * confirm against its definition.
 */
119 #define strtoi_h_or_return(cp)                                          \
120 ({                                                                      \
121         u64 _v;                                                         \
122         int _r = strtoi_h(cp, &_v);                                     \
123         if (_r)                                                         \
124                 return _r;                                              \
125         _v;                                                             \
126 })
127
/* sysfs_strtoul() counterpart that parses with strtoi_h(). */
128 #define sysfs_hatoi(file, var)                                          \
129 do {                                                                    \
130         if (attr == &sysfs_ ## file)                                    \
131                 return strtoi_h(buf, &var) ?: (ssize_t) size;           \
132 } while (0)
133
/*
 * Attribute objects.  Each line declares a static struct attribute named
 * sysfs_<name>: write_attribute -> S_IWUSR, read_attribute -> S_IRUGO,
 * rw_attribute -> both.  The show/store handlers below identify the file
 * being accessed by comparing `attr` against the address of these objects.
 */

/* Write-only triggers handled in __bch2_fs_store(). */
134 write_attribute(trigger_journal_flush);
135 write_attribute(trigger_btree_coalesce);
136 write_attribute(trigger_gc);
137 write_attribute(trigger_alloc_write);
138 write_attribute(prune_cache);
139 rw_attribute(btree_gc_periodic);
140
141 read_attribute(uuid);
142 read_attribute(minor);
143 read_attribute(bucket_size);
144 read_attribute(block_size);
145 read_attribute(btree_node_size);
146 read_attribute(first_bucket);
147 read_attribute(nbuckets);
148 read_attribute(durability);
149 read_attribute(iodone);
150
151 read_attribute(io_latency_read);
152 read_attribute(io_latency_write);
153 read_attribute(io_latency_stats_read);
154 read_attribute(io_latency_stats_write);
155 read_attribute(congested);
156
157 read_attribute(bucket_quantiles_last_read);
158 read_attribute(bucket_quantiles_last_write);
159 read_attribute(bucket_quantiles_fragmentation);
160 read_attribute(bucket_quantiles_oldest_gen);
161
162 read_attribute(reserve_stats);
163 read_attribute(btree_cache_size);
164 read_attribute(compression_stats);
165 read_attribute(journal_debug);
166 read_attribute(journal_pins);
167 read_attribute(btree_updates);
168 read_attribute(dirty_btree_nodes);
169 read_attribute(btree_transactions);
170
171 read_attribute(internal_uuid);
172
173 read_attribute(has_data);
174 read_attribute(alloc_debug);
175 write_attribute(wake_allocator);
176
177 read_attribute(read_realloc_races);
178 read_attribute(extent_migrate_done);
179 read_attribute(extent_migrate_raced);
180
181 rw_attribute(journal_write_delay_ms);
182 rw_attribute(journal_reclaim_delay_ms);
183
184 rw_attribute(discard);
185 rw_attribute(cache_replacement_policy);
186 rw_attribute(label);
187
/*
 * sysfs_pd_controller_attribute() is defined outside this file.
 * NOTE(review): presumably it declares the group of attributes for one
 * pd controller — confirm against its definition.
 */
188 rw_attribute(copy_gc_enabled);
189 sysfs_pd_controller_attribute(copy_gc);
190
191 rw_attribute(rebalance_enabled);
192 sysfs_pd_controller_attribute(rebalance);
193 read_attribute(rebalance_work);
194 rw_attribute(promote_whole_extents);
195
196 read_attribute(new_stripes);
197
198 rw_attribute(pd_controllers_update_seconds);
199
200 read_attribute(meta_replicas_have);
201 read_attribute(data_replicas_have);
202
203 read_attribute(io_timers_read);
204 read_attribute(io_timers_write);
205
/* Only present when the in-kernel tests are configured in. */
206 #ifdef CONFIG_BCACHEFS_TESTS
207 write_attribute(perf_test);
208 #endif /* CONFIG_BCACHEFS_TESTS */
209
/* One read/write attribute sysfs_<name> per entry of BCH_DEBUG_PARAMS(). */
210 #define BCH_DEBUG_PARAM(name, description)                              \
211         rw_attribute(name);
212
213         BCH_DEBUG_PARAMS()
214 #undef BCH_DEBUG_PARAM
215
/*
 * One read-only attribute per entry of BCH_TIME_STATS().  The object is named
 * sysfs_time_stat_<name> while the file name is just <name>.
 */
216 #define x(_name)                                                \
217         static struct attribute sysfs_time_stat_##_name =               \
218                 { .name = #_name, .mode = S_IRUGO };
219         BCH_TIME_STATS()
220 #undef x
221
/*
 * Declared by hand because the file name ("state") differs from the object
 * name.  Note the mode is read-only (S_IRUGO) despite the _rw suffix.
 */
222 static struct attribute sysfs_state_rw = {
223         .name = "state",
224         .mode = S_IRUGO
225 };
226
227 static size_t bch2_btree_cache_size(struct bch_fs *c)
228 {
229         size_t ret = 0;
230         struct btree *b;
231
232         mutex_lock(&c->btree_cache.lock);
233         list_for_each_entry(b, &c->btree_cache.live, list)
234                 ret += btree_bytes(c);
235
236         mutex_unlock(&c->btree_cache.lock);
237         return ret;
238 }
239
240 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
241 {
242         struct printbuf out = _PBUF(buf, PAGE_SIZE);
243         struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
244
245         if (!fs_usage)
246                 return -ENOMEM;
247
248         bch2_fs_usage_to_text(&out, c, fs_usage);
249
250         percpu_up_read(&c->mark_lock);
251
252         kfree(fs_usage);
253
254         return out.pos - buf;
255 }
256
257 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
258 {
259         struct btree_trans trans;
260         struct btree_iter *iter;
261         struct bkey_s_c k;
262         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
263             nr_compressed_extents = 0,
264             compressed_sectors_compressed = 0,
265             compressed_sectors_uncompressed = 0;
266         int ret;
267
268         if (!test_bit(BCH_FS_STARTED, &c->flags))
269                 return -EPERM;
270
271         bch2_trans_init(&trans, c, 0, 0);
272
273         for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret)
274                 if (k.k->type == KEY_TYPE_extent) {
275                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
276                         const union bch_extent_entry *entry;
277                         struct extent_ptr_decoded p;
278
279                         extent_for_each_ptr_decode(e, p, entry) {
280                                 if (!crc_is_compressed(p.crc)) {
281                                         nr_uncompressed_extents++;
282                                         uncompressed_sectors += e.k->size;
283                                 } else {
284                                         nr_compressed_extents++;
285                                         compressed_sectors_compressed +=
286                                                 p.crc.compressed_size;
287                                         compressed_sectors_uncompressed +=
288                                                 p.crc.uncompressed_size;
289                                 }
290
291                                 /* only looking at the first ptr */
292                                 break;
293                         }
294                 }
295
296         ret = bch2_trans_exit(&trans) ?: ret;
297         if (ret)
298                 return ret;
299
300         return scnprintf(buf, PAGE_SIZE,
301                         "uncompressed data:\n"
302                         "       nr extents:                     %llu\n"
303                         "       size (bytes):                   %llu\n"
304                         "compressed data:\n"
305                         "       nr extents:                     %llu\n"
306                         "       compressed size (bytes):        %llu\n"
307                         "       uncompressed size (bytes):      %llu\n",
308                         nr_uncompressed_extents,
309                         uncompressed_sectors << 9,
310                         nr_compressed_extents,
311                         compressed_sectors_compressed << 9,
312                         compressed_sectors_uncompressed << 9);
313 }
314
315 static ssize_t bch2_new_stripes(struct bch_fs *c, char *buf)
316 {
317         char *out = buf, *end = buf + PAGE_SIZE;
318         struct ec_stripe_head *h;
319         struct ec_stripe_new *s;
320
321         mutex_lock(&c->ec_new_stripe_lock);
322         list_for_each_entry(h, &c->ec_new_stripe_list, list) {
323                 out += scnprintf(out, end - out,
324                                  "target %u algo %u redundancy %u:\n",
325                                  h->target, h->algo, h->redundancy);
326
327                 if (h->s)
328                         out += scnprintf(out, end - out,
329                                          "\tpending: blocks %u allocated %u\n",
330                                          h->s->blocks.nr,
331                                          bitmap_weight(h->s->blocks_allocated,
332                                                        h->s->blocks.nr));
333
334                 mutex_lock(&h->lock);
335                 list_for_each_entry(s, &h->stripes, list)
336                         out += scnprintf(out, end - out,
337                                          "\tin flight: blocks %u allocated %u pin %u\n",
338                                          s->blocks.nr,
339                                          bitmap_weight(s->blocks_allocated,
340                                                        s->blocks.nr),
341                                          atomic_read(&s->pin));
342                 mutex_unlock(&h->lock);
343
344         }
345         mutex_unlock(&c->ec_new_stripe_lock);
346
347         return out - buf;
348 }
349
/*
 * Show handler for the attributes hanging off a filesystem's kobject
 * (bch2_fs_internal_show() forwards here as well).
 *
 * The function is a linear dispatch on `attr`: every sysfs_print*() /
 * sysfs_hprint() helper and every explicit `if (attr == ...)` returns as soon
 * as its attribute matches.  If nothing matches, 0 is returned.
 */
350 SHOW(bch2_fs)
351 {
352         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
353
354         sysfs_print(minor,                      c->minor);
355         sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
356
357         sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
358         sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
359
360         sysfs_print(block_size,                 block_bytes(c));
361         sysfs_print(btree_node_size,            btree_bytes(c));
362         sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
363
364         sysfs_print(read_realloc_races,
365                     atomic_long_read(&c->read_realloc_races));
366         sysfs_print(extent_migrate_done,
367                     atomic_long_read(&c->extent_migrate_done));
368         sysfs_print(extent_migrate_raced,
369                     atomic_long_read(&c->extent_migrate_raced));
370
371         sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
372
373         sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
374
375         sysfs_print(pd_controllers_update_seconds,
376                     c->pd_controllers_update_seconds);
377
378         sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
        /* sysfs_pd_controller_show() is defined outside this file. */
379         sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
380
381         if (attr == &sysfs_rebalance_work)
382                 return bch2_rebalance_work_show(c, buf);
383
384         sysfs_print(promote_whole_extents,      c->promote_whole_extents);
385
386         sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
387         sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
388
389         /* Debugging: */
390
391         if (attr == &sysfs_alloc_debug)
392                 return show_fs_alloc_debug(c, buf);
393
394         if (attr == &sysfs_journal_debug)
395                 return bch2_journal_print_debug(&c->journal, buf);
396
397         if (attr == &sysfs_journal_pins)
398                 return bch2_journal_print_pins(&c->journal, buf);
399
400         if (attr == &sysfs_btree_updates)
401                 return bch2_btree_updates_print(c, buf);
402
403         if (attr == &sysfs_dirty_btree_nodes)
404                 return bch2_dirty_btree_nodes_print(c, buf);
405         if (attr == &sysfs_btree_transactions) {
406                 struct printbuf out = _PBUF(buf, PAGE_SIZE);
407
408                 bch2_btree_trans_to_text(&out, c);
409                 return out.pos - buf;
410         }
411
412         if (attr == &sysfs_compression_stats)
413                 return bch2_compression_stats(c, buf);
414
415         if (attr == &sysfs_new_stripes)
416                 return bch2_new_stripes(c, buf);
417
418         if (attr == &sysfs_io_timers_read)
419                 return bch2_io_timers_show(&c->io_clock[READ], buf);
420         if (attr == &sysfs_io_timers_write)
421                 return bch2_io_timers_show(&c->io_clock[WRITE], buf);
422
        /* One sysfs_print() per debug parameter, reading the same-named field of c. */
423 #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
424         BCH_DEBUG_PARAMS()
425 #undef BCH_DEBUG_PARAM
426
427         return 0;
428 }
429
430 STORE(__bch2_fs)
431 {
432         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
433
434         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
435         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
436
437         if (attr == &sysfs_btree_gc_periodic) {
438                 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
439                         ?: (ssize_t) size;
440
441                 wake_up_process(c->gc_thread);
442                 return ret;
443         }
444
445         if (attr == &sysfs_copy_gc_enabled) {
446                 struct bch_dev *ca;
447                 unsigned i;
448                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
449                         ?: (ssize_t) size;
450
451                 for_each_member_device(ca, c, i)
452                         if (ca->copygc_thread)
453                                 wake_up_process(ca->copygc_thread);
454                 return ret;
455         }
456
457         if (attr == &sysfs_rebalance_enabled) {
458                 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
459                         ?: (ssize_t) size;
460
461                 rebalance_wakeup(c);
462                 return ret;
463         }
464
465         sysfs_strtoul(pd_controllers_update_seconds,
466                       c->pd_controllers_update_seconds);
467         sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
468
469         sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
470
471         /* Debugging: */
472
473 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
474         BCH_DEBUG_PARAMS()
475 #undef BCH_DEBUG_PARAM
476
477         if (!test_bit(BCH_FS_STARTED, &c->flags))
478                 return -EPERM;
479
480         /* Debugging: */
481
482         if (attr == &sysfs_trigger_journal_flush)
483                 bch2_journal_meta_async(&c->journal, NULL);
484
485         if (attr == &sysfs_trigger_btree_coalesce)
486                 bch2_coalesce(c);
487
488         if (attr == &sysfs_trigger_gc)
489                 bch2_gc(c, NULL, false, false);
490
491         if (attr == &sysfs_trigger_alloc_write) {
492                 bool wrote;
493
494                 bch2_alloc_write(c, 0, &wrote);
495         }
496
497         if (attr == &sysfs_prune_cache) {
498                 struct shrink_control sc;
499
500                 sc.gfp_mask = GFP_KERNEL;
501                 sc.nr_to_scan = strtoul_or_return(buf);
502                 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
503         }
504 #ifdef CONFIG_BCACHEFS_TESTS
505         if (attr == &sysfs_perf_test) {
506                 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
507                 char *test              = strsep(&p, " \t\n");
508                 char *nr_str            = strsep(&p, " \t\n");
509                 char *threads_str       = strsep(&p, " \t\n");
510                 unsigned threads;
511                 u64 nr;
512                 int ret = -EINVAL;
513
514                 if (threads_str &&
515                     !(ret = kstrtouint(threads_str, 10, &threads)) &&
516                     !(ret = bch2_strtoull_h(nr_str, &nr)))
517                         bch2_btree_perf_test(c, test, nr, threads);
518                 else
519                         size = ret;
520                 kfree(tmp);
521         }
522 #endif
523         return size;
524 }
525
526 STORE(bch2_fs)
527 {
528         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
529
530         mutex_lock(&c->state_lock);
531         size = __bch2_fs_store(kobj, attr, buf, size);
532         mutex_unlock(&c->state_lock);
533
534         return size;
535 }
536 SYSFS_OPS(bch2_fs);
537
/*
 * NULL-terminated list of the attributes served by bch2_fs_show() /
 * bch2_fs_store() that are listed for the filesystem's main kobject.
 */
538 struct attribute *bch2_fs_files[] = {
539         &sysfs_minor,
540         &sysfs_block_size,
541         &sysfs_btree_node_size,
542         &sysfs_btree_cache_size,
543
544         &sysfs_meta_replicas_have,
545         &sysfs_data_replicas_have,
546
547         &sysfs_journal_write_delay_ms,
548         &sysfs_journal_reclaim_delay_ms,
549
550         &sysfs_promote_whole_extents,
551
552         &sysfs_compression_stats,
553
        /* perf_test only exists when the tests are configured in. */
554 #ifdef CONFIG_BCACHEFS_TESTS
555         &sysfs_perf_test,
556 #endif
557         NULL
558 };
559
560 /* internal dir - just a wrapper */
561
562 SHOW(bch2_fs_internal)
563 {
564         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
565         return bch2_fs_show(&c->kobj, attr, buf);
566 }
567
568 STORE(bch2_fs_internal)
569 {
570         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
571         return bch2_fs_store(&c->kobj, attr, buf, size);
572 }
573 SYSFS_OPS(bch2_fs_internal);
574
/*
 * NULL-terminated list of the attributes listed for the "internal" kobject.
 * Reads and writes are forwarded to bch2_fs_show() / bch2_fs_store() by the
 * bch2_fs_internal wrappers.
 */
575 struct attribute *bch2_fs_internal_files[] = {
576         &sysfs_alloc_debug,
577         &sysfs_journal_debug,
578         &sysfs_journal_pins,
579         &sysfs_btree_updates,
580         &sysfs_dirty_btree_nodes,
581         &sysfs_btree_transactions,
582
583         &sysfs_read_realloc_races,
584         &sysfs_extent_migrate_done,
585         &sysfs_extent_migrate_raced,
586
        /* Write-only triggers; refused until the filesystem is started. */
587         &sysfs_trigger_journal_flush,
588         &sysfs_trigger_btree_coalesce,
589         &sysfs_trigger_gc,
590         &sysfs_trigger_alloc_write,
591         &sysfs_prune_cache,
592
593         &sysfs_copy_gc_enabled,
594
595         &sysfs_rebalance_enabled,
596         &sysfs_rebalance_work,
        /* sysfs_pd_controller_files() is defined outside this file. */
597         sysfs_pd_controller_files(rebalance),
598
599         &sysfs_new_stripes,
600
601         &sysfs_io_timers_read,
602         &sysfs_io_timers_write,
603
604         &sysfs_internal_uuid,
605
        /* One entry per debug parameter declared via BCH_DEBUG_PARAMS(). */
606 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
607         BCH_DEBUG_PARAMS()
608 #undef BCH_DEBUG_PARAM
609
610         NULL
611 };
612
613 /* options */
614
615 SHOW(bch2_fs_opts_dir)
616 {
617         struct printbuf out = _PBUF(buf, PAGE_SIZE);
618         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
619         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
620         int id = opt - bch2_opt_table;
621         u64 v = bch2_opt_get_by_id(&c->opts, id);
622
623         bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST);
624         pr_buf(&out, "\n");
625
626         return out.pos - buf;
627 }
628
629 STORE(bch2_fs_opts_dir)
630 {
631         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
632         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
633         int ret, id = opt - bch2_opt_table;
634         char *tmp;
635         u64 v;
636
637         tmp = kstrdup(buf, GFP_KERNEL);
638         if (!tmp)
639                 return -ENOMEM;
640
641         ret = bch2_opt_parse(c, opt, strim(tmp), &v);
642         kfree(tmp);
643
644         if (ret < 0)
645                 return ret;
646
647         ret = bch2_opt_check_may_set(c, id, v);
648         if (ret < 0)
649                 return ret;
650
651         if (opt->set_sb != SET_NO_SB_OPT) {
652                 mutex_lock(&c->sb_lock);
653                 opt->set_sb(c->disk_sb.sb, v);
654                 bch2_write_super(c);
655                 mutex_unlock(&c->sb_lock);
656         }
657
658         bch2_opt_set_by_id(&c->opts, id, v);
659
660         if ((id == Opt_background_target ||
661              id == Opt_background_compression) && v) {
662                 bch2_rebalance_add_work(c, S64_MAX);
663                 rebalance_wakeup(c);
664         }
665
666         return size;
667 }
668 SYSFS_OPS(bch2_fs_opts_dir);
669
/*
 * Intentionally empty (just the NULL terminator).
 * NOTE(review): presumably the option files are added at runtime by
 * bch2_opts_create_sysfs_files() instead — confirm against the caller.
 */
670 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
671
672 int bch2_opts_create_sysfs_files(struct kobject *kobj)
673 {
674         const struct bch_option *i;
675         int ret;
676
677         for (i = bch2_opt_table;
678              i < bch2_opt_table + bch2_opts_nr;
679              i++) {
680                 if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME)))
681                         continue;
682
683                 ret = sysfs_create_file(kobj, &i->attr);
684                 if (ret)
685                         return ret;
686         }
687
688         return 0;
689 }
690
691 /* time stats */
692
/*
 * Show handler for the time_stats kobject.  The x-macro expands to one
 * `if (attr == &sysfs_time_stat_<name>)` test per BCH_TIME_STATS() entry,
 * each printing c->times[BCH_TIME_<name>] via bch2_time_stats_print().
 * Returns 0 when no attribute matches.
 */
693 SHOW(bch2_fs_time_stats)
694 {
695         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
696
697 #define x(name)                                         \
698         if (attr == &sysfs_time_stat_##name)                            \
699                 return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
700                                              buf, PAGE_SIZE);
701         BCH_TIME_STATS()
702 #undef x
703
704         return 0;
705 }
706
/*
 * The time-stat attributes are declared read-only (S_IRUGO); this handler
 * exists so SYSFS_OPS() has a store function to reference.  It accepts and
 * discards any input.
 */
707 STORE(bch2_fs_time_stats)
708 {
709         return size;
710 }
711 SYSFS_OPS(bch2_fs_time_stats);
712
/* NULL-terminated list with one attribute per BCH_TIME_STATS() entry. */
713 struct attribute *bch2_fs_time_stats_files[] = {
714 #define x(name)                                         \
715         &sysfs_time_stat_##name,
716         BCH_TIME_STATS()
717 #undef x
718         NULL
719 };
720
/*
 * Callback type used by show_quantiles(): maps bucket index `b` of device
 * `ca` to an unsigned value.  The final void * is an opaque argument passed
 * through unchanged from show_quantiles()'s caller.
 */
721 typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
722                                  size_t, void *);
723
724 static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
725                                   size_t b, void *private)
726 {
727         int rw = (private ? 1 : 0);
728
729         return bucket_last_io(c, bucket(ca, b), rw);
730 }
731
732 static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
733                                        size_t b, void *private)
734 {
735         struct bucket *g = bucket(ca, b);
736         return bucket_sectors_used(g->mark);
737 }
738
739 static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
740                                      size_t b, void *private)
741 {
742         return bucket_gc_gen(ca, b);
743 }
744
/* sort() comparator for arrays of unsigned, delegating to cmp_int(). */
static int unsigned_cmp(const void *_l, const void *_r)
{
        const unsigned lhs = *(const unsigned *) _l;
        const unsigned rhs = *(const unsigned *) _r;

        return cmp_int(lhs, rhs);
}
752
753 static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
754                               char *buf, bucket_map_fn *fn, void *private)
755 {
756         size_t i, n;
757         /* Compute 31 quantiles */
758         unsigned q[31], *p;
759         ssize_t ret = 0;
760
761         down_read(&ca->bucket_lock);
762         n = ca->mi.nbuckets;
763
764         p = vzalloc(n * sizeof(unsigned));
765         if (!p) {
766                 up_read(&ca->bucket_lock);
767                 return -ENOMEM;
768         }
769
770         for (i = ca->mi.first_bucket; i < n; i++)
771                 p[i] = fn(c, ca, i, private);
772
773         sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
774         up_read(&ca->bucket_lock);
775
776         while (n &&
777                !p[n - 1])
778                 --n;
779
780         for (i = 0; i < ARRAY_SIZE(q); i++)
781                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
782
783         vfree(p);
784
785         for (i = 0; i < ARRAY_SIZE(q); i++)
786                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
787                                  "%u ", q[i]);
788         buf[ret - 1] = '\n';
789
790         return ret;
791 }
792
793 static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
794 {
795         struct printbuf out = _PBUF(buf, PAGE_SIZE);
796         enum alloc_reserve i;
797
798         spin_lock(&ca->fs->freelist_lock);
799
800         pr_buf(&out, "free_inc:\t%zu\t%zu\n",
801                fifo_used(&ca->free_inc),
802                ca->free_inc.size);
803
804         for (i = 0; i < RESERVE_NR; i++)
805                 pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i,
806                        fifo_used(&ca->free[i]),
807                        ca->free[i].size);
808
809         spin_unlock(&ca->fs->freelist_lock);
810
811         return out.pos - buf;
812 }
813
/*
 * Dump allocator state for one device into buf (at most PAGE_SIZE bytes):
 * free-list fifo fill levels, per-type bucket and sector counts from
 * bch2_dev_usage_read(), and filesystem-wide open-bucket state.
 *
 * Returns the number of bytes written.
 */
814 static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
815 {
816         struct bch_fs *c = ca->fs;
817         struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
818         unsigned i, nr[BCH_DATA_NR];
819
820         memset(nr, 0, sizeof(nr));
821
        /*
         * Count open buckets by data type.  This walks the whole
         * c->open_buckets array, i.e. it is filesystem-wide rather than
         * restricted to `ca`.
         */
822         for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
823                 nr[c->open_buckets[i].type]++;
824
        /*
         * The arguments below are positional: keep them in exactly the same
         * order as the conversions in the format string.
         */
825         return scnprintf(buf, PAGE_SIZE,
826                 "free_inc:               %zu/%zu\n"
827                 "free[RESERVE_BTREE]:    %zu/%zu\n"
828                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
829                 "free[RESERVE_NONE]:     %zu/%zu\n"
830                 "buckets:\n"
831                 "    capacity:           %llu\n"
832                 "    alloc:              %llu\n"
833                 "    sb:                 %llu\n"
834                 "    journal:            %llu\n"
835                 "    meta:               %llu\n"
836                 "    user:               %llu\n"
837                 "    cached:             %llu\n"
838                 "    erasure coded:      %llu\n"
839                 "    available:          %lli\n"
840                 "sectors:\n"
841                 "    sb:                 %llu\n"
842                 "    journal:            %llu\n"
843                 "    meta:               %llu\n"
844                 "    user:               %llu\n"
845                 "    cached:             %llu\n"
846                 "    fragmented:         %llu\n"
847                 "    copygc threshold:   %llu\n"
848                 "freelist_wait:          %s\n"
849                 "open buckets:           %u/%u (reserved %u)\n"
850                 "open_buckets_wait:      %s\n"
851                 "open_buckets_btree:     %u\n"
852                 "open_buckets_user:      %u\n"
853                 "btree reserve cache:    %u\n",
854                 fifo_used(&ca->free_inc),               ca->free_inc.size,
855                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
856                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
857                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
                /* capacity: buckets from first_bucket to nbuckets */
858                 ca->mi.nbuckets - ca->mi.first_bucket,
859                 stats.buckets_alloc,
860                 stats.buckets[BCH_DATA_SB],
861                 stats.buckets[BCH_DATA_JOURNAL],
                /* "meta" lines report the BCH_DATA_BTREE counters */
862                 stats.buckets[BCH_DATA_BTREE],
863                 stats.buckets[BCH_DATA_USER],
864                 stats.buckets[BCH_DATA_CACHED],
865                 stats.buckets_ec,
                /* available: capacity minus buckets_unavailable (printed signed) */
866                 ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
867                 stats.sectors[BCH_DATA_SB],
868                 stats.sectors[BCH_DATA_JOURNAL],
869                 stats.sectors[BCH_DATA_BTREE],
870                 stats.sectors[BCH_DATA_USER],
871                 stats.sectors[BCH_DATA_CACHED],
872                 stats.sectors_fragmented,
873                 ca->copygc_threshold,
                /* a non-empty waiter list is reported as "waiting" */
874                 c->freelist_wait.list.first             ? "waiting" : "empty",
875                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
876                 BTREE_NODE_OPEN_BUCKET_RESERVE,
877                 c->open_buckets_wait.list.first         ? "waiting" : "empty",
878                 nr[BCH_DATA_BTREE],
879                 nr[BCH_DATA_USER],
880                 c->btree_reserve_cache_nr);
881 }
882
/*
 * Human-readable names of the two I/O directions, NULL-terminated.
 * show_dev_iodone() indexes this table with its rw loop counter.
 */
static const char * const bch2_rw[] = { "read", "write", NULL };
888
889 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
890 {
891         struct printbuf out = _PBUF(buf, PAGE_SIZE);
892         int rw, i;
893
894         for (rw = 0; rw < 2; rw++) {
895                 pr_buf(&out, "%s:\n", bch2_rw[rw]);
896
897                 for (i = 1; i < BCH_DATA_NR; i++)
898                         pr_buf(&out, "%-12s:%12llu\n",
899                                bch2_data_types[i],
900                                percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
901         }
902
903         return out.pos - buf;
904 }
905
906 SHOW(bch2_dev)
907 {
908         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
909         struct bch_fs *c = ca->fs;
910         struct printbuf out = _PBUF(buf, PAGE_SIZE);
911
912         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
913
914         sysfs_print(bucket_size,        bucket_bytes(ca));
915         sysfs_print(block_size,         block_bytes(c));
916         sysfs_print(first_bucket,       ca->mi.first_bucket);
917         sysfs_print(nbuckets,           ca->mi.nbuckets);
918         sysfs_print(durability,         ca->mi.durability);
919         sysfs_print(discard,            ca->mi.discard);
920
921         if (attr == &sysfs_label) {
922                 if (ca->mi.group) {
923                         mutex_lock(&c->sb_lock);
924                         bch2_disk_path_to_text(&out, &c->disk_sb,
925                                                ca->mi.group - 1);
926                         mutex_unlock(&c->sb_lock);
927                 }
928
929                 pr_buf(&out, "\n");
930                 return out.pos - buf;
931         }
932
933         if (attr == &sysfs_has_data) {
934                 bch2_flags_to_text(&out, bch2_data_types,
935                                    bch2_dev_has_data(c, ca));
936                 pr_buf(&out, "\n");
937                 return out.pos - buf;
938         }
939
940         sysfs_pd_controller_show(copy_gc, &ca->copygc_pd);
941
942         if (attr == &sysfs_cache_replacement_policy) {
943                 bch2_string_opt_to_text(&out,
944                                         bch2_cache_replacement_policies,
945                                         ca->mi.replacement);
946                 pr_buf(&out, "\n");
947                 return out.pos - buf;
948         }
949
950         if (attr == &sysfs_state_rw) {
951                 bch2_string_opt_to_text(&out, bch2_dev_state,
952                                         ca->mi.state);
953                 pr_buf(&out, "\n");
954                 return out.pos - buf;
955         }
956
957         if (attr == &sysfs_iodone)
958                 return show_dev_iodone(ca, buf);
959
960         sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
961         sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
962
963         if (attr == &sysfs_io_latency_stats_read)
964                 return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
965         if (attr == &sysfs_io_latency_stats_write)
966                 return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
967
968         sysfs_printf(congested,                 "%u%%",
969                      clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
970                      * 100 / CONGESTED_MAX);
971
972         if (attr == &sysfs_bucket_quantiles_last_read)
973                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
974         if (attr == &sysfs_bucket_quantiles_last_write)
975                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
976         if (attr == &sysfs_bucket_quantiles_fragmentation)
977                 return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
978         if (attr == &sysfs_bucket_quantiles_oldest_gen)
979                 return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
980
981         if (attr == &sysfs_reserve_stats)
982                 return show_reserve_stats(ca, buf);
983         if (attr == &sysfs_alloc_debug)
984                 return show_dev_alloc_debug(ca, buf);
985
986         return 0;
987 }
988
989 STORE(bch2_dev)
990 {
991         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
992         struct bch_fs *c = ca->fs;
993         struct bch_member *mi;
994
995         sysfs_pd_controller_store(copy_gc, &ca->copygc_pd);
996
997         if (attr == &sysfs_discard) {
998                 bool v = strtoul_or_return(buf);
999
1000                 mutex_lock(&c->sb_lock);
1001                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
1002
1003                 if (v != BCH_MEMBER_DISCARD(mi)) {
1004                         SET_BCH_MEMBER_DISCARD(mi, v);
1005                         bch2_write_super(c);
1006                 }
1007                 mutex_unlock(&c->sb_lock);
1008         }
1009
1010         if (attr == &sysfs_cache_replacement_policy) {
1011                 ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
1012
1013                 if (v < 0)
1014                         return v;
1015
1016                 mutex_lock(&c->sb_lock);
1017                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
1018
1019                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
1020                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
1021                         bch2_write_super(c);
1022                 }
1023                 mutex_unlock(&c->sb_lock);
1024         }
1025
1026         if (attr == &sysfs_label) {
1027                 char *tmp;
1028                 int ret;
1029
1030                 tmp = kstrdup(buf, GFP_KERNEL);
1031                 if (!tmp)
1032                         return -ENOMEM;
1033
1034                 ret = bch2_dev_group_set(c, ca, strim(tmp));
1035                 kfree(tmp);
1036                 if (ret)
1037                         return ret;
1038         }
1039
1040         if (attr == &sysfs_wake_allocator)
1041                 bch2_wake_allocator(ca);
1042
1043         return size;
1044 }
1045 SYSFS_OPS(bch2_dev);
1046
1047 struct attribute *bch2_dev_files[] = {
1048         &sysfs_uuid,
1049         &sysfs_bucket_size,
1050         &sysfs_block_size,
1051         &sysfs_first_bucket,
1052         &sysfs_nbuckets,
1053         &sysfs_durability,
1054
1055         /* settings: */
1056         &sysfs_discard,
1057         &sysfs_cache_replacement_policy,
1058         &sysfs_state_rw,
1059         &sysfs_label,
1060
1061         &sysfs_has_data,
1062         &sysfs_iodone,
1063
1064         &sysfs_io_latency_read,
1065         &sysfs_io_latency_write,
1066         &sysfs_io_latency_stats_read,
1067         &sysfs_io_latency_stats_write,
1068         &sysfs_congested,
1069
1070         /* alloc info - other stats: */
1071         &sysfs_bucket_quantiles_last_read,
1072         &sysfs_bucket_quantiles_last_write,
1073         &sysfs_bucket_quantiles_fragmentation,
1074         &sysfs_bucket_quantiles_oldest_gen,
1075
1076         &sysfs_reserve_stats,
1077
1078         /* debug: */
1079         &sysfs_alloc_debug,
1080         &sysfs_wake_allocator,
1081
1082         sysfs_pd_controller_files(copy_gc),
1083         NULL
1084 };
1085
1086 #endif  /* NO_BCACHEFS_SYSFS */