// SPDX-License-Identifier: GPL-2.0
/*
 * bcache sysfs interfaces
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#ifndef NO_BCACHEFS_SYSFS

#include "bcachefs.h"
#include "alloc_background.h"
#include "sysfs.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "ec.h"
#include "inode.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "opts.h"
#include "rebalance.h"
#include "replicas.h"
#include "super-io.h"
#include "tests.h"

#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/sched/clock.h>

#include "util.h"

#define SYSFS_OPS(type)                                                 \
struct sysfs_ops type ## _sysfs_ops = {                                 \
        .show   = type ## _show,                                        \
        .store  = type ## _store                                        \
}

#define SHOW(fn)                                                        \
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
                           char *buf)                                   \

#define STORE(fn)                                                       \
static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
                            const char *buf, size_t size)               \

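/*
 * SHOW()/STORE() declare the show/store methods for a given name prefix, and
 * SYSFS_OPS() packages them into a struct sysfs_ops.  Illustrative expansion
 * (not generated code): SYSFS_OPS(bch2_fs) produces bch2_fs_sysfs_ops with
 * .show/.store pointing at the bch2_fs_show()/bch2_fs_store() defined below.
 */
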
#define __sysfs_attribute(_name, _mode)                                 \
        static struct attribute sysfs_##_name =                         \
                { .name = #_name, .mode = _mode }

#define write_attribute(n)      __sysfs_attribute(n, S_IWUSR)
#define read_attribute(n)       __sysfs_attribute(n, S_IRUGO)
#define rw_attribute(n)         __sysfs_attribute(n, S_IRUGO|S_IWUSR)

#define sysfs_printf(file, fmt, ...)                                    \
do {                                                                    \
        if (attr == &sysfs_ ## file)                                    \
                return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
} while (0)

#define sysfs_print(file, var)                                          \
do {                                                                    \
        if (attr == &sysfs_ ## file)                                    \
                return snprint(buf, PAGE_SIZE, var);                    \
} while (0)

#define sysfs_hprint(file, val)                                         \
do {                                                                    \
        if (attr == &sysfs_ ## file) {                                  \
                bch2_hprint(&out, val);                                 \
                pr_buf(&out, "\n");                                     \
                return out.pos - buf;                                   \
        }                                                               \
} while (0)

#define var_printf(_var, fmt)   sysfs_printf(_var, fmt, var(_var))
#define var_print(_var)         sysfs_print(_var, var(_var))
#define var_hprint(_var)        sysfs_hprint(_var, var(_var))

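/*
 * Show-path helpers: each one compares attr against the named attribute and,
 * on a match, formats the value and returns from the enclosing _show()
 * immediately, so the SHOW() bodies below read as flat attribute lists.
 * sysfs_hprint() prints in human-readable units via bch2_hprint().
 */
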
#define sysfs_strtoul(file, var)                                        \
do {                                                                    \
        if (attr == &sysfs_ ## file)                                    \
                return strtoul_safe(buf, var) ?: (ssize_t) size;        \
} while (0)

#define sysfs_strtoul_clamp(file, var, min, max)                        \
do {                                                                    \
        if (attr == &sysfs_ ## file)                                    \
                return strtoul_safe_clamp(buf, var, min, max)           \
                        ?: (ssize_t) size;                              \
} while (0)

#define strtoul_or_return(cp)                                           \
({                                                                      \
        unsigned long _v;                                               \
        int _r = kstrtoul(cp, 10, &_v);                                 \
        if (_r)                                                         \
                return _r;                                              \
        _v;                                                             \
})

#define strtoul_restrict_or_return(cp, min, max)                        \
({                                                                      \
        unsigned long __v = 0;                                          \
        int _r = strtoul_safe_restrict(cp, __v, min, max);              \
        if (_r)                                                         \
                return _r;                                              \
        __v;                                                            \
})

#define strtoi_h_or_return(cp)                                          \
({                                                                      \
        u64 _v;                                                         \
        int _r = strtoi_h(cp, &_v);                                     \
        if (_r)                                                         \
                return _r;                                              \
        _v;                                                             \
})

#define sysfs_hatoi(file, var)                                          \
do {                                                                    \
        if (attr == &sysfs_ ## file)                                    \
                return strtoi_h(buf, &var) ?: (ssize_t) size;           \
} while (0)

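/*
 * Store-path helpers: on an attribute match they parse the buffer and return
 * either the number of bytes consumed or a negative error.  The _h/hatoi
 * variants go through strtoi_h(), which presumably (the _h suffix) accepts
 * human-readable size input.
 */
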
write_attribute(trigger_journal_flush);
write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
rw_attribute(btree_gc_periodic);

read_attribute(uuid);
read_attribute(minor);
read_attribute(bucket_size);
read_attribute(block_size);
read_attribute(btree_node_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(durability);
read_attribute(iodone);

read_attribute(io_latency_read);
read_attribute(io_latency_write);
read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
read_attribute(congested);

read_attribute(bucket_quantiles_last_read);
read_attribute(bucket_quantiles_last_write);
read_attribute(bucket_quantiles_fragmentation);
read_attribute(bucket_quantiles_oldest_gen);

read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
read_attribute(journal_pins);
read_attribute(btree_updates);
read_attribute(dirty_btree_nodes);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(btree_transactions);
read_attribute(stripes_heap);

read_attribute(internal_uuid);

read_attribute(has_data);
read_attribute(alloc_debug);
write_attribute(wake_allocator);

read_attribute(read_realloc_races);
read_attribute(extent_migrate_done);
read_attribute(extent_migrate_raced);

rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);

rw_attribute(discard);
rw_attribute(cache_replacement_policy);
rw_attribute(label);

rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);

rw_attribute(rebalance_enabled);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_work);
rw_attribute(promote_whole_extents);

read_attribute(new_stripes);

rw_attribute(pd_controllers_update_seconds);

read_attribute(io_timers_read);
read_attribute(io_timers_write);

#ifdef CONFIG_BCACHEFS_TESTS
write_attribute(perf_test);
#endif /* CONFIG_BCACHEFS_TESTS */

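/*
 * Attribute declarations.  read_attribute()/write_attribute()/rw_attribute()
 * only create the struct attribute; each one must also be listed in one of
 * the attribute arrays further down (bch2_fs_files, bch2_fs_internal_files,
 * bch2_dev_files, ...) to actually show up in sysfs.
 */
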
#define x(_name)                                                \
        static struct attribute sysfs_time_stat_##_name =               \
                { .name = #_name, .mode = S_IRUGO };
        BCH_TIME_STATS()
#undef x

static struct attribute sysfs_state_rw = {
        .name = "state",
        .mode = S_IRUGO
};

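/*
 * Every node on the btree cache's live list is counted at the full
 * btree_bytes(c) node size; individual nodes are not measured, so this is a
 * capacity-based estimate of btree cache memory usage.
 */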
static size_t bch2_btree_cache_size(struct bch_fs *c)
{
        size_t ret = 0;
        struct btree *b;

        mutex_lock(&c->btree_cache.lock);
        list_for_each_entry(b, &c->btree_cache.live, list)
                ret += btree_bytes(c);

        mutex_unlock(&c->btree_cache.lock);
        return ret;
}

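/*
 * Judging by the percpu_up_read() below, bch2_fs_usage_read() returns with
 * c->mark_lock held for reading; it is released here once the usage summary
 * has been formatted.
 */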
static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
        struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);

        if (!fs_usage)
                return -ENOMEM;

        bch2_fs_usage_to_text(out, c, fs_usage);

        percpu_up_read(&c->mark_lock);

        kfree(fs_usage);
        return 0;
}

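/*
 * Walks every extent in the extents btree and tallies compressed vs
 * uncompressed data.  Only the first pointer of each extent is sampled (see
 * the break below), and sizes are accumulated in 512-byte sectors, converted
 * to bytes with << 9 when printed.
 */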
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
        u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
            nr_compressed_extents = 0,
            compressed_sectors_compressed = 0,
            compressed_sectors_uncompressed = 0;
        int ret;

        if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;

        bch2_trans_init(&trans, c, 0, 0);

        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret)
                if (k.k->type == KEY_TYPE_extent) {
                        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
                        const union bch_extent_entry *entry;
                        struct extent_ptr_decoded p;

                        extent_for_each_ptr_decode(e, p, entry) {
                                if (!crc_is_compressed(p.crc)) {
                                        nr_uncompressed_extents++;
                                        uncompressed_sectors += e.k->size;
                                } else {
                                        nr_compressed_extents++;
                                        compressed_sectors_compressed +=
                                                p.crc.compressed_size;
                                        compressed_sectors_uncompressed +=
                                                p.crc.uncompressed_size;
                                }

                                /* only looking at the first ptr */
                                break;
                        }
                }

        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
                return ret;

        pr_buf(out,
               "uncompressed data:\n"
               "        nr extents:                     %llu\n"
               "        size (bytes):                   %llu\n"
               "compressed data:\n"
               "        nr extents:                     %llu\n"
               "        compressed size (bytes):        %llu\n"
               "        uncompressed size (bytes):      %llu\n",
               nr_uncompressed_extents,
               uncompressed_sectors << 9,
               nr_compressed_extents,
               compressed_sectors_compressed << 9,
               compressed_sectors_uncompressed << 9);
        return 0;
}

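/*
 * Filesystem-level show handler; it also backs the internal dir via the
 * bch2_fs_internal wrapper further down.  A typical read from userspace
 * (sysfs path layout assumed, not taken from this file):
 *
 *     cat /sys/fs/bcachefs/<fs uuid>/btree_cache_size
 */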
SHOW(bch2_fs)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
        struct printbuf out = _PBUF(buf, PAGE_SIZE);

        sysfs_print(minor,                      c->minor);
        sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);

        sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
        sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);

        sysfs_print(block_size,                 block_bytes(c));
        sysfs_print(btree_node_size,            btree_bytes(c));
        sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));

        sysfs_print(read_realloc_races,
                    atomic_long_read(&c->read_realloc_races));
        sysfs_print(extent_migrate_done,
                    atomic_long_read(&c->extent_migrate_done));
        sysfs_print(extent_migrate_raced,
                    atomic_long_read(&c->extent_migrate_raced));

        sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);

        sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);

        sysfs_print(pd_controllers_update_seconds,
                    c->pd_controllers_update_seconds);

        sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
        sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
        sysfs_pd_controller_show(copy_gc,       &c->copygc_pd);

        if (attr == &sysfs_rebalance_work) {
                bch2_rebalance_work_to_text(&out, c);
                return out.pos - buf;
        }

        sysfs_print(promote_whole_extents,      c->promote_whole_extents);

        /* Debugging: */

        if (attr == &sysfs_alloc_debug)
                return fs_alloc_debug_to_text(&out, c) ?: out.pos - buf;

        if (attr == &sysfs_journal_debug) {
                bch2_journal_debug_to_text(&out, &c->journal);
                return out.pos - buf;
        }

        if (attr == &sysfs_journal_pins) {
                bch2_journal_pins_to_text(&out, &c->journal);
                return out.pos - buf;
        }

        if (attr == &sysfs_btree_updates) {
                bch2_btree_updates_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_dirty_btree_nodes) {
                bch2_dirty_btree_nodes_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_btree_cache) {
                bch2_btree_cache_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_btree_key_cache) {
                bch2_btree_key_cache_to_text(&out, &c->btree_key_cache);
                return out.pos - buf;
        }

        if (attr == &sysfs_btree_transactions) {
                bch2_btree_trans_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_stripes_heap) {
                bch2_stripes_heap_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_compression_stats) {
                bch2_compression_stats_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_new_stripes) {
                bch2_new_stripes_to_text(&out, c);
                return out.pos - buf;
        }

        if (attr == &sysfs_io_timers_read) {
                bch2_io_timers_to_text(&out, &c->io_clock[READ]);
                return out.pos - buf;
        }
        if (attr == &sysfs_io_timers_write) {
                bch2_io_timers_to_text(&out, &c->io_clock[WRITE]);
                return out.pos - buf;
        }

        return 0;
}

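/*
 * The store handler mirrors the show handler: numeric knobs are parsed with
 * the sysfs_strtoul helpers, the thread-controlling toggles wake their
 * threads after updating, and everything past the BCH_FS_STARTED check is
 * refused with -EPERM until the filesystem has finished starting.
 */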
STORE(bch2_fs)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);

        sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
        sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);

        if (attr == &sysfs_btree_gc_periodic) {
                ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
                        ?: (ssize_t) size;

                wake_up_process(c->gc_thread);
                return ret;
        }

        if (attr == &sysfs_copy_gc_enabled) {
                ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
                        ?: (ssize_t) size;

                if (c->copygc_thread)
                        wake_up_process(c->copygc_thread);
                return ret;
        }

        if (attr == &sysfs_rebalance_enabled) {
                ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
                        ?: (ssize_t) size;

                rebalance_wakeup(c);
                return ret;
        }

        sysfs_strtoul(pd_controllers_update_seconds,
                      c->pd_controllers_update_seconds);
        sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
        sysfs_pd_controller_store(copy_gc,      &c->copygc_pd);

        sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);

        /* Debugging: */

        if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;

        if (attr == &sysfs_trigger_journal_flush)
                bch2_journal_meta(&c->journal);

        if (attr == &sysfs_trigger_btree_coalesce)
                bch2_coalesce(c);

        if (attr == &sysfs_trigger_gc) {
                /*
                 * Full gc is currently incompatible with btree key cache:
                 */
#if 0
                down_read(&c->state_lock);
                bch2_gc(c, false, false);
                up_read(&c->state_lock);
#else
                bch2_gc_gens(c);
#endif
        }

        if (attr == &sysfs_prune_cache) {
                struct shrink_control sc;

                sc.gfp_mask = GFP_KERNEL;
                sc.nr_to_scan = strtoul_or_return(buf);
                c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
        }

#ifdef CONFIG_BCACHEFS_TESTS
        if (attr == &sysfs_perf_test) {
                char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
                char *test              = strsep(&p, " \t\n");
                char *nr_str            = strsep(&p, " \t\n");
                char *threads_str       = strsep(&p, " \t\n");
                unsigned threads;
                u64 nr;
                int ret = -EINVAL;

                if (threads_str &&
                    !(ret = kstrtouint(threads_str, 10, &threads)) &&
                    !(ret = bch2_strtoull_h(nr_str, &nr)))
                        ret = bch2_btree_perf_test(c, test, nr, threads);
                kfree(tmp);

                if (ret)
                        size = ret;
        }
#endif
        return size;
}
SYSFS_OPS(bch2_fs);

struct attribute *bch2_fs_files[] = {
        &sysfs_minor,
        &sysfs_block_size,
        &sysfs_btree_node_size,
        &sysfs_btree_cache_size,

        &sysfs_journal_write_delay_ms,
        &sysfs_journal_reclaim_delay_ms,

        &sysfs_promote_whole_extents,

        &sysfs_compression_stats,

#ifdef CONFIG_BCACHEFS_TESTS
        &sysfs_perf_test,
#endif
        NULL
};

/* internal dir - just a wrapper */

SHOW(bch2_fs_internal)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
        return bch2_fs_show(&c->kobj, attr, buf);
}

STORE(bch2_fs_internal)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
        return bch2_fs_store(&c->kobj, attr, buf, size);
}
SYSFS_OPS(bch2_fs_internal);

struct attribute *bch2_fs_internal_files[] = {
        &sysfs_alloc_debug,
        &sysfs_journal_debug,
        &sysfs_journal_pins,
        &sysfs_btree_updates,
        &sysfs_dirty_btree_nodes,
        &sysfs_btree_cache,
        &sysfs_btree_key_cache,
        &sysfs_btree_transactions,
        &sysfs_stripes_heap,

        &sysfs_read_realloc_races,
        &sysfs_extent_migrate_done,
        &sysfs_extent_migrate_raced,

        &sysfs_trigger_journal_flush,
        &sysfs_trigger_btree_coalesce,
        &sysfs_trigger_gc,
        &sysfs_prune_cache,

        &sysfs_copy_gc_enabled,

        &sysfs_rebalance_enabled,
        &sysfs_rebalance_work,
        sysfs_pd_controller_files(rebalance),
        sysfs_pd_controller_files(copy_gc),

        &sysfs_new_stripes,

        &sysfs_io_timers_read,
        &sysfs_io_timers_write,

        &sysfs_internal_uuid,
        NULL
};

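/*
 * The write-only trigger attributes in the internal dir run an action when
 * any value is written to them.  Example from userspace (sysfs path layout
 * assumed, not taken from this file):
 *
 *     echo 1 > /sys/fs/bcachefs/<fs uuid>/internal/trigger_gc
 */
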
/* options */

SHOW(bch2_fs_opts_dir)
{
        struct printbuf out = _PBUF(buf, PAGE_SIZE);
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
        const struct bch_option *opt = container_of(attr, struct bch_option, attr);
        int id = opt - bch2_opt_table;
        u64 v = bch2_opt_get_by_id(&c->opts, id);

        bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST);
        pr_buf(&out, "\n");

        return out.pos - buf;
}

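/*
 * Option writes go through four steps: parse the string with
 * bch2_opt_parse(), validate with bch2_opt_check_may_set(), persist to the
 * superblock when the option has a superblock field (set_sb != SET_NO_SB_OPT),
 * then update the in-memory options.  Setting background_target or
 * background_compression to a non-zero value also queues rebalance work.
 */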
STORE(bch2_fs_opts_dir)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
        const struct bch_option *opt = container_of(attr, struct bch_option, attr);
        int ret, id = opt - bch2_opt_table;
        char *tmp;
        u64 v;

        tmp = kstrdup(buf, GFP_KERNEL);
        if (!tmp)
                return -ENOMEM;

        ret = bch2_opt_parse(c, opt, strim(tmp), &v);
        kfree(tmp);

        if (ret < 0)
                return ret;

        ret = bch2_opt_check_may_set(c, id, v);
        if (ret < 0)
                return ret;

        if (opt->set_sb != SET_NO_SB_OPT) {
                mutex_lock(&c->sb_lock);
                opt->set_sb(c->disk_sb.sb, v);
                bch2_write_super(c);
                mutex_unlock(&c->sb_lock);
        }

        bch2_opt_set_by_id(&c->opts, id, v);

        if ((id == Opt_background_target ||
             id == Opt_background_compression) && v) {
                bch2_rebalance_add_work(c, S64_MAX);
                rebalance_wakeup(c);
        }

        return size;
}
SYSFS_OPS(bch2_fs_opts_dir);

struct attribute *bch2_fs_opts_dir_files[] = { NULL };

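/*
 * bch2_fs_opts_dir_files[] is intentionally empty: the options directory is
 * populated at runtime by bch2_opts_create_sysfs_files(), which creates a
 * file for every option flagged OPT_FORMAT, OPT_MOUNT or OPT_RUNTIME.
 */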
int bch2_opts_create_sysfs_files(struct kobject *kobj)
{
        const struct bch_option *i;
        int ret;

        for (i = bch2_opt_table;
             i < bch2_opt_table + bch2_opts_nr;
             i++) {
                if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME)))
                        continue;

                ret = sysfs_create_file(kobj, &i->attr);
                if (ret)
                        return ret;
        }

        return 0;
}

/* time stats */

SHOW(bch2_fs_time_stats)
{
        struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
        struct printbuf out = _PBUF(buf, PAGE_SIZE);

#define x(name)                                                         \
        if (attr == &sysfs_time_stat_##name) {                          \
                bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\
                return out.pos - buf;                                   \
        }
        BCH_TIME_STATS()
#undef x

        return 0;
}

STORE(bch2_fs_time_stats)
{
        return size;
}
SYSFS_OPS(bch2_fs_time_stats);

struct attribute *bch2_fs_time_stats_files[] = {
#define x(name)                                         \
        &sysfs_time_stat_##name,
        BCH_TIME_STATS()
#undef x
        NULL
};

typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
                                 size_t, void *);

static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
                                  size_t b, void *private)
{
        int rw = (private ? 1 : 0);

        return atomic64_read(&c->io_clock[rw].now) - bucket(ca, b)->io_time[rw];
}

static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
                                       size_t b, void *private)
{
        struct bucket *g = bucket(ca, b);
        return bucket_sectors_used(g->mark);
}

static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
                                     size_t b, void *private)
{
        return bucket_gc_gen(bucket(ca, b));
}

static int unsigned_cmp(const void *_l, const void *_r)
{
        const unsigned *l = _l;
        const unsigned *r = _r;

        return cmp_int(*l, *r);
}

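/*
 * Snapshots one value per bucket via the supplied bucket_map_fn, sorts the
 * array, drops trailing zeroes, then prints 31 evenly spaced quantiles - a
 * compact histogram of e.g. bucket age, fragmentation or oldest gen.
 */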
static int quantiles_to_text(struct printbuf *out,
                             struct bch_fs *c, struct bch_dev *ca,
                             bucket_map_fn *fn, void *private)
{
        size_t i, n;
        /* Compute 31 quantiles */
        unsigned q[31], *p;

        down_read(&ca->bucket_lock);
        n = ca->mi.nbuckets;

        p = vzalloc(n * sizeof(unsigned));
        if (!p) {
                up_read(&ca->bucket_lock);
                return -ENOMEM;
        }

        for (i = ca->mi.first_bucket; i < n; i++)
                p[i] = fn(c, ca, i, private);

        sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
        up_read(&ca->bucket_lock);

        while (n &&
               !p[n - 1])
                --n;

        for (i = 0; i < ARRAY_SIZE(q); i++)
                q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];

        vfree(p);

        for (i = 0; i < ARRAY_SIZE(q); i++)
                pr_buf(out, "%u ", q[i]);
        pr_buf(out, "\n");
        return 0;
}

static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca)
{
        enum alloc_reserve i;

        spin_lock(&ca->fs->freelist_lock);

        pr_buf(out, "free_inc:\t%zu\t%zu\n",
               fifo_used(&ca->free_inc),
               ca->free_inc.size);

        for (i = 0; i < RESERVE_NR; i++)
                pr_buf(out, "free[%u]:\t%zu\t%zu\n", i,
                       fifo_used(&ca->free[i]),
                       ca->free[i].size);

        spin_unlock(&ca->fs->freelist_lock);
}

static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
{
        struct bch_fs *c = ca->fs;
        struct bch_dev_usage stats = bch2_dev_usage_read(ca);
        unsigned i, nr[BCH_DATA_NR];

        memset(nr, 0, sizeof(nr));

        for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
                nr[c->open_buckets[i].type]++;

        pr_buf(out,
               "\t\t buckets\t sectors      fragmented\n"
               "capacity%16llu\n",
               ca->mi.nbuckets - ca->mi.first_bucket);

        for (i = 1; i < BCH_DATA_NR; i++)
                pr_buf(out, "%-8s%16llu%16llu%16llu\n",
                       bch2_data_types[i], stats.d[i].buckets,
                       stats.d[i].sectors, stats.d[i].fragmented);

        pr_buf(out,
               "ec\t%16llu\n"
               "available%15llu\n"
               "alloc\t%16llu\n"
               "\n"
               "free_inc\t\t%zu/%zu\n"
               "free[RESERVE_MOVINGGC]\t%zu/%zu\n"
               "free[RESERVE_NONE]\t%zu/%zu\n"
               "freelist_wait\t\t%s\n"
               "open buckets\t\t%u/%u (reserved %u)\n"
               "open_buckets_wait\t%s\n"
               "open_buckets_btree\t%u\n"
               "open_buckets_user\t%u\n"
               "btree reserve cache\t%u\n",
               stats.buckets_ec,
               __dev_buckets_available(ca, stats),
               stats.buckets_alloc,
               fifo_used(&ca->free_inc),                ca->free_inc.size,
               fifo_used(&ca->free[RESERVE_MOVINGGC]),  ca->free[RESERVE_MOVINGGC].size,
               fifo_used(&ca->free[RESERVE_NONE]),      ca->free[RESERVE_NONE].size,
               c->freelist_wait.list.first              ? "waiting" : "empty",
               c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
               BTREE_NODE_OPEN_BUCKET_RESERVE,
               c->open_buckets_wait.list.first          ? "waiting" : "empty",
               nr[BCH_DATA_btree],
               nr[BCH_DATA_user],
               c->btree_reserve_cache_nr);
}

static const char * const bch2_rw[] = {
        "read",
        "write",
        NULL
};

static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
{
        int rw, i;

        for (rw = 0; rw < 2; rw++) {
                pr_buf(out, "%s:\n", bch2_rw[rw]);

                for (i = 1; i < BCH_DATA_NR; i++)
                        pr_buf(out, "%-12s:%12llu\n",
                               bch2_data_types[i],
                               percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
        }
}

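/*
 * Per-device attributes.  A typical read from userspace (directory naming
 * assumed, not taken from this file):
 *
 *     cat /sys/fs/bcachefs/<fs uuid>/dev-<idx>/io_latency_stats_read
 */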
SHOW(bch2_dev)
{
        struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
        struct bch_fs *c = ca->fs;
        struct printbuf out = _PBUF(buf, PAGE_SIZE);

        sysfs_printf(uuid,              "%pU\n", ca->uuid.b);

        sysfs_print(bucket_size,        bucket_bytes(ca));
        sysfs_print(block_size,         block_bytes(c));
        sysfs_print(first_bucket,       ca->mi.first_bucket);
        sysfs_print(nbuckets,           ca->mi.nbuckets);
        sysfs_print(durability,         ca->mi.durability);
        sysfs_print(discard,            ca->mi.discard);

        if (attr == &sysfs_label) {
                if (ca->mi.group) {
                        mutex_lock(&c->sb_lock);
                        bch2_disk_path_to_text(&out, &c->disk_sb,
                                               ca->mi.group - 1);
                        mutex_unlock(&c->sb_lock);
                }

                pr_buf(&out, "\n");
                return out.pos - buf;
        }

        if (attr == &sysfs_has_data) {
                bch2_flags_to_text(&out, bch2_data_types,
                                   bch2_dev_has_data(c, ca));
                pr_buf(&out, "\n");
                return out.pos - buf;
        }

        if (attr == &sysfs_cache_replacement_policy) {
                bch2_string_opt_to_text(&out,
                                        bch2_cache_replacement_policies,
                                        ca->mi.replacement);
                pr_buf(&out, "\n");
                return out.pos - buf;
        }

        if (attr == &sysfs_state_rw) {
                bch2_string_opt_to_text(&out, bch2_dev_state,
                                        ca->mi.state);
                pr_buf(&out, "\n");
                return out.pos - buf;
        }

        if (attr == &sysfs_iodone) {
                dev_iodone_to_text(&out, ca);
                return out.pos - buf;
        }

        sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
        sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));

        if (attr == &sysfs_io_latency_stats_read) {
                bch2_time_stats_to_text(&out, &ca->io_latency[READ]);
                return out.pos - buf;
        }
        if (attr == &sysfs_io_latency_stats_write) {
                bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]);
                return out.pos - buf;
        }

        sysfs_printf(congested,                 "%u%%",
                     clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
                     * 100 / CONGESTED_MAX);

        if (attr == &sysfs_bucket_quantiles_last_read)
                return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf;
        if (attr == &sysfs_bucket_quantiles_last_write)
                return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf;
        if (attr == &sysfs_bucket_quantiles_fragmentation)
                return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL)  ?: out.pos - buf;
        if (attr == &sysfs_bucket_quantiles_oldest_gen)
                return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL)    ?: out.pos - buf;

        if (attr == &sysfs_reserve_stats) {
                reserve_stats_to_text(&out, ca);
                return out.pos - buf;
        }
        if (attr == &sysfs_alloc_debug) {
                dev_alloc_debug_to_text(&out, ca);
                return out.pos - buf;
        }

        return 0;
}

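/*
 * Device attribute writes: discard and cache_replacement_policy are persisted
 * to this device's superblock member entry under sb_lock, label is routed
 * through bch2_dev_group_set(), and wake_allocator calls
 * bch2_wake_allocator().
 */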
STORE(bch2_dev)
{
        struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
        struct bch_fs *c = ca->fs;
        struct bch_member *mi;

        if (attr == &sysfs_discard) {
                bool v = strtoul_or_return(buf);

                mutex_lock(&c->sb_lock);
                mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];

                if (v != BCH_MEMBER_DISCARD(mi)) {
                        SET_BCH_MEMBER_DISCARD(mi, v);
                        bch2_write_super(c);
                }
                mutex_unlock(&c->sb_lock);
        }

        if (attr == &sysfs_cache_replacement_policy) {
                ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);

                if (v < 0)
                        return v;

                mutex_lock(&c->sb_lock);
                mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];

                if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
                        SET_BCH_MEMBER_REPLACEMENT(mi, v);
                        bch2_write_super(c);
                }
                mutex_unlock(&c->sb_lock);
        }

        if (attr == &sysfs_label) {
                char *tmp;
                int ret;

                tmp = kstrdup(buf, GFP_KERNEL);
                if (!tmp)
                        return -ENOMEM;

                ret = bch2_dev_group_set(c, ca, strim(tmp));
                kfree(tmp);
                if (ret)
                        return ret;
        }

        if (attr == &sysfs_wake_allocator)
                bch2_wake_allocator(ca);

        return size;
}
SYSFS_OPS(bch2_dev);

struct attribute *bch2_dev_files[] = {
        &sysfs_uuid,
        &sysfs_bucket_size,
        &sysfs_block_size,
        &sysfs_first_bucket,
        &sysfs_nbuckets,
        &sysfs_durability,

        /* settings: */
        &sysfs_discard,
        &sysfs_cache_replacement_policy,
        &sysfs_state_rw,
        &sysfs_label,

        &sysfs_has_data,
        &sysfs_iodone,

        &sysfs_io_latency_read,
        &sysfs_io_latency_write,
        &sysfs_io_latency_stats_read,
        &sysfs_io_latency_stats_write,
        &sysfs_congested,

        /* alloc info - other stats: */
        &sysfs_bucket_quantiles_last_read,
        &sysfs_bucket_quantiles_last_write,
        &sysfs_bucket_quantiles_fragmentation,
        &sysfs_bucket_quantiles_oldest_gen,

        &sysfs_reserve_stats,

        /* debug: */
        &sysfs_alloc_debug,
        &sysfs_wake_allocator,
        NULL
};

#endif /* NO_BCACHEFS_SYSFS */