1 // SPDX-License-Identifier: GPL-2.0
3 * bcache sysfs interfaces
5 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
6 * Copyright 2012 Google, Inc.
9 #ifndef NO_BCACHEFS_SYSFS
12 #include "alloc_background.h"
13 #include "alloc_foreground.h"
15 #include "btree_cache.h"
17 #include "btree_iter.h"
18 #include "btree_key_cache.h"
19 #include "btree_update.h"
20 #include "btree_update_interior.h"
24 #include "disk_groups.h"
31 #include "rebalance.h"
36 #include <linux/blkdev.h>
37 #include <linux/pretty-printers.h>
38 #include <linux/sort.h>
39 #include <linux/sched/clock.h>
/*
 * SYSFS_OPS(type): define <type>_sysfs_ops, a struct sysfs_ops whose
 * .show and .store callbacks are <type>_show and <type>_store.
 */
43 #define SYSFS_OPS(type) \
44 const struct sysfs_ops type ## _sysfs_ops = { \
45 .show = type ## _show, \
46 .store = type ## _store \
/*
 * Generated show/store plumbing (the #define lines that open these macros
 * are not visible in this excerpt).
 *
 * fn##_show builds its output in a local printbuf by calling fn##_to_text,
 * then copies min(out.pos, PAGE_SIZE - 1) bytes into buf and releases the
 * printbuf. It also checks whether the output already ends in '\n', and
 * whether the printbuf recorded an allocation failure while ret is zero;
 * NOTE(review): the statements guarded by those two checks are not shown
 * here.
 *
 * fn##_store has the (kobj, attr, buf, size) signature used for .store.
 */
50 static ssize_t fn ## _to_text(struct printbuf *, \
51 struct kobject *, struct attribute *);\
53 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
56 struct printbuf out = PRINTBUF; \
57 ssize_t ret = fn ## _to_text(&out, kobj, attr); \
59 if (out.pos && out.buf[out.pos - 1] != '\n') \
62 if (!ret && out.allocation_failure) \
66 ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
67 memcpy(buf, out.buf, ret); \
69 printbuf_exit(&out); \
73 static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
74 struct attribute *attr)
77 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
78 const char *buf, size_t size) \
/*
 * __sysfs_attribute(_name, _mode): define a static struct attribute named
 * sysfs_<_name> whose .name is the stringified _name and whose .mode is
 * _mode.
 *
 * write_attribute / read_attribute / rw_attribute are the same with the
 * mode fixed to S_IWUSR, S_IRUGO and S_IRUGO|S_IWUSR respectively.
 */
80 #define __sysfs_attribute(_name, _mode) \
81 static struct attribute sysfs_##_name = \
82 { .name = #_name, .mode = _mode }
84 #define write_attribute(n) __sysfs_attribute(n, S_IWUSR)
85 #define read_attribute(n) __sysfs_attribute(n, S_IRUGO)
86 #define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR)
/*
 * Show-side helpers. Each one compares the handler's `attr` against
 * &sysfs_<file> and, on a match, prints into the handler's `out` printbuf.
 *
 * sysfs_printf appends "\n" to the caller's format string, so the format
 * passed in should not end in a newline of its own.
 * sysfs_hprint prints the value with prt_human_readable_s64().
 * The var_* variants pass var(_var) as the value.
 */
88 #define sysfs_printf(file, fmt, ...) \
90 if (attr == &sysfs_ ## file) \
91 prt_printf(out, fmt "\n", __VA_ARGS__); \
94 #define sysfs_print(file, var) \
96 if (attr == &sysfs_ ## file) \
100 #define sysfs_hprint(file, val) \
102 if (attr == &sysfs_ ## file) \
103 prt_human_readable_s64(out, val); \
106 #define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var))
107 #define var_print(_var) sysfs_print(_var, var(_var))
108 #define var_hprint(_var) sysfs_hprint(_var, var(_var))
/*
 * Store-side helpers.
 *
 * sysfs_strtoul, sysfs_strtoul_clamp and sysfs_hatoi compare the handler's
 * `attr` against &sysfs_<file> and, on a match, return from the enclosing
 * function. For sysfs_strtoul and sysfs_hatoi the returned value is the
 * parser's result if it is non-zero, otherwise `size`.
 *
 * The *_or_return macros parse `cp` with kstrtoul (base 10),
 * strtoul_safe_restrict and strtoi_h respectively.
 * NOTE(review): their error-return and result lines are not visible in
 * this excerpt.
 */
110 #define sysfs_strtoul(file, var) \
112 if (attr == &sysfs_ ## file) \
113 return strtoul_safe(buf, var) ?: (ssize_t) size; \
116 #define sysfs_strtoul_clamp(file, var, min, max) \
118 if (attr == &sysfs_ ## file) \
119 return strtoul_safe_clamp(buf, var, min, max) \
123 #define strtoul_or_return(cp) \
126 int _r = kstrtoul(cp, 10, &_v); \
132 #define strtoul_restrict_or_return(cp, min, max) \
134 unsigned long __v = 0; \
135 int _r = strtoul_safe_restrict(cp, __v, min, max); \
141 #define strtoi_h_or_return(cp) \
144 int _r = strtoi_h(cp, &_v); \
150 #define sysfs_hatoi(file, var) \
152 if (attr == &sysfs_ ## file) \
153 return strtoi_h(buf, &var) ?: (ssize_t) size; \
/*
 * Attribute objects for the sysfs files handled by the show/store
 * functions in this file. The macro used picks the file mode:
 * write_attribute -> S_IWUSR, read_attribute -> S_IRUGO,
 * rw_attribute -> S_IRUGO|S_IWUSR.
 */
156 write_attribute(trigger_gc);
157 write_attribute(trigger_discards);
158 write_attribute(prune_cache);
159 rw_attribute(btree_gc_periodic);
160 rw_attribute(gc_gens_pos);
162 read_attribute(uuid);
163 read_attribute(minor);
164 read_attribute(bucket_size);
165 read_attribute(first_bucket);
166 read_attribute(nbuckets);
167 read_attribute(durability);
168 read_attribute(iodone);
170 read_attribute(io_latency_read);
171 read_attribute(io_latency_write);
172 read_attribute(io_latency_stats_read);
173 read_attribute(io_latency_stats_write);
174 read_attribute(congested);
176 read_attribute(btree_avg_write_size);
178 read_attribute(btree_cache_size);
179 read_attribute(compression_stats);
180 read_attribute(journal_debug);
181 read_attribute(btree_updates);
182 read_attribute(btree_cache);
183 read_attribute(btree_key_cache);
184 read_attribute(btree_transactions);
185 read_attribute(stripes_heap);
186 read_attribute(open_buckets);
188 read_attribute(internal_uuid);
190 read_attribute(has_data);
191 read_attribute(alloc_debug);
193 read_attribute(read_realloc_races);
194 read_attribute(extent_migrate_done);
195 read_attribute(extent_migrate_raced);
196 read_attribute(bucket_alloc_fail);
/* One read-only attribute per entry of BCH_PERSISTENT_COUNTERS(). */
198 #define x(t, n, ...) read_attribute(t);
199 BCH_PERSISTENT_COUNTERS()
202 rw_attribute(discard);
205 rw_attribute(copy_gc_enabled);
206 read_attribute(copy_gc_wait);
208 rw_attribute(rebalance_enabled);
209 sysfs_pd_controller_attribute(rebalance);
210 read_attribute(rebalance_work);
211 rw_attribute(promote_whole_extents);
213 read_attribute(new_stripes);
215 read_attribute(io_timers_read);
216 read_attribute(io_timers_write);
218 read_attribute(data_jobs);
220 #ifdef CONFIG_BCACHEFS_TESTS
221 write_attribute(perf_test);
222 #endif /* CONFIG_BCACHEFS_TESTS */
/*
 * Time-stat attributes are named sysfs_time_stat_<_name> and are read-only
 * (S_IRUGO). NOTE(review): the macro header that opens this definition is
 * not visible in this excerpt.
 */
225 static struct attribute sysfs_time_stat_##_name = \
226 { .name = #_name, .mode = S_IRUGO };
230 static struct attribute sysfs_state_rw = {
/*
 * Walk c->btree_cache.live while holding c->btree_cache.lock, adding
 * btree_bytes(c) to the running total once per list entry.
 * NOTE(review): the declarations and return statement are not visible in
 * this excerpt; presumably the accumulated total is returned.
 */
235 static size_t bch2_btree_cache_size(struct bch_fs *c)
240 mutex_lock(&c->btree_cache.lock);
241 list_for_each_entry(b, &c->btree_cache.live, list)
242 ret += btree_bytes(c);
244 mutex_unlock(&c->btree_cache.lock);
/*
 * Average btree write size: c->btree_writes_sectors divided by
 * c->btree_writes_nr, or 0 when no writes have been counted (avoids a
 * division by zero).
 */
248 static size_t bch2_btree_avg_write_size(struct bch_fs *c)
250 u64 nr = atomic64_read(&c->btree_writes_nr);
251 u64 sectors = atomic64_read(&c->btree_writes_sectors);
253 return nr ? div64_u64(sectors, nr) : 0;
/*
 * Print one line per entry on c->data_progress_list, holding
 * c->data_progress_lock for the duration of the walk. Each line carries a
 * leading string, the entry's data type name, its btree id name and its
 * current position.
 * NOTE(review): the first format argument and the return statement are
 * not visible in this excerpt.
 */
256 static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
259 struct bch_move_stats *stats;
261 mutex_lock(&c->data_progress_lock);
262 list_for_each_entry(stats, &c->data_progress_list, list) {
263 prt_printf(out, "%s: data type %s btree_id %s position: ",
265 bch2_data_types[stats->data_type],
266 bch2_btree_ids[stats->btree_id]);
267 bch2_bpos_to_text(out, stats->pos);
268 prt_printf(out, "%s", "\n");
271 mutex_unlock(&c->data_progress_lock);
/*
 * Scan every btree whose id bit is set in BTREE_ID_HAS_PTRS, across all
 * snapshots starting from POS_MIN, and summarise how much data is stored
 * uncompressed, compressed and incompressible.
 *
 * For each key, every decoded pointer is classified by
 * p.crc.compression_type:
 *   - none:           add k.k->size to uncompressed_sectors
 *   - incompressible: add k.k->size to incompressible_sectors
 *   - otherwise:      add the crc's compressed and uncompressed sizes to
 *                     the two compressed_sectors_* totals
 * The key is then counted once in one of the three nr_*_extents counters.
 *
 * The report prints three sections; every sector total is shifted left by
 * 9 before human-readable formatting (NOTE(review): presumably converting
 * 512-byte sectors to bytes).
 *
 * The function first tests BCH_FS_STARTED in c->flags.
 * NOTE(review): the statement taken when the filesystem is not started,
 * and the return statements, are not visible in this excerpt.
 */
275 static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
277 struct btree_trans trans;
278 struct btree_iter iter;
281 u64 nr_uncompressed_extents = 0,
282 nr_compressed_extents = 0,
283 nr_incompressible_extents = 0,
284 uncompressed_sectors = 0,
285 incompressible_sectors = 0,
286 compressed_sectors_compressed = 0,
287 compressed_sectors_uncompressed = 0;
290 if (!test_bit(BCH_FS_STARTED, &c->flags))
293 bch2_trans_init(&trans, c, 0, 0);
295 for (id = 0; id < BTREE_ID_NR; id++) {
296 if (!((1U << id) & BTREE_ID_HAS_PTRS))
299 for_each_btree_key(&trans, iter, id, POS_MIN,
300 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
301 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
302 const union bch_extent_entry *entry;
303 struct extent_ptr_decoded p;
304 bool compressed = false, uncompressed = false, incompressible = false;
306 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
307 switch (p.crc.compression_type) {
308 case BCH_COMPRESSION_TYPE_none:
310 uncompressed_sectors += k.k->size;
312 case BCH_COMPRESSION_TYPE_incompressible:
313 incompressible = true;
314 incompressible_sectors += k.k->size;
317 compressed_sectors_compressed +=
318 p.crc.compressed_size;
319 compressed_sectors_uncompressed +=
320 p.crc.uncompressed_size;
327 nr_incompressible_extents++;
328 else if (uncompressed)
329 nr_uncompressed_extents++;
331 nr_compressed_extents++;
333 bch2_trans_iter_exit(&trans, &iter);
336 bch2_trans_exit(&trans);
341 prt_printf(out, "uncompressed:\n");
342 prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents);
343 prt_printf(out, " size: ");
344 prt_human_readable_u64(out, uncompressed_sectors << 9);
345 prt_printf(out, "\n");
347 prt_printf(out, "compressed:\n");
348 prt_printf(out, " nr extents: %llu\n", nr_compressed_extents);
349 prt_printf(out, " compressed size: ");
350 prt_human_readable_u64(out, compressed_sectors_compressed << 9);
351 prt_printf(out, "\n");
352 prt_printf(out, " uncompressed size: ");
353 prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
354 prt_printf(out, "\n");
356 prt_printf(out, "incompressible:\n");
357 prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents);
358 prt_printf(out, " size: ");
359 prt_human_readable_u64(out, incompressible_sectors << 9);
360 prt_printf(out, "\n");
/*
 * Print "<btree name>: <position>\n", using the name of c->gc_gens_btree
 * from bch2_btree_ids[] and the position c->gc_gens_pos.
 */
364 static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
366 prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
367 bch2_bpos_to_text(out, c->gc_gens_pos);
368 prt_printf(out, "\n");
/*
 * Body of the show handler for attributes that live on struct bch_fs's
 * kobj (NOTE(review): the handler's header line is not visible in this
 * excerpt). `kobj` is mapped back to its containing bch_fs, then each
 * statement compares `attr` with one sysfs_* attribute and prints the
 * matching value into `out`.
 *
 * copy_gc_wait prints max(0, copygc_wait - io_clock[WRITE].now) shifted
 * left by 9, so it never shows a negative remaining wait.
 */
373 struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
375 sysfs_print(minor, c->minor);
376 sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
378 sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
379 sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
381 sysfs_print(read_realloc_races,
382 atomic_long_read(&c->read_realloc_races));
383 sysfs_print(extent_migrate_done,
384 atomic_long_read(&c->extent_migrate_done));
385 sysfs_print(extent_migrate_raced,
386 atomic_long_read(&c->extent_migrate_raced));
387 sysfs_print(bucket_alloc_fail,
388 atomic_long_read(&c->bucket_alloc_fail));
390 sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
392 if (attr == &sysfs_gc_gens_pos)
393 bch2_gc_gens_pos_to_text(out, c);
395 sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
397 sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
398 sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
399 sysfs_hprint(copy_gc_wait,
400 max(0LL, c->copygc_wait -
401 atomic64_read(&c->io_clock[WRITE].now)) << 9);
403 if (attr == &sysfs_rebalance_work)
404 bch2_rebalance_work_to_text(out, c);
406 sysfs_print(promote_whole_extents, c->promote_whole_extents);
410 if (attr == &sysfs_journal_debug)
411 bch2_journal_debug_to_text(out, &c->journal);
413 if (attr == &sysfs_btree_updates)
414 bch2_btree_updates_to_text(out, c);
416 if (attr == &sysfs_btree_cache)
417 bch2_btree_cache_to_text(out, c);
419 if (attr == &sysfs_btree_key_cache)
420 bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
422 if (attr == &sysfs_btree_transactions)
423 bch2_btree_trans_to_text(out, c);
425 if (attr == &sysfs_stripes_heap)
426 bch2_stripes_heap_to_text(out, c);
428 if (attr == &sysfs_open_buckets)
429 bch2_open_buckets_to_text(out, c);
431 if (attr == &sysfs_compression_stats)
432 bch2_compression_stats_to_text(out, c);
434 if (attr == &sysfs_new_stripes)
435 bch2_new_stripes_to_text(out, c);
437 if (attr == &sysfs_io_timers_read)
438 bch2_io_timers_to_text(out, &c->io_clock[READ]);
440 if (attr == &sysfs_io_timers_write)
441 bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
443 if (attr == &sysfs_data_jobs)
444 data_progress_to_text(out, c);
/*
 * Body of the store handler for attributes on struct bch_fs's kobj
 * (NOTE(review): the handler's header line and several return/cleanup
 * lines are not visible in this excerpt).
 *
 * Visible behaviour per attribute:
 *   btree_gc_periodic - parse into c->btree_gc_periodic, wake c->gc_thread
 *   copy_gc_enabled   - parse into c->copy_gc_enabled, wake
 *                       c->copygc_thread if it is non-NULL
 *   rebalance_enabled - parse into c->rebalance.enabled
 *   prune_cache       - call the btree cache shrinker's scan_objects()
 *                       with gfp_mask GFP_KERNEL and nr_to_scan parsed
 *                       from buf
 *   trigger_gc        - run bch2_gc(c, false, false) with c->state_lock
 *                       held for read
 *   perf_test         - split a copy of buf on space/tab/newline into
 *                       test name, count and thread count, then call
 *                       bch2_btree_perf_test()
 *
 * The BCH_FS_STARTED and BCH_FS_RW flag tests sit between the simple
 * tunables and the action attributes; NOTE(review): what is returned when
 * a flag is clear is not visible here.
 */
451 struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
453 if (attr == &sysfs_btree_gc_periodic) {
454 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
457 wake_up_process(c->gc_thread);
461 if (attr == &sysfs_copy_gc_enabled) {
462 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
465 if (c->copygc_thread)
466 wake_up_process(c->copygc_thread);
470 if (attr == &sysfs_rebalance_enabled) {
471 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
478 sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
480 sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
484 if (!test_bit(BCH_FS_STARTED, &c->flags))
489 if (!test_bit(BCH_FS_RW, &c->flags))
492 if (attr == &sysfs_prune_cache) {
493 struct shrink_control sc;
495 sc.gfp_mask = GFP_KERNEL;
496 sc.nr_to_scan = strtoul_or_return(buf);
497 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
500 if (attr == &sysfs_trigger_gc) {
502 * Full gc is currently incompatible with btree key cache:
505 down_read(&c->state_lock);
506 bch2_gc(c, false, false);
507 up_read(&c->state_lock);
513 if (attr == &sysfs_trigger_discards)
516 #ifdef CONFIG_BCACHEFS_TESTS
517 if (attr == &sysfs_perf_test) {
518 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
519 char *test = strsep(&p, " \t\n");
520 char *nr_str = strsep(&p, " \t\n");
521 char *threads_str = strsep(&p, " \t\n");
527 !(ret = kstrtouint(threads_str, 10, &threads)) &&
528 !(ret = bch2_strtoull_h(nr_str, &nr)))
529 ret = bch2_btree_perf_test(c, test, nr, threads);
/*
 * Attribute table bch2_fs_files. NOTE(review): only part of the list
 * (and not its terminator) is visible in this excerpt.
 */
540 struct attribute *bch2_fs_files[] = {
542 &sysfs_btree_cache_size,
543 &sysfs_btree_avg_write_size,
545 &sysfs_promote_whole_extents,
547 &sysfs_compression_stats,
549 #ifdef CONFIG_BCACHEFS_TESTS
/*
 * Persistent counters directory.
 *
 * The show handler maps counters_kobj back to its bch_fs and, for the
 * counter whose attribute matches `attr`, prints two values:
 *   "since mount:"               current value minus counters_on_mount[]
 *   "since filesystem creation:" current value
 * Both are shifted left by 9 before human-readable formatting. The first
 * tabstop of `out` is set to column 32.
 *
 * NOTE(review): the x-macro definition that wraps the per-counter body,
 * and the body of the store handler, are not visible in this excerpt.
 */
557 SHOW(bch2_fs_counters)
559 struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj);
561 u64 counter_since_mount = 0;
563 out->tabstops[0] = 32;
565 if (attr == &sysfs_##t) { \
566 counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
567 counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
568 prt_printf(out, "since mount:"); \
570 prt_human_readable_u64(out, counter_since_mount << 9); \
573 prt_printf(out, "since filesystem creation:"); \
575 prt_human_readable_u64(out, counter << 9); \
578 BCH_PERSISTENT_COUNTERS()
583 STORE(bch2_fs_counters) {
587 SYSFS_OPS(bch2_fs_counters);
589 struct attribute *bch2_fs_counters_files[] = {
592 BCH_PERSISTENT_COUNTERS()
/*
 * Both handlers map the `internal` kobject back to its bch_fs and forward
 * to bch2_fs_to_text() / bch2_fs_store() using c->kobj, so attributes in
 * this directory are served by the filesystem-level handlers.
 */
596 /* internal dir - just a wrapper */
598 SHOW(bch2_fs_internal)
600 struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
601 return bch2_fs_to_text(out, &c->kobj, attr);
604 STORE(bch2_fs_internal)
606 struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
607 return bch2_fs_store(&c->kobj, attr, buf, size);
609 SYSFS_OPS(bch2_fs_internal);
/*
 * Attribute table bch2_fs_internal_files. NOTE(review): only part of the
 * list (and not its terminator) is visible in this excerpt.
 */
611 struct attribute *bch2_fs_internal_files[] = {
612 &sysfs_journal_debug,
613 &sysfs_btree_updates,
615 &sysfs_btree_key_cache,
616 &sysfs_btree_transactions,
620 &sysfs_io_timers_read,
621 &sysfs_io_timers_write,
624 &sysfs_trigger_discards,
627 &sysfs_read_realloc_races,
628 &sysfs_extent_migrate_done,
629 &sysfs_extent_migrate_raced,
630 &sysfs_bucket_alloc_fail,
634 &sysfs_copy_gc_enabled,
637 &sysfs_rebalance_enabled,
638 &sysfs_rebalance_work,
639 sysfs_pd_controller_files(rebalance),
643 &sysfs_internal_uuid,
/*
 * Show one filesystem option. The attribute is embedded in a struct
 * bch_option, so container_of() recovers the option and pointer
 * arithmetic against bch2_opt_table gives its id. The current value is
 * read from c->opts and printed with OPT_SHOW_FULL_LIST.
 */
649 SHOW(bch2_fs_opts_dir)
651 struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
652 const struct bch_option *opt = container_of(attr, struct bch_option, attr);
653 int id = opt - bch2_opt_table;
654 u64 v = bch2_opt_get_by_id(&c->opts, id);
656 bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
/*
 * Set one filesystem option from user input.
 *
 * Visible steps, in order: try to take a reference on c->writes, duplicate
 * buf, parse the trimmed copy with bch2_opt_parse(), validate with
 * bch2_opt_check_may_set(), then store the value both via
 * bch2_opt_set_sb() and in c->opts. A non-zero background_target or
 * background_compression additionally queues rebalance work. The c->writes
 * reference is dropped before returning.
 *
 * NOTE(review): the error checks and return statements between these
 * steps are not visible in this excerpt.
 */
662 STORE(bch2_fs_opts_dir)
664 struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
665 const struct bch_option *opt = container_of(attr, struct bch_option, attr);
666 int ret, id = opt - bch2_opt_table;
671 * We don't need to take c->writes for correctness, but it eliminates an
672 * unsightly error message in the dmesg log when we're RO:
674 if (unlikely(!percpu_ref_tryget(&c->writes)))
677 tmp = kstrdup(buf, GFP_KERNEL);
683 ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
689 ret = bch2_opt_check_may_set(c, id, v);
693 bch2_opt_set_sb(c, opt, v);
694 bch2_opt_set_by_id(&c->opts, id, v);
696 if ((id == Opt_background_target ||
697 id == Opt_background_compression) && v) {
698 bch2_rebalance_add_work(c, S64_MAX);
704 percpu_ref_put(&c->writes);
707 SYSFS_OPS(bch2_fs_opts_dir);
/*
 * The static attribute table for the options directory is empty; its
 * files are created at runtime by bch2_opts_create_sysfs_files().
 *
 * bch2_opts_create_sysfs_files() walks bch2_opt_table (bch2_opts_nr
 * entries), tests each entry's OPT_FS flag and calls sysfs_create_file()
 * on kobj with the option's embedded attribute.
 * NOTE(review): the statement taken for entries without OPT_FS
 * (presumably a skip) and the error handling are not visible in this
 * excerpt.
 */
709 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
711 int bch2_opts_create_sysfs_files(struct kobject *kobj)
713 const struct bch_option *i;
716 for (i = bch2_opt_table;
717 i < bch2_opt_table + bch2_opts_nr;
719 if (!(i->flags & OPT_FS))
722 ret = sysfs_create_file(kobj, &i->attr);
/*
 * Time statistics directory. The show handler maps the time_stats kobject
 * back to its bch_fs and, for the sysfs_time_stat_<name> attribute that
 * matches `attr`, prints c->times[BCH_TIME_<name>] with
 * bch2_time_stats_to_text().
 * NOTE(review): the x-macro lines that expand the per-name bodies, and the
 * body of the store handler, are not visible in this excerpt.
 */
732 SHOW(bch2_fs_time_stats)
734 struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
737 if (attr == &sysfs_time_stat_##name) \
738 bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
745 STORE(bch2_fs_time_stats)
749 SYSFS_OPS(bch2_fs_time_stats);
751 struct attribute *bch2_fs_time_stats_files[] = {
753 &sysfs_time_stat_##name,
/*
 * Print allocator debugging state for one device.
 *
 * nr[] is zeroed and then incremented once per slot of c->open_buckets,
 * indexed by that slot's data_type. The report starts with a column header
 * and the capacity (nbuckets - first_bucket), followed by one row per data
 * type with buckets, sectors and fragmented from the device usage
 * snapshot, then a block of allocator wait-list and open-bucket figures.
 *
 * NOTE(review): several format lines and arguments of the final
 * prt_printf() are not visible in this excerpt.
 */
759 static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
761 struct bch_fs *c = ca->fs;
762 struct bch_dev_usage stats = bch2_dev_usage_read(ca);
763 unsigned i, nr[BCH_DATA_NR];
765 memset(nr, 0, sizeof(nr));
767 for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
768 nr[c->open_buckets[i].data_type]++;
771 "\t\t\t buckets\t sectors fragmented\n"
772 "capacity\t%16llu\n",
773 ca->mi.nbuckets - ca->mi.first_bucket);
775 for (i = 0; i < BCH_DATA_NR; i++)
776 prt_printf(out, "%-16s%16llu%16llu%16llu\n",
777 bch2_data_types[i], stats.d[i].buckets,
778 stats.d[i].sectors, stats.d[i].fragmented);
783 "freelist_wait\t\t%s\n"
784 "open buckets allocated\t%u\n"
785 "open buckets this dev\t%u\n"
786 "open buckets total\t%u\n"
787 "open_buckets_wait\t%s\n"
788 "open_buckets_btree\t%u\n"
789 "open_buckets_user\t%u\n"
790 "btree reserve cache\t%u\n",
792 c->freelist_wait.list.first ? "waiting" : "empty",
793 OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
796 c->open_buckets_wait.list.first ? "waiting" : "empty",
799 c->btree_reserve_cache_nr);
/*
 * bch2_rw[] holds the labels printed for each I/O direction
 * (NOTE(review): its entries are not visible in this excerpt).
 *
 * dev_iodone_to_text() prints, for each of the two directions, a
 * "<label>:" heading followed by one row per data type starting at index 1
 * (index 0 is skipped). Each row shows the per-cpu sector count from
 * ca->io_done shifted left by 9.
 */
802 static const char * const bch2_rw[] = {
808 static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
812 for (rw = 0; rw < 2; rw++) {
813 prt_printf(out, "%s:\n", bch2_rw[rw]);
815 for (i = 1; i < BCH_DATA_NR; i++)
816 prt_printf(out, "%-12s:%12llu\n",
818 percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
/*
 * Body of the show handler for per-device attributes (NOTE(review): the
 * handler's header line is not visible in this excerpt). `kobj` is mapped
 * back to its bch_dev, and each statement compares `attr` with one
 * sysfs_* attribute and prints the matching value into `out`.
 *
 * sysfs_printf() already appends "\n" to its format, so the uuid format
 * must not carry its own newline; this matches how internal_uuid is
 * printed by the filesystem-level handler.
 *
 * label is printed with c->sb_lock held because it reads the superblock.
 * congested is the clamped congestion value expressed as a percentage of
 * CONGESTED_MAX.
 */
824 struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
825 struct bch_fs *c = ca->fs;
827 sysfs_printf(uuid, "%pU", ca->uuid.b);
829 sysfs_print(bucket_size, bucket_bytes(ca));
830 sysfs_print(first_bucket, ca->mi.first_bucket);
831 sysfs_print(nbuckets, ca->mi.nbuckets);
832 sysfs_print(durability, ca->mi.durability);
833 sysfs_print(discard, ca->mi.discard);
835 if (attr == &sysfs_label) {
837 mutex_lock(&c->sb_lock);
838 bch2_disk_path_to_text(out, c->disk_sb.sb,
840 mutex_unlock(&c->sb_lock);
846 if (attr == &sysfs_has_data) {
847 prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca));
851 if (attr == &sysfs_state_rw) {
852 prt_string_option(out, bch2_member_states, ca->mi.state);
856 if (attr == &sysfs_iodone)
857 dev_iodone_to_text(out, ca);
859 sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
860 sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
862 if (attr == &sysfs_io_latency_stats_read)
863 bch2_time_stats_to_text(out, &ca->io_latency[READ]);
865 if (attr == &sysfs_io_latency_stats_write)
866 bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
868 sysfs_printf(congested, "%u%%",
869 clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
870 * 100 / CONGESTED_MAX);
872 if (attr == &sysfs_alloc_debug)
873 dev_alloc_debug_to_text(out, ca);
/*
 * Body of the store handler for per-device attributes (NOTE(review): the
 * handler's header line and several lines of each branch are not visible
 * in this excerpt).
 *
 * discard: parse buf as a boolean and, with c->sb_lock held, look up this
 *          device's member entry in the superblock; the member's discard
 *          flag is only rewritten when the new value differs.
 * label:   duplicate buf and pass the trimmed copy to
 *          bch2_dev_group_set().
 */
880 struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
881 struct bch_fs *c = ca->fs;
882 struct bch_member *mi;
884 if (attr == &sysfs_discard) {
885 bool v = strtoul_or_return(buf);
887 mutex_lock(&c->sb_lock);
888 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
890 if (v != BCH_MEMBER_DISCARD(mi)) {
891 SET_BCH_MEMBER_DISCARD(mi, v);
894 mutex_unlock(&c->sb_lock);
897 if (attr == &sysfs_label) {
901 tmp = kstrdup(buf, GFP_KERNEL);
905 ret = bch2_dev_group_set(c, ca, strim(tmp));
/*
 * Attribute table bch2_dev_files. NOTE(review): only part of the list
 * (and not its terminator) is visible in this excerpt.
 */
915 struct attribute *bch2_dev_files[] = {
930 &sysfs_io_latency_read,
931 &sysfs_io_latency_write,
932 &sysfs_io_latency_stats_read,
933 &sysfs_io_latency_stats_write,
941 #endif /* NO_BCACHEFS_SYSFS */