1 // SPDX-License-Identifier: GPL-2.0
3 * bcache sysfs interfaces
5 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
6 * Copyright 2012 Google, Inc.
9 #ifndef NO_BCACHEFS_SYSFS
12 #include "alloc_background.h"
13 #include "alloc_foreground.h"
15 #include "btree_cache.h"
17 #include "btree_iter.h"
18 #include "btree_key_cache.h"
19 #include "btree_update.h"
20 #include "btree_update_interior.h"
24 #include "disk_groups.h"
31 #include "rebalance.h"
36 #include <linux/blkdev.h>
37 #include <linux/sort.h>
38 #include <linux/sched/clock.h>
/*
 * SYSFS_OPS(type): defines a struct sysfs_ops named <type>_sysfs_ops whose
 * .show and .store members are <type>_show and <type>_store.
 */
42 #define SYSFS_OPS(type) \
43 struct sysfs_ops type ## _sysfs_ops = { \
44 .show = type ## _show, \
45 .store = type ## _store \
49 static ssize_t fn ## _to_text(struct printbuf *, \
50 struct kobject *, struct attribute *);\
52 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
55 struct printbuf out = PRINTBUF; \
56 ssize_t ret = fn ## _to_text(&out, kobj, attr); \
58 if (!ret && out.allocation_failure) \
62 ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
63 memcpy(buf, out.buf, ret); \
65 printbuf_exit(&out); \
69 static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
70 struct attribute *attr)
73 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
74 const char *buf, size_t size) \
/*
 * Defines a file-local struct attribute named sysfs_<_name>; its .name is the
 * stringified _name and its .mode is _mode.
 */
76 #define __sysfs_attribute(_name, _mode) \
77 static struct attribute sysfs_##_name = \
78 { .name = #_name, .mode = _mode }
/* Shorthands that fix the mode: owner-writable, world-readable, or both. */
80 #define write_attribute(n) __sysfs_attribute(n, S_IWUSR)
81 #define read_attribute(n) __sysfs_attribute(n, S_IRUGO)
82 #define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR)
/*
 * Helpers for the bodies of show/store handlers.
 *
 * The sysfs_* helpers compare the handler's local `attr` against
 * &sysfs_<file>; on a match the print variants append to the local `out`
 * and the parse variants return from the enclosing function with either the
 * parser's error or `size`.  They therefore only work where `attr`, `out`,
 * `buf` and `size` are in scope.
 *
 * sysfs_printf appends "\n" to the format string it is given, so callers
 * should not include a trailing newline themselves.
 *
 * var_*() forward to the sysfs_* helpers using a var() macro that the
 * including code is expected to provide.
 */
84 #define sysfs_printf(file, fmt, ...) \
86 if (attr == &sysfs_ ## file) \
87 pr_buf(out, fmt "\n", __VA_ARGS__); \
90 #define sysfs_print(file, var) \
92 if (attr == &sysfs_ ## file) \
96 #define sysfs_hprint(file, val) \
98 if (attr == &sysfs_ ## file) \
99 bch2_hprint(out, val); \
102 #define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var))
103 #define var_print(_var) sysfs_print(_var, var(_var))
104 #define var_hprint(_var) sysfs_hprint(_var, var(_var))
106 #define sysfs_strtoul(file, var) \
108 if (attr == &sysfs_ ## file) \
109 return strtoul_safe(buf, var) ?: (ssize_t) size; \
112 #define sysfs_strtoul_clamp(file, var, min, max) \
114 if (attr == &sysfs_ ## file) \
115 return strtoul_safe_clamp(buf, var, min, max) \
119 #define strtoul_or_return(cp) \
122 int _r = kstrtoul(cp, 10, &_v); \
128 #define strtoul_restrict_or_return(cp, min, max) \
130 unsigned long __v = 0; \
131 int _r = strtoul_safe_restrict(cp, __v, min, max); \
137 #define strtoi_h_or_return(cp) \
140 int _r = strtoi_h(cp, &_v); \
146 #define sysfs_hatoi(file, var) \
148 if (attr == &sysfs_ ## file) \
149 return strtoi_h(buf, &var) ?: (ssize_t) size; \
/*
 * Attribute objects (sysfs_<name>) referenced by the show/store handlers and
 * the file tables later in this file.  write_attribute entries are
 * store-only, read_attribute entries are show-only, rw_attribute entries
 * support both.
 */
152 write_attribute(trigger_gc);
153 write_attribute(prune_cache);
154 rw_attribute(btree_gc_periodic);
155 rw_attribute(gc_gens_pos);
157 read_attribute(uuid);
158 read_attribute(minor);
159 read_attribute(bucket_size);
160 read_attribute(first_bucket);
161 read_attribute(nbuckets);
162 read_attribute(durability);
163 read_attribute(iodone);
165 read_attribute(io_latency_read);
166 read_attribute(io_latency_write);
167 read_attribute(io_latency_stats_read);
168 read_attribute(io_latency_stats_write);
169 read_attribute(congested);
171 read_attribute(btree_avg_write_size);
173 read_attribute(reserve_stats);
174 read_attribute(btree_cache_size);
175 read_attribute(compression_stats);
176 read_attribute(journal_debug);
177 read_attribute(journal_pins);
178 read_attribute(btree_updates);
179 read_attribute(dirty_btree_nodes);
180 read_attribute(btree_cache);
181 read_attribute(btree_key_cache);
182 read_attribute(btree_transactions);
183 read_attribute(stripes_heap);
184 read_attribute(open_buckets);
186 read_attribute(internal_uuid);
188 read_attribute(has_data);
189 read_attribute(alloc_debug);
190 write_attribute(wake_allocator);
192 read_attribute(read_realloc_races);
193 read_attribute(extent_migrate_done);
194 read_attribute(extent_migrate_raced);
196 rw_attribute(discard);
199 rw_attribute(copy_gc_enabled);
200 read_attribute(copy_gc_wait);
202 rw_attribute(rebalance_enabled);
203 sysfs_pd_controller_attribute(rebalance);
204 read_attribute(rebalance_work);
205 rw_attribute(promote_whole_extents);
207 read_attribute(new_stripes);
209 read_attribute(io_timers_read);
210 read_attribute(io_timers_write);
212 read_attribute(data_jobs);
/* The perf_test trigger only exists when the test code is built in. */
214 #ifdef CONFIG_BCACHEFS_TESTS
215 write_attribute(perf_test);
216 #endif /* CONFIG_BCACHEFS_TESTS */
219 static struct attribute sysfs_time_stat_##_name = \
220 { .name = #_name, .mode = S_IRUGO };
224 static struct attribute sysfs_state_rw = {
/*
 * Accumulates btree_bytes(c) once for every node on c->btree_cache.live.
 * The list is walked with c->btree_cache.lock held.
 */
229 static size_t bch2_btree_cache_size(struct bch_fs *c)
234 mutex_lock(&c->btree_cache.lock);
235 list_for_each_entry(b, &c->btree_cache.live, list)
236 ret += btree_bytes(c);
238 mutex_unlock(&c->btree_cache.lock);
/*
 * Returns c->btree_writes_sectors divided by c->btree_writes_nr, i.e. the
 * mean number of sectors per counted btree write.
 */
242 static size_t bch2_btree_avg_write_size(struct bch_fs *c)
244 u64 nr = atomic64_read(&c->btree_writes_nr);
245 u64 sectors = atomic64_read(&c->btree_writes_sectors);
/* Guard against dividing by zero before any write has been counted. */
247 return nr ? div64_u64(sectors, nr) : 0;
/*
 * Appends one line to @out for every bch_move_stats entry on
 * c->data_progress_list: a leading string, the data type name, the btree id
 * name and the current position.  The list is walked with
 * c->data_progress_lock held.
 */
250 static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
253 struct bch_move_stats *stats;
255 mutex_lock(&c->data_progress_lock);
256 list_for_each_entry(stats, &c->data_progress_list, list) {
257 pr_buf(out, "%s: data type %s btree_id %s position: ",
259 bch2_data_types[stats->data_type],
260 bch2_btree_ids[stats->btree_id]);
261 bch2_bpos_to_text(out, stats->pos);
262 pr_buf(out, "%s", "\n");
265 mutex_unlock(&c->data_progress_lock);
/*
 * Scans every btree whose id is set in BTREE_ID_HAS_PTRS, decodes the
 * pointers of each key and tallies extents and sectors by compression type.
 * The totals are then printed to @out in three sections: uncompressed,
 * compressed and incompressible.  Sector totals are shifted left by 9
 * before being handed to bch2_hprint().
 */
269 static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
271 struct btree_trans trans;
272 struct btree_iter iter;
275 u64 nr_uncompressed_extents = 0,
276 nr_compressed_extents = 0,
277 nr_incompressible_extents = 0,
278 uncompressed_sectors = 0,
279 incompressible_sectors = 0,
280 compressed_sectors_compressed = 0,
281 compressed_sectors_uncompressed = 0;
/* The started flag is tested before any btree transaction is set up. */
284 if (!test_bit(BCH_FS_STARTED, &c->flags))
287 bch2_trans_init(&trans, c, 0, 0);
289 for (id = 0; id < BTREE_ID_NR; id++) {
/* Only btrees whose keys can carry pointers are of interest. */
290 if (!((1U << id) & BTREE_ID_HAS_PTRS))
293 for_each_btree_key(&trans, iter, id, POS_MIN,
294 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
295 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
296 const union bch_extent_entry *entry;
297 struct extent_ptr_decoded p;
/* Per-key flags, used after the pointer loop to classify the extent. */
298 bool compressed = false, uncompressed = false, incompressible = false;
/*
 * NOTE(review): the sector totals below are updated once per decoded
 * pointer, so a key with several pointers appears to be counted several
 * times -- confirm whether that is intended.
 */
300 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
301 switch (p.crc.compression_type) {
302 case BCH_COMPRESSION_TYPE_none:
304 uncompressed_sectors += k.k->size;
306 case BCH_COMPRESSION_TYPE_incompressible:
307 incompressible = true;
308 incompressible_sectors += k.k->size;
311 compressed_sectors_compressed +=
312 p.crc.compressed_size;
313 compressed_sectors_uncompressed +=
314 p.crc.uncompressed_size;
/* Each key is counted as exactly one kind of extent. */
321 nr_incompressible_extents++;
322 else if (uncompressed)
323 nr_uncompressed_extents++;
325 nr_compressed_extents++;
327 bch2_trans_iter_exit(&trans, &iter);
330 bch2_trans_exit(&trans);
335 pr_buf(out, "uncompressed:\n");
336 pr_buf(out, " nr extents: %llu\n", nr_uncompressed_extents);
337 pr_buf(out, " size: ");
338 bch2_hprint(out, uncompressed_sectors << 9);
341 pr_buf(out, "compressed:\n");
342 pr_buf(out, " nr extents: %llu\n", nr_compressed_extents);
343 pr_buf(out, " compressed size: ");
344 bch2_hprint(out, compressed_sectors_compressed << 9);
346 pr_buf(out, " uncompressed size: ");
347 bch2_hprint(out, compressed_sectors_uncompressed << 9);
350 pr_buf(out, "incompressible:\n");
351 pr_buf(out, " nr extents: %llu\n", nr_incompressible_extents);
352 pr_buf(out, " size: ");
353 bch2_hprint(out, incompressible_sectors << 9);
/*
 * Prints "<btree id name>: <position>" using c->gc_gens_btree and
 * c->gc_gens_pos.
 */
358 static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
360 pr_buf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
361 bch2_bpos_to_text(out, c->gc_gens_pos);
/*
 * Body of the filesystem-level show handler (the one the "internal"
 * directory wrapper reaches as bch2_fs_to_text()).  @kobj is the kobject
 * embedded in struct bch_fs as .kobj.  Each statement below fires only when
 * @attr is the matching sysfs_<name> object and appends that attribute's
 * text to @out.
 */
367 struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
369 sysfs_print(minor, c->minor);
370 sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
372 sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
373 sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
375 sysfs_print(read_realloc_races,
376 atomic_long_read(&c->read_realloc_races));
377 sysfs_print(extent_migrate_done,
378 atomic_long_read(&c->extent_migrate_done));
379 sysfs_print(extent_migrate_raced,
380 atomic_long_read(&c->extent_migrate_raced));
382 sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
384 if (attr == &sysfs_gc_gens_pos)
385 bch2_gc_gens_pos_to_text(out, c);
387 sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
389 sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
390 sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
/*
 * Difference between c->copygc_wait and the current WRITE io clock,
 * clamped so it never goes negative, then shifted left by 9.
 */
391 sysfs_hprint(copy_gc_wait,
392 max(0LL, c->copygc_wait -
393 atomic64_read(&c->io_clock[WRITE].now)) << 9);
395 if (attr == &sysfs_rebalance_work)
396 bch2_rebalance_work_to_text(out, c);
398 sysfs_print(promote_whole_extents, c->promote_whole_extents);
/* The remaining attributes delegate to a subsystem's own *_to_text(). */
402 if (attr == &sysfs_journal_debug)
403 bch2_journal_debug_to_text(out, &c->journal);
405 if (attr == &sysfs_journal_pins)
406 bch2_journal_pins_to_text(out, &c->journal);
408 if (attr == &sysfs_btree_updates)
409 bch2_btree_updates_to_text(out, c);
411 if (attr == &sysfs_dirty_btree_nodes)
412 bch2_dirty_btree_nodes_to_text(out, c);
414 if (attr == &sysfs_btree_cache)
415 bch2_btree_cache_to_text(out, c);
417 if (attr == &sysfs_btree_key_cache)
418 bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
420 if (attr == &sysfs_btree_transactions)
421 bch2_btree_trans_to_text(out, c);
423 if (attr == &sysfs_stripes_heap)
424 bch2_stripes_heap_to_text(out, c);
426 if (attr == &sysfs_open_buckets)
427 bch2_open_buckets_to_text(out, c);
/* NOTE(review): the int returned by the compression scan is not used here. */
429 if (attr == &sysfs_compression_stats)
430 bch2_compression_stats_to_text(out, c);
432 if (attr == &sysfs_new_stripes)
433 bch2_new_stripes_to_text(out, c);
435 if (attr == &sysfs_io_timers_read)
436 bch2_io_timers_to_text(out, &c->io_clock[READ]);
438 if (attr == &sysfs_io_timers_write)
439 bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
441 if (attr == &sysfs_data_jobs)
442 data_progress_to_text(out, c);
/*
 * Body of the filesystem-level store handler (the one the "internal"
 * directory wrapper reaches as bch2_fs_store()).  @kobj is the kobject
 * embedded in struct bch_fs as .kobj; @buf holds the text written by
 * userspace.
 */
449 struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
/* Tunables: parse the new value, then wake the thread that consumes it. */
451 if (attr == &sysfs_btree_gc_periodic) {
452 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
455 wake_up_process(c->gc_thread);
459 if (attr == &sysfs_copy_gc_enabled) {
460 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
/* The copygc thread pointer is checked before it is woken. */
463 if (c->copygc_thread)
464 wake_up_process(c->copygc_thread);
468 if (attr == &sysfs_rebalance_enabled) {
469 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
476 sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
478 sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
/* The started and read-write flags are tested ahead of the triggers below. */
482 if (!test_bit(BCH_FS_STARTED, &c->flags))
487 if (!test_bit(BCH_FS_RW, &c->flags))
/*
 * prune_cache: calls the btree cache shrinker's scan_objects() directly,
 * asking it to scan the number of objects written by the user.
 * NOTE(review): only gfp_mask and nr_to_scan are assigned in the lines
 * visible here; confirm the remaining shrink_control fields are
 * initialised.
 */
490 if (attr == &sysfs_prune_cache) {
491 struct shrink_control sc;
493 sc.gfp_mask = GFP_KERNEL;
494 sc.nr_to_scan = strtoul_or_return(buf);
495 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
498 if (attr == &sysfs_trigger_gc) {
500 * Full gc is currently incompatible with btree key cache:
/* gc is run with state_lock held for read. */
503 down_read(&c->state_lock);
504 bch2_gc(c, false, false);
505 up_read(&c->state_lock);
511 #ifdef CONFIG_BCACHEFS_TESTS
/*
 * perf_test: the input is copied, then split on whitespace into three
 * fields -- test name, iteration count and thread count -- in that order.
 */
512 if (attr == &sysfs_perf_test) {
513 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
514 char *test = strsep(&p, " \t\n");
515 char *nr_str = strsep(&p, " \t\n");
516 char *threads_str = strsep(&p, " \t\n");
/* The test only runs if both numeric fields parse successfully. */
522 !(ret = kstrtouint(threads_str, 10, &threads)) &&
523 !(ret = bch2_strtoull_h(nr_str, &nr)))
524 ret = bch2_btree_perf_test(c, test, nr, threads);
/* Attribute table for the filesystem's top-level sysfs directory. */
535 struct attribute *bch2_fs_files[] = {
537 &sysfs_btree_cache_size,
538 &sysfs_btree_avg_write_size,
540 &sysfs_promote_whole_extents,
542 &sysfs_compression_stats,
544 #ifdef CONFIG_BCACHEFS_TESTS
550 /* internal dir - just a wrapper */
/*
 * Here @kobj is the `internal` kobject embedded in struct bch_fs.  Both
 * handlers recover the owning bch_fs and forward to the main filesystem
 * handlers, passing &c->kobj so those can apply their own container_of().
 */
552 SHOW(bch2_fs_internal)
554 struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
555 return bch2_fs_to_text(out, &c->kobj, attr);
558 STORE(bch2_fs_internal)
560 struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
561 return bch2_fs_store(&c->kobj, attr, buf, size);
563 SYSFS_OPS(bch2_fs_internal);
/*
 * Attribute table for the "internal" directory.  These files are served by
 * the main filesystem handlers through the bch2_fs_internal wrappers.
 */
565 struct attribute *bch2_fs_internal_files[] = {
566 &sysfs_journal_debug,
568 &sysfs_btree_updates,
569 &sysfs_dirty_btree_nodes,
571 &sysfs_btree_key_cache,
572 &sysfs_btree_transactions,
576 &sysfs_io_timers_read,
577 &sysfs_io_timers_write,
582 &sysfs_read_realloc_races,
583 &sysfs_extent_migrate_done,
584 &sysfs_extent_migrate_raced,
588 &sysfs_copy_gc_enabled,
591 &sysfs_rebalance_enabled,
592 &sysfs_rebalance_work,
593 sysfs_pd_controller_files(rebalance),
597 &sysfs_internal_uuid,
/*
 * Shows one filesystem option.  @attr is the struct attribute embedded in a
 * struct bch_option, so the option (and its index in bch2_opt_table) is
 * recovered from the attribute pointer rather than by name.
 */
603 SHOW(bch2_fs_opts_dir)
605 struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
606 const struct bch_option *opt = container_of(attr, struct bch_option, attr);
/* The option id is the entry's offset within the option table. */
607 int id = opt - bch2_opt_table;
608 u64 v = bch2_opt_get_by_id(&c->opts, id);
610 bch2_opt_to_text(out, c, opt, v, OPT_SHOW_FULL_LIST);
/*
 * Sets one filesystem option from the text in @buf.  The option is recovered
 * from @attr in the same way as in the show handler.
 */
616 STORE(bch2_fs_opts_dir)
618 struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
619 const struct bch_option *opt = container_of(attr, struct bch_option, attr);
620 int ret, id = opt - bch2_opt_table;
/* Work on a private copy: @buf is const and strim() modifies its argument. */
624 tmp = kstrdup(buf, GFP_KERNEL);
628 ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v);
634 ret = bch2_opt_check_may_set(c, id, v);
/* Record the new value both in the superblock and in the in-memory opts. */
638 bch2_opt_set_sb(c, opt, v);
639 bch2_opt_set_by_id(&c->opts, id, v);
/*
 * Setting a non-zero background target or background compression queues
 * rebalance work.
 */
641 if ((id == Opt_background_target ||
642 id == Opt_background_compression) && v) {
643 bch2_rebalance_add_work(c, S64_MAX);
649 SYSFS_OPS(bch2_fs_opts_dir);
/*
 * The static table for the options directory is empty; the option files are
 * created at runtime by bch2_opts_create_sysfs_files() below.
 */
651 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
/*
 * Walks bch2_opt_table (bch2_opts_nr entries) and calls sysfs_create_file()
 * on @kobj with each option's embedded attribute.  Each entry's OPT_FS flag
 * is tested first (presumably entries without it are skipped -- the
 * statement following the test is not visible here).
 */
653 int bch2_opts_create_sysfs_files(struct kobject *kobj)
655 const struct bch_option *i;
658 for (i = bch2_opt_table;
659 i < bch2_opt_table + bch2_opts_nr;
661 if (!(i->flags & OPT_FS))
664 ret = sysfs_create_file(kobj, &i->attr);
/*
 * Time statistics directory.  @kobj is the `time_stats` kobject embedded in
 * struct bch_fs.  For each sysfs_time_stat_<name> attribute the show handler
 * prints c->times[BCH_TIME_<name>] via bch2_time_stats_to_text(); the file
 * table lists the same sysfs_time_stat_<name> attributes.
 */
674 SHOW(bch2_fs_time_stats)
676 struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
679 if (attr == &sysfs_time_stat_##name) \
680 bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
687 STORE(bch2_fs_time_stats)
691 SYSFS_OPS(bch2_fs_time_stats);
693 struct attribute *bch2_fs_time_stats_files[] = {
695 &sysfs_time_stat_##name,
/*
 * Prints the fill level of the device's free_inc fifo and of each
 * free[] reserve fifo (indices 0..RESERVE_NR-1), one line per fifo.
 * The fifos are read with the filesystem's freelist_lock held.
 */
701 static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca)
703 enum alloc_reserve i;
705 spin_lock(&ca->fs->freelist_lock);
707 pr_buf(out, "free_inc:\t%zu\t%zu\n",
708 fifo_used(&ca->free_inc),
711 for (i = 0; i < RESERVE_NR; i++)
712 pr_buf(out, "free[%u]:\t%zu\t%zu\n", i,
713 fifo_used(&ca->free[i]),
716 spin_unlock(&ca->fs->freelist_lock);
/*
 * Prints an allocator debugging summary for one device: a per-data-type
 * usage table built from a snapshot returned by bch2_dev_usage_read(),
 * followed by free-list fill levels, open bucket counters, waitlist status
 * and the allocator thread state.
 */
719 static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
721 struct bch_fs *c = ca->fs;
722 struct bch_dev_usage stats = bch2_dev_usage_read(ca);
723 unsigned i, nr[BCH_DATA_NR];
/* Tally the filesystem's open_buckets[] entries by their data_type. */
725 memset(nr, 0, sizeof(nr));
727 for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
728 nr[c->open_buckets[i].data_type]++;
731 "\t\t buckets\t sectors fragmented\n"
733 ca->mi.nbuckets - ca->mi.first_bucket);
/* One row per data type; index 0 is not printed by this loop. */
735 for (i = 1; i < BCH_DATA_NR; i++)
736 pr_buf(out, "%-8s%16llu%16llu%16llu\n",
737 bch2_data_types[i], stats.d[i].buckets,
738 stats.d[i].sectors, stats.d[i].fragmented);
744 "free_inc\t\t%zu/%zu\n"
745 "free[RESERVE_MOVINGGC]\t%zu/%zu\n"
746 "free[RESERVE_NONE]\t%zu/%zu\n"
747 "freelist_wait\t\t%s\n"
748 "open buckets allocated\t%u\n"
749 "open buckets this dev\t%u\n"
750 "open buckets total\t%u\n"
751 "open_buckets_wait\t%s\n"
752 "open_buckets_btree\t%u\n"
753 "open_buckets_user\t%u\n"
754 "btree reserve cache\t%u\n"
755 "thread state:\t\t%s\n",
757 __dev_buckets_available(ca, stats),
758 fifo_used(&ca->free_inc), ca->free_inc.size,
759 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
760 fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
761 c->freelist_wait.list.first ? "waiting" : "empty",
762 OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
765 c->open_buckets_wait.list.first ? "waiting" : "empty",
768 c->btree_reserve_cache_nr,
769 bch2_allocator_states[ca->allocator_state]);
772 static const char * const bch2_rw[] = {
/*
 * Prints the device's completed I/O counters: for each of the two
 * directions (labelled from bch2_rw[]), one line per data type starting at
 * index 1, with the per-cpu sector count shifted left by 9.
 */
778 static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
782 for (rw = 0; rw < 2; rw++) {
783 pr_buf(out, "%s:\n", bch2_rw[rw]);
785 for (i = 1; i < BCH_DATA_NR; i++)
786 pr_buf(out, "%-12s:%12llu\n",
788 percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
/*
 * Body of the per-device show handler.  @kobj is the kobject embedded in
 * struct bch_dev as .kobj.  Each statement below fires only when @attr is
 * the matching sysfs_<name> object and appends that attribute's text to
 * @out.
 */
794 struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
795 struct bch_fs *c = ca->fs;
/*
 * sysfs_printf() already appends "\n" to the format, so the format must not
 * carry its own newline (this matches how internal_uuid is printed).
 */
797 sysfs_printf(uuid, "%pU", ca->uuid.b);
799 sysfs_print(bucket_size, bucket_bytes(ca));
800 sysfs_print(first_bucket, ca->mi.first_bucket);
801 sysfs_print(nbuckets, ca->mi.nbuckets);
802 sysfs_print(durability, ca->mi.durability);
803 sysfs_print(discard, ca->mi.discard);
/* The label is read from the superblock, so sb_lock is held around it. */
805 if (attr == &sysfs_label) {
807 mutex_lock(&c->sb_lock);
808 bch2_disk_path_to_text(out, c->disk_sb.sb,
810 mutex_unlock(&c->sb_lock);
816 if (attr == &sysfs_has_data) {
817 bch2_flags_to_text(out, bch2_data_types,
818 bch2_dev_has_data(c, ca));
822 if (attr == &sysfs_state_rw) {
823 bch2_string_opt_to_text(out, bch2_member_states,
828 if (attr == &sysfs_iodone)
829 dev_iodone_to_text(out, ca);
831 sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
832 sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
834 if (attr == &sysfs_io_latency_stats_read)
835 bch2_time_stats_to_text(out, &ca->io_latency[READ]);
837 if (attr == &sysfs_io_latency_stats_write)
838 bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
/* Congestion as a percentage of CONGESTED_MAX, clamped to 0..100%. */
840 sysfs_printf(congested, "%u%%",
841 clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
842 * 100 / CONGESTED_MAX);
844 if (attr == &sysfs_reserve_stats)
845 reserve_stats_to_text(out, ca);
847 if (attr == &sysfs_alloc_debug)
848 dev_alloc_debug_to_text(out, ca);
/*
 * Body of the per-device store handler.  @kobj is the kobject embedded in
 * struct bch_dev as .kobj; @buf holds the text written by userspace.
 */
855 struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
856 struct bch_fs *c = ca->fs;
857 struct bch_member *mi;
/*
 * discard: the parsed number is stored in a bool, so any non-zero value
 * means "on".  The member entry in the superblock is only modified when
 * the value actually changes, and only with sb_lock held.
 */
859 if (attr == &sysfs_discard) {
860 bool v = strtoul_or_return(buf);
862 mutex_lock(&c->sb_lock);
863 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
865 if (v != BCH_MEMBER_DISCARD(mi)) {
866 SET_BCH_MEMBER_DISCARD(mi, v);
869 mutex_unlock(&c->sb_lock);
/* label: trimmed on a private copy because strim() modifies its argument. */
872 if (attr == &sysfs_label) {
876 tmp = kstrdup(buf, GFP_KERNEL);
880 ret = bch2_dev_group_set(c, ca, strim(tmp));
/* wake_allocator: any write wakes the allocator; the content is not parsed. */
886 if (attr == &sysfs_wake_allocator)
887 bch2_wake_allocator(ca);
/* Attribute table for each device's sysfs directory. */
893 struct attribute *bch2_dev_files[] = {
908 &sysfs_io_latency_read,
909 &sysfs_io_latency_write,
910 &sysfs_io_latency_stats_read,
911 &sysfs_io_latency_stats_write,
914 &sysfs_reserve_stats,
918 &sysfs_wake_allocator,
922 #endif /* NO_BCACHEFS_SYSFS */