X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsysfs.c;h=0f86a6c0c9d8420fd8a7493d6422b414f453674f;hb=9690f783569ebeb166dfc1745c0ba0f48db523d0;hp=601e270777a6244d58cf6d0796b1e88c8083700b;hpb=1991277c8e723b018c90523949e8242692810911;p=bcachefs-tools-debian diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 601e270..0f86a6c 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bcache sysfs interfaces * @@ -8,23 +9,31 @@ #ifndef NO_BCACHEFS_SYSFS #include "bcachefs.h" -#include "alloc.h" -#include "compress.h" +#include "alloc_background.h" +#include "alloc_foreground.h" #include "sysfs.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_gc.h" #include "buckets.h" +#include "clock.h" +#include "disk_groups.h" +#include "ec.h" #include "inode.h" #include "journal.h" #include "keylist.h" #include "move.h" +#include "movinggc.h" +#include "nocow_locking.h" #include "opts.h" +#include "rebalance.h" +#include "replicas.h" #include "super-io.h" -#include "tier.h" +#include "tests.h" #include #include @@ -33,46 +42,75 @@ #include "util.h" #define SYSFS_OPS(type) \ -struct sysfs_ops type ## _sysfs_ops = { \ +const struct sysfs_ops type ## _sysfs_ops = { \ .show = type ## _show, \ .store = type ## _store \ } #define SHOW(fn) \ +static ssize_t fn ## _to_text(struct printbuf *, \ + struct kobject *, struct attribute *); \ + \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ +{ \ + struct printbuf out = PRINTBUF; \ + ssize_t ret = fn ## _to_text(&out, kobj, attr); \ + \ + if (out.pos && out.buf[out.pos - 1] != '\n') \ + prt_newline(&out); \ + \ + if (!ret && out.allocation_failure) \ + ret = -ENOMEM; \ + \ + if (!ret) { \ + ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ + memcpy(buf, out.buf, ret); \ + } \ + printbuf_exit(&out); \ + return bch2_err_class(ret); \ +} \ + \ +static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\ + struct attribute *attr) #define STORE(fn) \ +static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\ + const char *, size_t); \ + \ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ const char *buf, size_t size) \ +{ \ + return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \ +} \ + \ +static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\ + const char *buf, size_t size) #define __sysfs_attribute(_name, _mode) \ static struct attribute sysfs_##_name = \ { .name = #_name, .mode = _mode } -#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) -#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) -#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) +#define write_attribute(n) __sysfs_attribute(n, 0200) +#define read_attribute(n) __sysfs_attribute(n, 0444) +#define rw_attribute(n) __sysfs_attribute(n, 0644) #define sysfs_printf(file, fmt, ...) \ do { \ if (attr == &sysfs_ ## file) \ - return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ + prt_printf(out, fmt "\n", __VA_ARGS__); \ } while (0) #define sysfs_print(file, var) \ do { \ if (attr == &sysfs_ ## file) \ - return snprint(buf, PAGE_SIZE, var); \ + snprint(out, var); \ } while (0) #define sysfs_hprint(file, val) \ do { \ - if (attr == &sysfs_ ## file) { \ - ssize_t ret = bch2_hprint(buf, val); \ - strcat(buf, "\n"); \ - return ret + 1; \ - } \ + if (attr == &sysfs_ ## file) \ + prt_human_readable_s64(out, val); \ } while (0) #define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) @@ -125,75 +163,106 @@ do { \ return strtoi_h(buf, &var) ?: (ssize_t) size; \ } while (0) -write_attribute(trigger_journal_flush); -write_attribute(trigger_btree_coalesce); write_attribute(trigger_gc); +write_attribute(trigger_discards); +write_attribute(trigger_invalidates); write_attribute(prune_cache); +write_attribute(btree_wakeup); rw_attribute(btree_gc_periodic); +rw_attribute(gc_gens_pos); read_attribute(uuid); read_attribute(minor); read_attribute(bucket_size); -read_attribute(block_size); -read_attribute(btree_node_size); read_attribute(first_bucket); read_attribute(nbuckets); -read_attribute(iostats); -read_attribute(read_priority_stats); -read_attribute(write_priority_stats); -read_attribute(fragmentation_stats); -read_attribute(oldest_gen_stats); -read_attribute(reserve_stats); +rw_attribute(durability); +read_attribute(iodone); + +read_attribute(io_latency_read); +read_attribute(io_latency_write); +read_attribute(io_latency_stats_read); +read_attribute(io_latency_stats_write); +read_attribute(congested); + +read_attribute(btree_write_stats); + read_attribute(btree_cache_size); read_attribute(compression_stats); read_attribute(journal_debug); -read_attribute(journal_pins); read_attribute(btree_updates); -read_attribute(dirty_btree_nodes); +read_attribute(btree_cache); +read_attribute(btree_key_cache); +read_attribute(stripes_heap); +read_attribute(open_buckets); +read_attribute(open_buckets_partial); +read_attribute(write_points); +read_attribute(nocow_lock_table); + +#ifdef BCH_WRITE_REF_DEBUG +read_attribute(write_refs); + +const char * const bch2_write_refs[] = { +#define x(n) #n, + BCH_WRITE_REFS() +#undef x + NULL +}; + +static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c) +{ + bch2_printbuf_tabstop_push(out, 24); + + for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++) { + prt_str(out, bch2_write_refs[i]); + prt_tab(out); + prt_printf(out, "%li", atomic_long_read(&c->writes[i])); + prt_newline(out); + } +} +#endif read_attribute(internal_uuid); read_attribute(has_data); read_attribute(alloc_debug); -write_attribute(wake_allocator); - -read_attribute(read_realloc_races); -read_attribute(extent_migrate_done); -read_attribute(extent_migrate_raced); -rw_attribute(journal_write_delay_ms); -rw_attribute(journal_reclaim_delay_ms); +#define x(t, n, ...) read_attribute(t); +BCH_PERSISTENT_COUNTERS() +#undef x rw_attribute(discard); -rw_attribute(cache_replacement_policy); -rw_attribute(group); +rw_attribute(label); rw_attribute(copy_gc_enabled); -sysfs_pd_controller_attribute(copy_gc); +read_attribute(copy_gc_wait); rw_attribute(rebalance_enabled); -rw_attribute(rebalance_percent); sysfs_pd_controller_attribute(rebalance); +read_attribute(rebalance_work); +rw_attribute(promote_whole_extents); -rw_attribute(pd_controllers_update_seconds); +read_attribute(new_stripes); -read_attribute(meta_replicas_have); -read_attribute(data_replicas_have); +read_attribute(io_timers_read); +read_attribute(io_timers_write); -#define BCH_DEBUG_PARAM(name, description) \ - rw_attribute(name); +read_attribute(data_jobs); +read_attribute(moving_ctxts); - BCH_DEBUG_PARAMS() -#undef BCH_DEBUG_PARAM +#ifdef CONFIG_BCACHEFS_TESTS +write_attribute(perf_test); +#endif /* CONFIG_BCACHEFS_TESTS */ -#define BCH_TIME_STAT(name, frequency_units, duration_units) \ - sysfs_time_stats_attribute(name, frequency_units, duration_units); +#define x(_name) \ + static struct attribute sysfs_time_stat_##_name = \ + { .name = #_name, .mode = 0444 }; BCH_TIME_STATS() -#undef BCH_TIME_STAT +#undef x static struct attribute sysfs_state_rw = { .name = "state", - .mode = S_IRUGO + .mode = 0444, }; static size_t bch2_btree_cache_size(struct bch_fs *c) @@ -209,94 +278,115 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) return ret; } -static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) -{ - struct bch_fs_usage stats = bch2_fs_usage_read(c); - - return scnprintf(buf, PAGE_SIZE, - "capacity:\t\t%llu\n" - "1 replicas:\n" - "\tmeta:\t\t%llu\n" - "\tdirty:\t\t%llu\n" - "\treserved:\t%llu\n" - "2 replicas:\n" - "\tmeta:\t\t%llu\n" - "\tdirty:\t\t%llu\n" - "\treserved:\t%llu\n" - "3 replicas:\n" - "\tmeta:\t\t%llu\n" - "\tdirty:\t\t%llu\n" - "\treserved:\t%llu\n" - "4 replicas:\n" - "\tmeta:\t\t%llu\n" - "\tdirty:\t\t%llu\n" - "\treserved:\t%llu\n" - "online reserved:\t%llu\n", - c->capacity, - stats.s[0].data[S_META], - stats.s[0].data[S_DIRTY], - stats.s[0].persistent_reserved, - stats.s[1].data[S_META], - stats.s[1].data[S_DIRTY], - stats.s[1].persistent_reserved, - stats.s[2].data[S_META], - stats.s[2].data[S_DIRTY], - stats.s[2].persistent_reserved, - stats.s[3].data[S_META], - stats.s[3].data[S_DIRTY], - stats.s[3].persistent_reserved, - stats.online_reserved); -} - -static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) +static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) { + struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, + enum btree_id id; + u64 nr_uncompressed_extents = 0, nr_compressed_extents = 0, + nr_incompressible_extents = 0, + uncompressed_sectors = 0, + incompressible_sectors = 0, compressed_sectors_compressed = 0, compressed_sectors_uncompressed = 0; + int ret; - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) - if (k.k->type == BCH_EXTENT) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; - - extent_for_each_ptr_crc(e, ptr, crc) { - if (crc.compression_type == BCH_COMPRESSION_NONE) { - nr_uncompressed_extents++; - uncompressed_sectors += e.k->size; - } else { - nr_compressed_extents++; + bch2_trans_init(&trans, c, 0, 0); + + for (id = 0; id < BTREE_ID_NR; id++) { + if (!btree_type_has_ptrs(id)) + continue; + + for_each_btree_key(&trans, iter, id, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + bool compressed = false, uncompressed = false, incompressible = false; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + switch (p.crc.compression_type) { + case BCH_COMPRESSION_TYPE_none: + uncompressed = true; + uncompressed_sectors += k.k->size; + break; + case BCH_COMPRESSION_TYPE_incompressible: + incompressible = true; + incompressible_sectors += k.k->size; + break; + default: compressed_sectors_compressed += - crc.compressed_size; + p.crc.compressed_size; compressed_sectors_uncompressed += - crc.uncompressed_size; + p.crc.uncompressed_size; + compressed = true; + break; } - - /* only looking at the first ptr */ - break; } + + if (incompressible) + nr_incompressible_extents++; + else if (uncompressed) + nr_uncompressed_extents++; + else if (compressed) + nr_compressed_extents++; } - bch2_btree_iter_unlock(&iter); - - return scnprintf(buf, PAGE_SIZE, - "uncompressed data:\n" - " nr extents: %llu\n" - " size (bytes): %llu\n" - "compressed data:\n" - " nr extents: %llu\n" - " compressed size (bytes): %llu\n" - " uncompressed size (bytes): %llu\n", - nr_uncompressed_extents, - uncompressed_sectors << 9, - nr_compressed_extents, - compressed_sectors_compressed << 9, - compressed_sectors_uncompressed << 9); + bch2_trans_iter_exit(&trans, &iter); + } + + bch2_trans_exit(&trans); + + if (ret) + return ret; + + prt_printf(out, "uncompressed:\n"); + prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents); + prt_printf(out, " size: "); + prt_human_readable_u64(out, uncompressed_sectors << 9); + prt_printf(out, "\n"); + + prt_printf(out, "compressed:\n"); + prt_printf(out, " nr extents: %llu\n", nr_compressed_extents); + prt_printf(out, " compressed size: "); + prt_human_readable_u64(out, compressed_sectors_compressed << 9); + prt_printf(out, "\n"); + prt_printf(out, " uncompressed size: "); + prt_human_readable_u64(out, compressed_sectors_uncompressed << 9); + prt_printf(out, "\n"); + + prt_printf(out, "incompressible:\n"); + prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents); + prt_printf(out, " size: "); + prt_human_readable_u64(out, incompressible_sectors << 9); + prt_printf(out, "\n"); + return 0; +} + +static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) +{ + prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]); + bch2_bpos_to_text(out, c->gc_gens_pos); + prt_printf(out, "\n"); +} + +static void bch2_btree_wakeup_all(struct bch_fs *c) +{ + struct btree_trans *trans; + + mutex_lock(&c->btree_trans_lock); + list_for_each_entry(trans, &c->btree_trans_list, list) { + struct btree_bkey_cached_common *b = READ_ONCE(trans->locking); + + if (b) + six_lock_wakeup_all(&b->lock); + + } + mutex_unlock(&c->btree_trans_lock); } SHOW(bch2_fs) @@ -306,69 +396,88 @@ SHOW(bch2_fs) sysfs_print(minor, c->minor); sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); - sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); - sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); - - sysfs_print(block_size, block_bytes(c)); - sysfs_print(btree_node_size, btree_bytes(c)); sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); - sysfs_print(read_realloc_races, - atomic_long_read(&c->read_realloc_races)); - sysfs_print(extent_migrate_done, - atomic_long_read(&c->extent_migrate_done)); - sysfs_print(extent_migrate_raced, - atomic_long_read(&c->extent_migrate_raced)); + if (attr == &sysfs_btree_write_stats) + bch2_btree_write_stats_to_text(out, c); sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); + if (attr == &sysfs_gc_gens_pos) + bch2_gc_gens_pos_to_text(out, c); + sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); - sysfs_print(pd_controllers_update_seconds, - c->pd_controllers_update_seconds); + sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); + sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ - sysfs_printf(rebalance_enabled, "%i", c->rebalance_enabled); - sysfs_print(rebalance_percent, c->rebalance_percent); + if (attr == &sysfs_copy_gc_wait) + bch2_copygc_wait_to_text(out, c); - sysfs_pd_controller_show(rebalance, &c->rebalance_pd); /* XXX */ + if (attr == &sysfs_rebalance_work) + bch2_rebalance_work_to_text(out, c); - sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true)); - sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false)); + sysfs_print(promote_whole_extents, c->promote_whole_extents); /* Debugging: */ - if (attr == &sysfs_alloc_debug) - return show_fs_alloc_debug(c, buf); - if (attr == &sysfs_journal_debug) - return bch2_journal_print_debug(&c->journal, buf); - - if (attr == &sysfs_journal_pins) - return bch2_journal_print_pins(&c->journal, buf); + bch2_journal_debug_to_text(out, &c->journal); if (attr == &sysfs_btree_updates) - return bch2_btree_updates_print(c, buf); + bch2_btree_updates_to_text(out, c); + + if (attr == &sysfs_btree_cache) + bch2_btree_cache_to_text(out, &c->btree_cache); + + if (attr == &sysfs_btree_key_cache) + bch2_btree_key_cache_to_text(out, &c->btree_key_cache); + + if (attr == &sysfs_stripes_heap) + bch2_stripes_heap_to_text(out, c); + + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c); + + if (attr == &sysfs_open_buckets_partial) + bch2_open_buckets_partial_to_text(out, c); - if (attr == &sysfs_dirty_btree_nodes) - return bch2_dirty_btree_nodes_print(c, buf); + if (attr == &sysfs_write_points) + bch2_write_points_to_text(out, c); if (attr == &sysfs_compression_stats) - return bch2_compression_stats(c, buf); + bch2_compression_stats_to_text(out, c); + + if (attr == &sysfs_new_stripes) + bch2_new_stripes_to_text(out, c); + + if (attr == &sysfs_io_timers_read) + bch2_io_timers_to_text(out, &c->io_clock[READ]); + + if (attr == &sysfs_io_timers_write) + bch2_io_timers_to_text(out, &c->io_clock[WRITE]); + + if (attr == &sysfs_data_jobs) + bch2_data_jobs_to_text(out, c); + + if (attr == &sysfs_moving_ctxts) + bch2_fs_moving_ctxts_to_text(out, c); -#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name); - BCH_DEBUG_PARAMS() -#undef BCH_DEBUG_PARAM +#ifdef BCH_WRITE_REF_DEBUG + if (attr == &sysfs_write_refs) + bch2_write_refs_to_text(out, c); +#endif + + if (attr == &sysfs_nocow_lock_table) + bch2_nocow_locks_to_text(out, &c->nocow_locks); return 0; } -STORE(__bch2_fs) +STORE(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); - sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); - if (attr == &sysfs_btree_gc_periodic) { ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) ?: (ssize_t) size; @@ -378,50 +487,35 @@ STORE(__bch2_fs) } if (attr == &sysfs_copy_gc_enabled) { - struct bch_dev *ca; - unsigned i; ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) ?: (ssize_t) size; - for_each_member_device(ca, c, i) - if (ca->copygc_thread) - wake_up_process(ca->copygc_thread); + if (c->copygc_thread) + wake_up_process(c->copygc_thread); return ret; } if (attr == &sysfs_rebalance_enabled) { - ssize_t ret = strtoul_safe(buf, c->rebalance_enabled) + ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) ?: (ssize_t) size; rebalance_wakeup(c); return ret; } - sysfs_strtoul(pd_controllers_update_seconds, - c->pd_controllers_update_seconds); + sysfs_pd_controller_store(rebalance, &c->rebalance.pd); - sysfs_strtoul(rebalance_percent, c->rebalance_percent); - sysfs_pd_controller_store(rebalance, &c->rebalance_pd); + sysfs_strtoul(promote_whole_extents, c->promote_whole_extents); /* Debugging: */ -#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name); - BCH_DEBUG_PARAMS() -#undef BCH_DEBUG_PARAM - - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; /* Debugging: */ - if (attr == &sysfs_trigger_journal_flush) - bch2_journal_meta_async(&c->journal, NULL); - - if (attr == &sysfs_trigger_btree_coalesce) - bch2_coalesce(c); - - if (attr == &sysfs_trigger_gc) - bch2_gc(c); + if (!test_bit(BCH_FS_RW, &c->flags)) + return -EROFS; if (attr == &sysfs_prune_cache) { struct shrink_control sc; @@ -431,79 +525,162 @@ STORE(__bch2_fs) c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc); } - return size; -} - -STORE(bch2_fs) -{ - struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); + if (attr == &sysfs_btree_wakeup) + bch2_btree_wakeup_all(c); + + if (attr == &sysfs_trigger_gc) { + /* + * Full gc is currently incompatible with btree key cache: + */ +#if 0 + down_read(&c->state_lock); + bch2_gc(c, false, false); + up_read(&c->state_lock); +#else + bch2_gc_gens(c); +#endif + } - mutex_lock(&c->state_lock); - size = __bch2_fs_store(kobj, attr, buf, size); - mutex_unlock(&c->state_lock); + if (attr == &sysfs_trigger_discards) + bch2_do_discards(c); + + if (attr == &sysfs_trigger_invalidates) + bch2_do_invalidates(c); + +#ifdef CONFIG_BCACHEFS_TESTS + if (attr == &sysfs_perf_test) { + char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; + char *test = strsep(&p, " \t\n"); + char *nr_str = strsep(&p, " \t\n"); + char *threads_str = strsep(&p, " \t\n"); + unsigned threads; + u64 nr; + int ret = -EINVAL; + + if (threads_str && + !(ret = kstrtouint(threads_str, 10, &threads)) && + !(ret = bch2_strtoull_h(nr_str, &nr))) + ret = bch2_btree_perf_test(c, test, nr, threads); + kfree(tmp); + if (ret) + size = ret; + } +#endif return size; } SYSFS_OPS(bch2_fs); struct attribute *bch2_fs_files[] = { &sysfs_minor, - &sysfs_block_size, - &sysfs_btree_node_size, &sysfs_btree_cache_size, + &sysfs_btree_write_stats, - &sysfs_meta_replicas_have, - &sysfs_data_replicas_have, - - &sysfs_journal_write_delay_ms, - &sysfs_journal_reclaim_delay_ms, - - &sysfs_rebalance_percent, + &sysfs_promote_whole_extents, &sysfs_compression_stats, + +#ifdef CONFIG_BCACHEFS_TESTS + &sysfs_perf_test, +#endif NULL }; +/* counters dir */ + +SHOW(bch2_fs_counters) +{ + struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj); + u64 counter = 0; + u64 counter_since_mount = 0; + + printbuf_tabstop_push(out, 32); + + #define x(t, ...) \ + if (attr == &sysfs_##t) { \ + counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ + counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\ + prt_printf(out, "since mount:"); \ + prt_tab(out); \ + prt_human_readable_u64(out, counter_since_mount); \ + prt_newline(out); \ + \ + prt_printf(out, "since filesystem creation:"); \ + prt_tab(out); \ + prt_human_readable_u64(out, counter); \ + prt_newline(out); \ + } + BCH_PERSISTENT_COUNTERS() + #undef x + return 0; +} + +STORE(bch2_fs_counters) { + return 0; +} + +SYSFS_OPS(bch2_fs_counters); + +struct attribute *bch2_fs_counters_files[] = { +#define x(t, ...) \ + &sysfs_##t, + BCH_PERSISTENT_COUNTERS() +#undef x + NULL +}; /* internal dir - just a wrapper */ SHOW(bch2_fs_internal) { struct bch_fs *c = container_of(kobj, struct bch_fs, internal); - return bch2_fs_show(&c->kobj, attr, buf); + + return bch2_fs_to_text(out, &c->kobj, attr); } STORE(bch2_fs_internal) { struct bch_fs *c = container_of(kobj, struct bch_fs, internal); + return bch2_fs_store(&c->kobj, attr, buf, size); } SYSFS_OPS(bch2_fs_internal); struct attribute *bch2_fs_internal_files[] = { - &sysfs_alloc_debug, &sysfs_journal_debug, - &sysfs_journal_pins, &sysfs_btree_updates, - &sysfs_dirty_btree_nodes, - - &sysfs_read_realloc_races, - &sysfs_extent_migrate_done, - &sysfs_extent_migrate_raced, + &sysfs_btree_cache, + &sysfs_btree_key_cache, + &sysfs_new_stripes, + &sysfs_stripes_heap, + &sysfs_open_buckets, + &sysfs_open_buckets_partial, + &sysfs_write_points, +#ifdef BCH_WRITE_REF_DEBUG + &sysfs_write_refs, +#endif + &sysfs_nocow_lock_table, + &sysfs_io_timers_read, + &sysfs_io_timers_write, - &sysfs_trigger_journal_flush, - &sysfs_trigger_btree_coalesce, &sysfs_trigger_gc, + &sysfs_trigger_discards, + &sysfs_trigger_invalidates, &sysfs_prune_cache, + &sysfs_btree_wakeup, + + &sysfs_gc_gens_pos, &sysfs_copy_gc_enabled, + &sysfs_copy_gc_wait, + &sysfs_rebalance_enabled, + &sysfs_rebalance_work, sysfs_pd_controller_files(rebalance), - &sysfs_internal_uuid, -#define BCH_DEBUG_PARAM(name, description) &sysfs_##name, - BCH_DEBUG_PARAMS() -#undef BCH_DEBUG_PARAM + &sysfs_data_jobs, + &sysfs_moving_ctxts, + &sysfs_internal_uuid, NULL }; @@ -511,16 +688,15 @@ struct attribute *bch2_fs_internal_files[] = { SHOW(bch2_fs_opts_dir) { - char *out = buf, *end = buf + PAGE_SIZE; struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); int id = opt - bch2_opt_table; u64 v = bch2_opt_get_by_id(&c->opts, id); - out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST); - out += scnprintf(out, end - out, "\n"); + bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST); + prt_char(out, '\n'); - return out - buf; + return 0; } STORE(bch2_fs_opts_dir) @@ -528,28 +704,33 @@ STORE(bch2_fs_opts_dir) struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); int ret, id = opt - bch2_opt_table; + char *tmp; u64 v; - ret = bch2_opt_parse(c, opt, buf, &v); - if (ret < 0) - return ret; - - if (id == Opt_compression || - id == Opt_background_compression) { - int ret = bch2_check_set_has_compressed_data(c, v); - if (ret) { - mutex_unlock(&c->sb_lock); - return ret; - } + /* + * We don't need to take c->writes for correctness, but it eliminates an + * unsightly error message in the dmesg log when we're RO: + */ + if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))) + return -EROFS; + + tmp = kstrdup(buf, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto err; } - if (opt->set_sb != SET_NO_SB_OPT) { - mutex_lock(&c->sb_lock); - opt->set_sb(c->disk_sb, v); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - } + ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL); + kfree(tmp); + if (ret < 0) + goto err; + + ret = bch2_opt_check_may_set(c, id, v); + if (ret < 0) + goto err; + + bch2_opt_set_sb(c, opt, v); bch2_opt_set_by_id(&c->opts, id, v); if ((id == Opt_background_target || @@ -558,7 +739,10 @@ STORE(bch2_fs_opts_dir) rebalance_wakeup(c); } - return size; + ret = size; +err: + bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); + return ret; } SYSFS_OPS(bch2_fs_opts_dir); @@ -572,7 +756,7 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) for (i = bch2_opt_table; i < bch2_opt_table + bch2_opts_nr; i++) { - if (i->mode == OPT_INTERNAL) + if (!(i->flags & OPT_FS)) continue; ret = sysfs_create_file(kobj, &i->attr); @@ -589,175 +773,134 @@ SHOW(bch2_fs_time_stats) { struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); -#define BCH_TIME_STAT(name, frequency_units, duration_units) \ - sysfs_print_time_stats(&c->name##_time, name, \ - frequency_units, duration_units); +#define x(name) \ + if (attr == &sysfs_time_stat_##name) \ + bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]); BCH_TIME_STATS() -#undef BCH_TIME_STAT +#undef x return 0; } STORE(bch2_fs_time_stats) { - struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); - -#define BCH_TIME_STAT(name, frequency_units, duration_units) \ - sysfs_clear_time_stats(&c->name##_time, name); - BCH_TIME_STATS() -#undef BCH_TIME_STAT - return size; } SYSFS_OPS(bch2_fs_time_stats); struct attribute *bch2_fs_time_stats_files[] = { -#define BCH_TIME_STAT(name, frequency_units, duration_units) \ - sysfs_time_stats_attribute_list(name, frequency_units, duration_units) +#define x(name) \ + &sysfs_time_stat_##name, BCH_TIME_STATS() -#undef BCH_TIME_STAT - +#undef x NULL }; -typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *); - -static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b, - void *private) -{ - struct bucket *g = bucket(ca, b); - int rw = (private ? 1 : 0); - - return ca->fs->prio_clock[rw].hand - g->prio[rw]; -} - -static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b, - void *private) +static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) { - struct bucket *g = bucket(ca, b); - return bucket_sectors_used(g->mark); -} - -static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b, - void *private) -{ - return bucket_gc_gen(ca, b); -} - -static ssize_t show_quantiles(struct bch_dev *ca, char *buf, - bucket_map_fn *fn, void *private) -{ - int cmp(const void *l, const void *r) - { return *((unsigned *) r) - *((unsigned *) l); } - - size_t i, n; - /* Compute 31 quantiles */ - unsigned q[31], *p; - ssize_t ret = 0; - - down_read(&ca->bucket_lock); - n = ca->mi.nbuckets; - - p = vzalloc(n * sizeof(unsigned)); - if (!p) { - up_read(&ca->bucket_lock); - return -ENOMEM; + struct bch_fs *c = ca->fs; + struct bch_dev_usage stats = bch2_dev_usage_read(ca); + unsigned i, nr[BCH_DATA_NR]; + + memset(nr, 0, sizeof(nr)); + + for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++) + nr[c->open_buckets[i].data_type]++; + + printbuf_tabstop_push(out, 8); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 16); + + prt_tab(out); + prt_str(out, "buckets"); + prt_tab_rjust(out); + prt_str(out, "sectors"); + prt_tab_rjust(out); + prt_str(out, "fragmented"); + prt_tab_rjust(out); + prt_newline(out); + + for (i = 0; i < BCH_DATA_NR; i++) { + prt_str(out, bch2_data_types[i]); + prt_tab(out); + prt_u64(out, stats.d[i].buckets); + prt_tab_rjust(out); + prt_u64(out, stats.d[i].sectors); + prt_tab_rjust(out); + prt_u64(out, stats.d[i].fragmented); + prt_tab_rjust(out); + prt_newline(out); } - for (i = ca->mi.first_bucket; i < n; i++) - p[i] = fn(ca, i, private); - - sort(p, n, sizeof(unsigned), cmp, NULL); - up_read(&ca->bucket_lock); - - while (n && - !p[n - 1]) - --n; - - for (i = 0; i < ARRAY_SIZE(q); i++) - q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)]; - - vfree(p); - - for (i = 0; i < ARRAY_SIZE(q); i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%u ", q[i]); - buf[ret - 1] = '\n'; - - return ret; -} - -static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) -{ - enum alloc_reserve i; - ssize_t ret; - - spin_lock(&ca->freelist_lock); - - ret = scnprintf(buf, PAGE_SIZE, - "free_inc:\t%zu\t%zu\n", - fifo_used(&ca->free_inc), - ca->free_inc.size); - - for (i = 0; i < RESERVE_NR; i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "free[%u]:\t%zu\t%zu\n", i, - fifo_used(&ca->free[i]), - ca->free[i].size); - - spin_unlock(&ca->freelist_lock); - - return ret; -} + prt_str(out, "ec"); + prt_tab(out); + prt_u64(out, stats.buckets_ec); + prt_tab_rjust(out); + prt_newline(out); + + prt_newline(out); + + prt_printf(out, "reserves:"); + prt_newline(out); + for (i = 0; i < RESERVE_NR; i++) { + prt_str(out, bch2_alloc_reserves[i]); + prt_tab(out); + prt_u64(out, bch2_dev_buckets_reserved(ca, i)); + prt_tab_rjust(out); + prt_newline(out); + } -static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) -{ - struct bch_fs *c = ca->fs; - struct bch_dev_usage stats = bch2_dev_usage_read(c, ca); - - return scnprintf(buf, PAGE_SIZE, - "free_inc: %zu/%zu\n" - "free[RESERVE_BTREE]: %zu/%zu\n" - "free[RESERVE_MOVINGGC]: %zu/%zu\n" - "free[RESERVE_NONE]: %zu/%zu\n" - "buckets:\n" - " capacity: %llu\n" - " alloc: %llu\n" - " sb: %llu\n" - " journal: %llu\n" - " meta: %llu\n" - " user: %llu\n" - " cached: %llu\n" - " available: %llu\n" - "sectors:\n" - " sb: %llu\n" - " journal: %llu\n" - " meta: %llu\n" - " user: %llu\n" - " cached: %llu\n" - "freelist_wait: %s\n" - "open buckets: %u/%u (reserved %u)\n" - "open_buckets_wait: %s\n", - fifo_used(&ca->free_inc), ca->free_inc.size, - fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size, - fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, - fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size, - ca->mi.nbuckets - ca->mi.first_bucket, - stats.buckets_alloc, - stats.buckets[BCH_DATA_SB], - stats.buckets[BCH_DATA_JOURNAL], - stats.buckets[BCH_DATA_BTREE], - stats.buckets[BCH_DATA_USER], - stats.buckets[BCH_DATA_CACHED], - __dev_buckets_available(ca, stats), - stats.sectors[BCH_DATA_SB], - stats.sectors[BCH_DATA_JOURNAL], - stats.sectors[BCH_DATA_BTREE], - stats.sectors[BCH_DATA_USER], - stats.sectors[BCH_DATA_CACHED], - c->freelist_wait.list.first ? "waiting" : "empty", - c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE, - c->open_buckets_wait.list.first ? "waiting" : "empty"); + prt_newline(out); + + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 24); + + prt_str(out, "freelist_wait"); + prt_tab(out); + prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty"); + prt_newline(out); + + prt_str(out, "open buckets allocated"); + prt_tab(out); + prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free); + prt_newline(out); + + prt_str(out, "open buckets this dev"); + prt_tab(out); + prt_u64(out, ca->nr_open_buckets); + prt_newline(out); + + prt_str(out, "open buckets total"); + prt_tab(out); + prt_u64(out, OPEN_BUCKETS_COUNT); + prt_newline(out); + + prt_str(out, "open_buckets_wait"); + prt_tab(out); + prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty"); + prt_newline(out); + + prt_str(out, "open_buckets_btree"); + prt_tab(out); + prt_u64(out, nr[BCH_DATA_btree]); + prt_newline(out); + + prt_str(out, "open_buckets_user"); + prt_tab(out); + prt_u64(out, nr[BCH_DATA_user]); + prt_newline(out); + + prt_str(out, "buckets_to_invalidate"); + prt_tab(out); + prt_u64(out, should_invalidate_buckets(ca, stats)); + prt_newline(out); + + prt_str(out, "btree reserve cache"); + prt_tab(out); + prt_u64(out, c->btree_reserve_cache_nr); + prt_newline(out); } static const char * const bch2_rw[] = { @@ -766,102 +909,72 @@ static const char * const bch2_rw[] = { NULL }; -static ssize_t show_dev_iostats(struct bch_dev *ca, char *buf) +static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) { - char *out = buf, *end = buf + PAGE_SIZE; - int rw, i, cpu; + int rw, i; for (rw = 0; rw < 2; rw++) { - out += scnprintf(out, end - out, "%s:\n", bch2_rw[rw]); - - for (i = 1; i < BCH_DATA_NR; i++) { - u64 n = 0; - - for_each_possible_cpu(cpu) - n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i]; + prt_printf(out, "%s:\n", bch2_rw[rw]); - out += scnprintf(out, end - out, "%-12s:%12llu\n", - bch2_data_types[i], n << 9); - } + for (i = 1; i < BCH_DATA_NR; i++) + prt_printf(out, "%-12s:%12llu\n", + bch2_data_types[i], + percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); } - - return out - buf; } SHOW(bch2_dev) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); struct bch_fs *c = ca->fs; - char *out = buf, *end = buf + PAGE_SIZE; sysfs_printf(uuid, "%pU\n", ca->uuid.b); sysfs_print(bucket_size, bucket_bytes(ca)); - sysfs_print(block_size, block_bytes(c)); sysfs_print(first_bucket, ca->mi.first_bucket); sysfs_print(nbuckets, ca->mi.nbuckets); + sysfs_print(durability, ca->mi.durability); sysfs_print(discard, ca->mi.discard); - if (attr == &sysfs_group) { - struct bch_sb_field_disk_groups *groups; - struct bch_disk_group *g; - unsigned len; - - if (!ca->mi.group) - return scnprintf(out, end - out, "none\n"); - - mutex_lock(&c->sb_lock); - groups = bch2_sb_get_disk_groups(c->disk_sb); - - g = &groups->entries[ca->mi.group - 1]; - len = strnlen(g->label, sizeof(g->label)); - memcpy(buf, g->label, len); - mutex_unlock(&c->sb_lock); + if (attr == &sysfs_label) { + if (ca->mi.group) { + mutex_lock(&c->sb_lock); + bch2_disk_path_to_text(out, c->disk_sb.sb, + ca->mi.group - 1); + mutex_unlock(&c->sb_lock); + } - buf[len++] = '\n'; - return len; + prt_char(out, '\n'); } if (attr == &sysfs_has_data) { - out += bch2_scnprint_flag_list(out, end - out, - bch2_data_types, - bch2_dev_has_data(c, ca)); - out += scnprintf(out, end - out, "\n"); - return out - buf; - } - - sysfs_pd_controller_show(copy_gc, &ca->copygc_pd); - - if (attr == &sysfs_cache_replacement_policy) { - out += bch2_scnprint_string_list(out, end - out, - bch2_cache_replacement_policies, - ca->mi.replacement); - out += scnprintf(out, end - out, "\n"); - return out - buf; + prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca)); + prt_char(out, '\n'); } if (attr == &sysfs_state_rw) { - out += bch2_scnprint_string_list(out, end - out, - bch2_dev_state, - ca->mi.state); - out += scnprintf(out, end - out, "\n"); - return out - buf; + prt_string_option(out, bch2_member_states, ca->mi.state); + prt_char(out, '\n'); } - if (attr == &sysfs_iostats) - return show_dev_iostats(ca, buf); - if (attr == &sysfs_read_priority_stats) - return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0); - if (attr == &sysfs_write_priority_stats) - return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1); - if (attr == &sysfs_fragmentation_stats) - return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL); - if (attr == &sysfs_oldest_gen_stats) - return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL); - if (attr == &sysfs_reserve_stats) - return show_reserve_stats(ca, buf); + if (attr == &sysfs_iodone) + dev_iodone_to_text(out, ca); + + sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); + sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); + + if (attr == &sysfs_io_latency_stats_read) + bch2_time_stats_to_text(out, &ca->io_latency[READ]); + + if (attr == &sysfs_io_latency_stats_write) + bch2_time_stats_to_text(out, &ca->io_latency[WRITE]); + + sysfs_printf(congested, "%u%%", + clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) + * 100 / CONGESTED_MAX); + if (attr == &sysfs_alloc_debug) - return show_dev_alloc_debug(ca, buf); + dev_alloc_debug_to_text(out, ca); return 0; } @@ -872,13 +985,11 @@ STORE(bch2_dev) struct bch_fs *c = ca->fs; struct bch_member *mi; - sysfs_pd_controller_store(copy_gc, &ca->copygc_pd); - if (attr == &sysfs_discard) { bool v = strtoul_or_return(buf); mutex_lock(&c->sb_lock); - mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx]; + mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; if (v != BCH_MEMBER_DISCARD(mi)) { SET_BCH_MEMBER_DISCARD(mi, v); @@ -887,23 +998,20 @@ STORE(bch2_dev) mutex_unlock(&c->sb_lock); } - if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies); - - if (v < 0) - return v; + if (attr == &sysfs_durability) { + u64 v = strtoul_or_return(buf); mutex_lock(&c->sb_lock); - mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx]; + mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; - if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) { - SET_BCH_MEMBER_REPLACEMENT(mi, v); + if (v != BCH_MEMBER_DURABILITY(mi)) { + SET_BCH_MEMBER_DURABILITY(mi, v + 1); bch2_write_super(c); } mutex_unlock(&c->sb_lock); } - if (attr == &sysfs_group) { + if (attr == &sysfs_label) { char *tmp; int ret; @@ -917,9 +1025,6 @@ STORE(bch2_dev) return ret; } - if (attr == &sysfs_wake_allocator) - bch2_wake_allocator(ca); - return size; } SYSFS_OPS(bch2_dev); @@ -927,31 +1032,26 @@ SYSFS_OPS(bch2_dev); struct attribute *bch2_dev_files[] = { &sysfs_uuid, &sysfs_bucket_size, - &sysfs_block_size, &sysfs_first_bucket, &sysfs_nbuckets, + &sysfs_durability, /* settings: */ &sysfs_discard, - &sysfs_cache_replacement_policy, &sysfs_state_rw, - &sysfs_group, + &sysfs_label, &sysfs_has_data, - &sysfs_iostats, + &sysfs_iodone, - /* alloc info - other stats: */ - &sysfs_read_priority_stats, - &sysfs_write_priority_stats, - &sysfs_fragmentation_stats, - &sysfs_oldest_gen_stats, - &sysfs_reserve_stats, + &sysfs_io_latency_read, + &sysfs_io_latency_write, + &sysfs_io_latency_stats_read, + &sysfs_io_latency_stats_write, + &sysfs_congested, /* debug: */ &sysfs_alloc_debug, - &sysfs_wake_allocator, - - sysfs_pd_controller_files(copy_gc), NULL };