]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/sysfs.c
Update bcachefs sources to 367a8fad45 bcachefs: Reset journal flush delay to default...
[bcachefs-tools-debian] / libbcachefs / sysfs.c
index a0b9faeb96b25db679604898cd49d7ac6b59e451..2594fec4b82191d1fff5eb31c6cc72c7482b0d14 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * bcache sysfs interfaces
  *
@@ -5,25 +6,36 @@
  * Copyright 2012 Google, Inc.
  */
 
+#ifndef NO_BCACHEFS_SYSFS
+
 #include "bcachefs.h"
-#include "alloc.h"
-#include "compress.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "sysfs.h"
 #include "btree_cache.h"
+#include "btree_io.h"
 #include "btree_iter.h"
+#include "btree_key_cache.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "ec.h"
 #include "inode.h"
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
 #include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
 #include "super-io.h"
-#include "tier.h"
+#include "tests.h"
 
 #include <linux/blkdev.h>
 #include <linux/sort.h>
+#include <linux/sched/clock.h>
 
 #include "util.h"
 
@@ -34,8 +46,28 @@ struct sysfs_ops type ## _sysfs_ops = {                                      \
 }
 
 #define SHOW(fn)                                                       \
+static ssize_t fn ## _to_text(struct printbuf *,                       \
+                             struct kobject *, struct attribute *);\
+                                                                       \
 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
                           char *buf)                                   \
+{                                                                      \
+       struct printbuf out = PRINTBUF;                                 \
+       ssize_t ret = fn ## _to_text(&out, kobj, attr);                 \
+                                                                       \
+       if (!ret && out.allocation_failure)                             \
+               ret = -ENOMEM;                                          \
+                                                                       \
+       if (!ret) {                                                     \
+               ret = min_t(size_t, out.pos, PAGE_SIZE - 1);            \
+               memcpy(buf, out.buf, ret);                              \
+       }                                                               \
+       printbuf_exit(&out);                                            \
+       return ret;                                                     \
+}                                                                      \
+                                                                       \
+static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
+                             struct attribute *attr)
 
 #define STORE(fn)                                                      \
 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
@@ -52,22 +84,19 @@ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
 #define sysfs_printf(file, fmt, ...)                                   \
 do {                                                                   \
        if (attr == &sysfs_ ## file)                                    \
-               return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \
+               pr_buf(out, fmt "\n", __VA_ARGS__);                     \
 } while (0)
 
 #define sysfs_print(file, var)                                         \
 do {                                                                   \
        if (attr == &sysfs_ ## file)                                    \
-               return snprint(buf, PAGE_SIZE, var);                    \
+               snprint(out, var);                                      \
 } while (0)
 
 #define sysfs_hprint(file, val)                                                \
 do {                                                                   \
-       if (attr == &sysfs_ ## file) {                                  \
-               ssize_t ret = bch2_hprint(buf, val);                    \
-               strcat(buf, "\n");                                      \
-               return ret + 1;                                         \
-       }                                                               \
+       if (attr == &sysfs_ ## file)                                    \
+               bch2_hprint(out, val);                                  \
 } while (0)
 
 #define var_printf(_var, fmt)  sysfs_printf(_var, fmt, var(_var))
@@ -120,282 +149,214 @@ do {                                                                    \
                return strtoi_h(buf, &var) ?: (ssize_t) size;           \
 } while (0)
 
-write_attribute(trigger_btree_coalesce);
 write_attribute(trigger_gc);
 write_attribute(prune_cache);
+rw_attribute(btree_gc_periodic);
+rw_attribute(gc_gens_pos);
 
 read_attribute(uuid);
 read_attribute(minor);
 read_attribute(bucket_size);
-read_attribute(bucket_size_bytes);
-read_attribute(block_size);
-read_attribute(block_size_bytes);
-read_attribute(btree_node_size);
-read_attribute(btree_node_size_bytes);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
-read_attribute(tree_depth);
-read_attribute(root_usage_percent);
-read_attribute(read_priority_stats);
-read_attribute(write_priority_stats);
-read_attribute(fragmentation_stats);
-read_attribute(oldest_gen_stats);
-read_attribute(reserve_stats);
+read_attribute(durability);
+read_attribute(iodone);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(btree_avg_write_size);
+
 read_attribute(btree_cache_size);
-read_attribute(cache_available_percent);
 read_attribute(compression_stats);
-read_attribute(written);
-read_attribute(btree_written);
-read_attribute(metadata_written);
 read_attribute(journal_debug);
-write_attribute(journal_flush);
+read_attribute(btree_updates);
+read_attribute(btree_cache);
+read_attribute(btree_key_cache);
+read_attribute(btree_transactions);
+read_attribute(stripes_heap);
+read_attribute(open_buckets);
+
 read_attribute(internal_uuid);
 
-read_attribute(btree_gc_running);
-
-read_attribute(btree_nodes);
-read_attribute(btree_used_percent);
-read_attribute(average_key_size);
-read_attribute(available_buckets);
-read_attribute(free_buckets);
-read_attribute(dirty_data);
-read_attribute(dirty_bytes);
-read_attribute(dirty_buckets);
-read_attribute(cached_data);
-read_attribute(cached_bytes);
-read_attribute(cached_buckets);
-read_attribute(meta_buckets);
-read_attribute(alloc_buckets);
 read_attribute(has_data);
-read_attribute(has_metadata);
-read_attribute(bset_tree_stats);
 read_attribute(alloc_debug);
 
-read_attribute(cache_read_races);
-
-rw_attribute(journal_write_delay_ms);
-rw_attribute(journal_reclaim_delay_ms);
-read_attribute(journal_entry_size_max);
+read_attribute(read_realloc_races);
+read_attribute(extent_migrate_done);
+read_attribute(extent_migrate_raced);
+read_attribute(bucket_alloc_fail);
 
 rw_attribute(discard);
-rw_attribute(cache_replacement_policy);
+rw_attribute(label);
 
-rw_attribute(foreground_write_ratelimit_enabled);
 rw_attribute(copy_gc_enabled);
-sysfs_pd_controller_attribute(copy_gc);
-
-rw_attribute(tier);
-rw_attribute(tiering_enabled);
-rw_attribute(tiering_percent);
-sysfs_pd_controller_attribute(tiering);
-
-sysfs_pd_controller_attribute(foreground_write);
-
-rw_attribute(pd_controllers_update_seconds);
-
-rw_attribute(foreground_target_percent);
+read_attribute(copy_gc_wait);
 
-read_attribute(meta_replicas_have);
-read_attribute(data_replicas_have);
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_work);
+rw_attribute(promote_whole_extents);
 
-#define BCH_DEBUG_PARAM(name, description)                             \
-       rw_attribute(name);
+read_attribute(new_stripes);
 
-       BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+read_attribute(io_timers_read);
+read_attribute(io_timers_write);
 
-#define BCH_OPT(_name, _mode, ...)                                     \
-       static struct attribute sysfs_opt_##_name = {                   \
-               .name = #_name, .mode = _mode,                          \
-       };
+read_attribute(data_jobs);
 
-       BCH_VISIBLE_OPTS()
-#undef BCH_OPT
+#ifdef CONFIG_BCACHEFS_TESTS
+write_attribute(perf_test);
+#endif /* CONFIG_BCACHEFS_TESTS */
 
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_time_stats_attribute(name, frequency_units, duration_units);
+#define x(_name)                                               \
+       static struct attribute sysfs_time_stat_##_name =               \
+               { .name = #_name, .mode = S_IRUGO };
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
 
 static struct attribute sysfs_state_rw = {
        .name = "state",
        .mode = S_IRUGO
 };
 
-static int bch2_bset_print_stats(struct bch_fs *c, char *buf)
-{
-       struct bset_stats stats;
-       size_t nodes = 0;
-       struct btree *b;
-       struct bucket_table *tbl;
-       struct rhash_head *pos;
-       unsigned iter;
-
-       memset(&stats, 0, sizeof(stats));
-
-       rcu_read_lock();
-       for_each_cached_btree(b, c, tbl, iter, pos) {
-               bch2_btree_keys_stats(b, &stats);
-               nodes++;
-       }
-       rcu_read_unlock();
-
-       return snprintf(buf, PAGE_SIZE,
-                       "btree nodes:           %zu\n"
-                       "written sets:          %zu\n"
-                       "written key bytes:     %zu\n"
-                       "unwritten sets:                %zu\n"
-                       "unwritten key bytes:   %zu\n"
-                       "no table sets:         %zu\n"
-                       "no table key bytes:    %zu\n"
-                       "floats:                        %zu\n"
-                       "failed unpacked:       %zu\n"
-                       "failed prev:           %zu\n"
-                       "failed overflow:       %zu\n",
-                       nodes,
-                       stats.sets[BSET_RO_AUX_TREE].nr,
-                       stats.sets[BSET_RO_AUX_TREE].bytes,
-                       stats.sets[BSET_RW_AUX_TREE].nr,
-                       stats.sets[BSET_RW_AUX_TREE].bytes,
-                       stats.sets[BSET_NO_AUX_TREE].nr,
-                       stats.sets[BSET_NO_AUX_TREE].bytes,
-                       stats.floats,
-                       stats.failed_unpacked,
-                       stats.failed_prev,
-                       stats.failed_overflow);
-}
-
-static unsigned bch2_root_usage(struct bch_fs *c)
-{
-       unsigned bytes = 0;
-       struct bkey_packed *k;
-       struct btree *b;
-       struct btree_node_iter iter;
-
-       goto lock_root;
-
-       do {
-               six_unlock_read(&b->lock);
-lock_root:
-               b = c->btree_roots[BTREE_ID_EXTENTS].b;
-               six_lock_read(&b->lock);
-       } while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
-
-       for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
-               bytes += bkey_bytes(k);
-
-       six_unlock_read(&b->lock);
-
-       return (bytes * 100) / btree_bytes(c);
-}
-
 static size_t bch2_btree_cache_size(struct bch_fs *c)
 {
        size_t ret = 0;
        struct btree *b;
 
-       mutex_lock(&c->btree_cache_lock);
-       list_for_each_entry(b, &c->btree_cache, list)
+       mutex_lock(&c->btree_cache.lock);
+       list_for_each_entry(b, &c->btree_cache.live, list)
                ret += btree_bytes(c);
 
-       mutex_unlock(&c->btree_cache_lock);
+       mutex_unlock(&c->btree_cache.lock);
        return ret;
 }
 
-static unsigned bch2_fs_available_percent(struct bch_fs *c)
+static size_t bch2_btree_avg_write_size(struct bch_fs *c)
 {
-       return div64_u64((u64) sectors_available(c) * 100,
-                        c->capacity ?: 1);
-}
+       u64 nr = atomic64_read(&c->btree_writes_nr);
+       u64 sectors = atomic64_read(&c->btree_writes_sectors);
 
-#if 0
-static unsigned bch2_btree_used(struct bch_fs *c)
-{
-       return div64_u64(c->gc_stats.key_bytes * 100,
-                        (c->gc_stats.nodes ?: 1) * btree_bytes(c));
+       return nr ? div64_u64(sectors, nr) : 0;
 }
 
-static unsigned bch2_average_key_size(struct bch_fs *c)
+static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
 {
-       return c->gc_stats.nkeys
-               ? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
-               : 0;
-}
-#endif
+       long ret = 0;
+       struct bch_move_stats *stats;
+
+       mutex_lock(&c->data_progress_lock);
+       list_for_each_entry(stats, &c->data_progress_list, list) {
+               pr_buf(out, "%s: data type %s btree_id %s position: ",
+                      stats->name,
+                      bch2_data_types[stats->data_type],
+                      bch2_btree_ids[stats->btree_id]);
+               bch2_bpos_to_text(out, stats->pos);
+               pr_buf(out, "%s", "\n");
+       }
 
-static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
-{
-       struct bch_fs_usage stats = bch2_fs_usage_read(c);
-
-       return scnprintf(buf, PAGE_SIZE,
-                        "capacity:\t\t%llu\n"
-                        "compressed:\n"
-                        "\tmeta:\t\t%llu\n"
-                        "\tdirty:\t\t%llu\n"
-                        "\tcached:\t\t%llu\n"
-                        "uncompressed:\n"
-                        "\tmeta:\t\t%llu\n"
-                        "\tdirty:\t\t%llu\n"
-                        "\tcached:\t\t%llu\n"
-                        "persistent reserved sectors:\t%llu\n"
-                        "online reserved sectors:\t%llu\n",
-                        c->capacity,
-                        stats.s[S_COMPRESSED][S_META],
-                        stats.s[S_COMPRESSED][S_DIRTY],
-                        stats.s[S_COMPRESSED][S_CACHED],
-                        stats.s[S_UNCOMPRESSED][S_META],
-                        stats.s[S_UNCOMPRESSED][S_DIRTY],
-                        stats.s[S_UNCOMPRESSED][S_CACHED],
-                        stats.persistent_reserved,
-                        stats.online_reserved);
+       mutex_unlock(&c->data_progress_lock);
+       return ret;
 }
 
-static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
 {
+       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
+       enum btree_id id;
+       u64 nr_uncompressed_extents = 0,
            nr_compressed_extents = 0,
+           nr_incompressible_extents = 0,
+           uncompressed_sectors = 0,
+           incompressible_sectors = 0,
            compressed_sectors_compressed = 0,
            compressed_sectors_uncompressed = 0;
+       int ret = 0;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EPERM;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
-               if (k.k->type == BCH_EXTENT) {
-                       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-                       const struct bch_extent_ptr *ptr;
-                       const union bch_extent_crc *crc;
-
-                       extent_for_each_ptr_crc(e, ptr, crc) {
-                               if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
-                                       nr_uncompressed_extents++;
-                                       uncompressed_sectors += e.k->size;
-                               } else {
-                                       nr_compressed_extents++;
+       bch2_trans_init(&trans, c, 0, 0);
+       /* Stop at the first failing btree so its error is not overwritten. */
+       for (id = 0; !ret && id < BTREE_ID_NR; id++) {
+               if (!((1U << id) & BTREE_ID_HAS_PTRS))
+                       continue;
+
+               for_each_btree_key(&trans, iter, id, POS_MIN,
+                                  BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+                       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+                       const union bch_extent_entry *entry;
+                       struct extent_ptr_decoded p;
+                       bool compressed = false, uncompressed = false, incompressible = false;
+
+                       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                               switch (p.crc.compression_type) {
+                               case BCH_COMPRESSION_TYPE_none:
+                                       uncompressed = true;
+                                       uncompressed_sectors += k.k->size;
+                                       break;
+                               case BCH_COMPRESSION_TYPE_incompressible:
+                                       incompressible = true;
+                                       incompressible_sectors += k.k->size;
+                                       break;
+                               default:
                                        compressed_sectors_compressed +=
-                                               crc_compressed_size(e.k, crc);
+                                               p.crc.compressed_size;
                                        compressed_sectors_uncompressed +=
-                                               crc_uncompressed_size(e.k, crc);
+                                               p.crc.uncompressed_size;
+                                       compressed = true;
+                                       break;
                                }
-
-                               /* only looking at the first ptr */
-                               break;
                        }
+
+                       if (incompressible)
+                               nr_incompressible_extents++;
+                       else if (uncompressed)
+                               nr_uncompressed_extents++;
+                       else if (compressed)
+                               nr_compressed_extents++;
                }
-       bch2_btree_iter_unlock(&iter);
-
-       return snprintf(buf, PAGE_SIZE,
-                       "uncompressed data:\n"
-                       "       nr extents:                     %llu\n"
-                       "       size (bytes):                   %llu\n"
-                       "compressed data:\n"
-                       "       nr extents:                     %llu\n"
-                       "       compressed size (bytes):        %llu\n"
-                       "       uncompressed size (bytes):      %llu\n",
-                       nr_uncompressed_extents,
-                       uncompressed_sectors << 9,
-                       nr_compressed_extents,
-                       compressed_sectors_compressed << 9,
-                       compressed_sectors_uncompressed << 9);
+               bch2_trans_iter_exit(&trans, &iter);
+       }
+
+       bch2_trans_exit(&trans);
+
+       if (ret)
+               return ret;
+
+       pr_buf(out, "uncompressed:\n");
+       pr_buf(out, "   nr extents:             %llu\n", nr_uncompressed_extents);
+       pr_buf(out, "   size:                   ");
+       bch2_hprint(out, uncompressed_sectors << 9);
+       pr_buf(out, "\n");
+
+       pr_buf(out, "compressed:\n");
+       pr_buf(out, "   nr extents:             %llu\n", nr_compressed_extents);
+       pr_buf(out, "   compressed size:        ");
+       bch2_hprint(out, compressed_sectors_compressed << 9);
+       pr_buf(out, "\n");
+       pr_buf(out, "   uncompressed size:      ");
+       bch2_hprint(out, compressed_sectors_uncompressed << 9);
+       pr_buf(out, "\n");
+
+       pr_buf(out, "incompressible:\n");
+       pr_buf(out, "   nr extents:             %llu\n", nr_incompressible_extents);
+       pr_buf(out, "   size:                   ");
+       bch2_hprint(out, incompressible_sectors << 9);
+       pr_buf(out, "\n");
+       return 0;
+}
+
+static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       pr_buf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
+       bch2_bpos_to_text(out, c->gc_gens_pos);
+       pr_buf(out, "\n");
 }
 
 SHOW(bch2_fs)
@@ -403,185 +364,179 @@ SHOW(bch2_fs)
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
 
        sysfs_print(minor,                      c->minor);
-
-       sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
-       sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
-       sysfs_hprint(journal_entry_size_max,    c->journal.entry_size_max);
-
-       sysfs_hprint(block_size,                block_bytes(c));
-       sysfs_print(block_size_bytes,           block_bytes(c));
-       sysfs_hprint(btree_node_size,           c->sb.btree_node_size << 9);
-       sysfs_print(btree_node_size_bytes,      c->sb.btree_node_size << 9);
+       sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
 
        sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
-       sysfs_print(cache_available_percent,    bch2_fs_available_percent(c));
+       sysfs_hprint(btree_avg_write_size,      bch2_btree_avg_write_size(c));
 
-       sysfs_print(btree_gc_running,           c->gc_pos.phase != GC_PHASE_DONE);
+       sysfs_print(read_realloc_races,
+                   atomic_long_read(&c->read_realloc_races));
+       sysfs_print(extent_migrate_done,
+                   atomic_long_read(&c->extent_migrate_done));
+       sysfs_print(extent_migrate_raced,
+                   atomic_long_read(&c->extent_migrate_raced));
+       sysfs_print(bucket_alloc_fail,
+                   atomic_long_read(&c->bucket_alloc_fail));
 
-#if 0
-       /* XXX: reimplement */
-       sysfs_print(btree_used_percent, bch2_btree_used(c));
-       sysfs_print(btree_nodes,        c->gc_stats.nodes);
-       sysfs_hprint(average_key_size,  bch2_average_key_size(c));
-#endif
+       sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
 
-       sysfs_print(cache_read_races,
-                   atomic_long_read(&c->cache_read_races));
+       if (attr == &sysfs_gc_gens_pos)
+               bch2_gc_gens_pos_to_text(out, c);
 
-       sysfs_printf(foreground_write_ratelimit_enabled, "%i",
-                    c->foreground_write_ratelimit_enabled);
        sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-       sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);
-
-       sysfs_print(pd_controllers_update_seconds,
-                   c->pd_controllers_update_seconds);
-       sysfs_print(foreground_target_percent, c->foreground_target_percent);
 
-       sysfs_printf(tiering_enabled,           "%i", c->tiering_enabled);
-       sysfs_print(tiering_percent,            c->tiering_percent);
+       sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
+       sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
+       sysfs_hprint(copy_gc_wait,
+                    max(0LL, c->copygc_wait -
+                        atomic64_read(&c->io_clock[WRITE].now)) << 9);
 
-       sysfs_pd_controller_show(tiering,       &c->tiers[1].pd); /* XXX */
+       if (attr == &sysfs_rebalance_work)
+               bch2_rebalance_work_to_text(out, c);
 
-       sysfs_printf(meta_replicas_have, "%u",  c->sb.meta_replicas_have);
-       sysfs_printf(data_replicas_have, "%u",  c->sb.data_replicas_have);
+       sysfs_print(promote_whole_extents,      c->promote_whole_extents);
 
        /* Debugging: */
 
        if (attr == &sysfs_journal_debug)
-               return bch2_journal_print_debug(&c->journal, buf);
+               bch2_journal_debug_to_text(out, &c->journal);
 
-#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
-       BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+       if (attr == &sysfs_btree_updates)
+               bch2_btree_updates_to_text(out, c);
 
-       if (!bch2_fs_running(c))
-               return -EPERM;
+       if (attr == &sysfs_btree_cache)
+               bch2_btree_cache_to_text(out, c);
 
-       if (attr == &sysfs_bset_tree_stats)
-               return bch2_bset_print_stats(c, buf);
-       if (attr == &sysfs_alloc_debug)
-               return show_fs_alloc_debug(c, buf);
+       if (attr == &sysfs_btree_key_cache)
+               bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
+
+       if (attr == &sysfs_btree_transactions)
+               bch2_btree_trans_to_text(out, c);
 
-       sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
-       sysfs_print(root_usage_percent,         bch2_root_usage(c));
+       if (attr == &sysfs_stripes_heap)
+               bch2_stripes_heap_to_text(out, c);
+
+       if (attr == &sysfs_open_buckets)
+               bch2_open_buckets_to_text(out, c);
 
        if (attr == &sysfs_compression_stats)
-               return bch2_compression_stats(c, buf);
+               bch2_compression_stats_to_text(out, c);
+
+       if (attr == &sysfs_new_stripes)
+               bch2_new_stripes_to_text(out, c);
+
+       if (attr == &sysfs_io_timers_read)
+               bch2_io_timers_to_text(out, &c->io_clock[READ]);
 
-       sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
+       if (attr == &sysfs_io_timers_write)
+               bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
+
+       if (attr == &sysfs_data_jobs)
+               data_progress_to_text(out, c);
 
        return 0;
 }
 
-STORE(__bch2_fs)
+STORE(bch2_fs)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
 
-       sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
-       sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
+       if (attr == &sysfs_btree_gc_periodic) {
+               ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) ?: (ssize_t) size;
 
-       sysfs_strtoul(foreground_write_ratelimit_enabled,
-                     c->foreground_write_ratelimit_enabled);
+               if (c->gc_thread) /* guarded like copygc_thread below */
+                       wake_up_process(c->gc_thread);
+               return ret;
+       }
 
        if (attr == &sysfs_copy_gc_enabled) {
-               struct bch_dev *ca;
-               unsigned i;
                ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
                        ?: (ssize_t) size;
 
-               for_each_member_device(ca, c, i)
-                       if (ca->moving_gc_read)
-                               wake_up_process(ca->moving_gc_read);
+               if (c->copygc_thread)
+                       wake_up_process(c->copygc_thread);
                return ret;
        }
 
-       if (attr == &sysfs_tiering_enabled) {
-               ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
+       if (attr == &sysfs_rebalance_enabled) {
+               ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
                        ?: (ssize_t) size;
 
-               bch2_tiering_start(c); /* issue wakeups */
+               rebalance_wakeup(c);
                return ret;
        }
 
-       sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
-
-       sysfs_strtoul(pd_controllers_update_seconds,
-                     c->pd_controllers_update_seconds);
-       sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
+       sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
 
-       sysfs_strtoul(tiering_percent,          c->tiering_percent);
-       sysfs_pd_controller_store(tiering,      &c->tiers[1].pd); /* XXX */
+       sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
 
        /* Debugging: */
 
-#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
-       BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
-
-       if (!bch2_fs_running(c))
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;
 
-       if (attr == &sysfs_journal_flush) {
-               bch2_journal_meta_async(&c->journal, NULL);
-
-               return size;
-       }
-
-       if (attr == &sysfs_trigger_btree_coalesce)
-               bch2_coalesce(c);
-
        /* Debugging: */
 
-       if (attr == &sysfs_trigger_gc)
-               bch2_gc(c);
+       if (!test_bit(BCH_FS_RW, &c->flags))
+               return -EROFS;
 
        if (attr == &sysfs_prune_cache) {
                struct shrink_control sc;
 
                sc.gfp_mask = GFP_KERNEL;
                sc.nr_to_scan = strtoul_or_return(buf);
-               c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
+               c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
        }
 
-       return size;
-}
-
-STORE(bch2_fs)
-{
-       struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-
-       mutex_lock(&c->state_lock);
-       size = __bch2_fs_store(kobj, attr, buf, size);
-       mutex_unlock(&c->state_lock);
+       if (attr == &sysfs_trigger_gc) {
+               /*
+                * Full gc is currently incompatible with btree key cache:
+                */
+#if 0
+               down_read(&c->state_lock);
+               bch2_gc(c, false, false);
+               up_read(&c->state_lock);
+#else
+               bch2_gc_gens(c);
+#endif
+       }
 
+#ifdef CONFIG_BCACHEFS_TESTS
+       if (attr == &sysfs_perf_test) {
+               char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
+               char *test              = strsep(&p, " \t\n");
+               char *nr_str            = strsep(&p, " \t\n");
+               char *threads_str       = strsep(&p, " \t\n");
+               unsigned threads;
+               u64 nr;
+               int ret = tmp ? -EINVAL : -ENOMEM; /* kstrdup() may fail */
+
+               if (threads_str &&
+                   !(ret = kstrtouint(threads_str, 10, &threads)) &&
+                   !(ret = bch2_strtoull_h(nr_str, &nr)))
+                       ret = bch2_btree_perf_test(c, test, nr, threads);
+               kfree(tmp);
+
+               if (ret)
+                       size = ret;
+       }
+#endif
        return size;
 }
 SYSFS_OPS(bch2_fs);
 
 struct attribute *bch2_fs_files[] = {
-       &sysfs_journal_write_delay_ms,
-       &sysfs_journal_reclaim_delay_ms,
-       &sysfs_journal_entry_size_max,
-
-       &sysfs_block_size,
-       &sysfs_block_size_bytes,
-       &sysfs_btree_node_size,
-       &sysfs_btree_node_size_bytes,
-       &sysfs_tree_depth,
-       &sysfs_root_usage_percent,
+       &sysfs_minor,
        &sysfs_btree_cache_size,
-       &sysfs_cache_available_percent,
-       &sysfs_compression_stats,
-
-       &sysfs_average_key_size,
+       &sysfs_btree_avg_write_size,
 
-       &sysfs_meta_replicas_have,
-       &sysfs_data_replicas_have,
+       &sysfs_promote_whole_extents,
 
-       &sysfs_foreground_target_percent,
-       &sysfs_tiering_percent,
+       &sysfs_compression_stats,
 
-       &sysfs_journal_flush,
+#ifdef CONFIG_BCACHEFS_TESTS
+       &sysfs_perf_test,
+#endif
        NULL
 };
 
@@ -590,7 +545,7 @@ struct attribute *bch2_fs_files[] = {
 SHOW(bch2_fs_internal)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
-       return bch2_fs_show(&c->kobj, attr, buf);
+       return bch2_fs_to_text(out, &c->kobj, attr);
 }
 
 STORE(bch2_fs_internal)
@@ -602,31 +557,36 @@ SYSFS_OPS(bch2_fs_internal);
 
 struct attribute *bch2_fs_internal_files[] = {
        &sysfs_journal_debug,
+       &sysfs_btree_updates,
+       &sysfs_btree_cache,
+       &sysfs_btree_key_cache,
+       &sysfs_btree_transactions,
+       &sysfs_new_stripes,
+       &sysfs_stripes_heap,
+       &sysfs_open_buckets,
+       &sysfs_io_timers_read,
+       &sysfs_io_timers_write,
 
-       &sysfs_alloc_debug,
-
-       &sysfs_btree_gc_running,
+       &sysfs_trigger_gc,
+       &sysfs_prune_cache,
 
-       &sysfs_btree_nodes,
-       &sysfs_btree_used_percent,
+       &sysfs_read_realloc_races,
+       &sysfs_extent_migrate_done,
+       &sysfs_extent_migrate_raced,
+       &sysfs_bucket_alloc_fail,
 
-       &sysfs_bset_tree_stats,
-       &sysfs_cache_read_races,
+       &sysfs_gc_gens_pos,
 
-       &sysfs_trigger_btree_coalesce,
-       &sysfs_trigger_gc,
-       &sysfs_prune_cache,
-       &sysfs_foreground_write_ratelimit_enabled,
        &sysfs_copy_gc_enabled,
-       &sysfs_tiering_enabled,
-       sysfs_pd_controller_files(tiering),
-       sysfs_pd_controller_files(foreground_write),
-       &sysfs_internal_uuid,
+       &sysfs_copy_gc_wait,
 
-#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
-       BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+       &sysfs_rebalance_enabled,
+       &sysfs_rebalance_work,
+       sysfs_pd_controller_files(rebalance),
 
+       &sysfs_data_jobs,
+
+       &sysfs_internal_uuid,
        NULL
 };
 
@@ -635,55 +595,81 @@ struct attribute *bch2_fs_internal_files[] = {
 SHOW(bch2_fs_opts_dir)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+       const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+       int id = opt - bch2_opt_table;
+       u64 v = bch2_opt_get_by_id(&c->opts, id);
+
+       bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
+       pr_char(out, '\n');
 
-       return bch2_opt_show(&c->opts, attr->name, buf, PAGE_SIZE);
+       return 0;
 }
 
 STORE(bch2_fs_opts_dir)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-       const struct bch_option *opt;
-       enum bch_opt_id id;
+       const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+       int ret = size, id = opt - bch2_opt_table;
+       char *tmp;
        u64 v;
 
-       id = bch2_parse_sysfs_opt(attr->name, buf, &v);
-       if (id < 0)
-               return id;
-
-       opt = &bch2_opt_table[id];
-
-       mutex_lock(&c->sb_lock);
-
-       if (id == Opt_compression) {
-               int ret = bch2_check_set_has_compressed_data(c, v);
-               if (ret) {
-                       mutex_unlock(&c->sb_lock);
-                       return ret;
-               }
+       /*
+        * We don't need to take c->writes for correctness, but it eliminates an
+        * unsightly error message in the dmesg log when we're RO:
+        */
+       if (unlikely(!percpu_ref_tryget(&c->writes)))
+               return -EROFS;
+
+       tmp = kstrdup(buf, GFP_KERNEL);
+       if (!tmp) {
+               ret = -ENOMEM;
+               goto err;
        }
 
-       if (opt->set_sb != SET_NO_SB_OPT) {
-               opt->set_sb(c->disk_sb, v);
-               bch2_write_super(c);
-       }
+       ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
+       kfree(tmp);
 
-       bch2_opt_set(&c->opts, id, v);
+       if (ret < 0)
+               goto err;
 
-       mutex_unlock(&c->sb_lock);
+       ret = bch2_opt_check_may_set(c, id, v);
+       if (ret < 0)
+               goto err;
 
-       return size;
+       bch2_opt_set_sb(c, opt, v);
+       bch2_opt_set_by_id(&c->opts, id, v);
+
+       if ((id == Opt_background_target ||
+            id == Opt_background_compression) && v) {
+               bch2_rebalance_add_work(c, S64_MAX);
+               rebalance_wakeup(c);
+       }
+err:
+       percpu_ref_put(&c->writes);
+       return ret;
 }
 SYSFS_OPS(bch2_fs_opts_dir);
 
-struct attribute *bch2_fs_opts_dir_files[] = {
-#define BCH_OPT(_name, ...)                                            \
-       &sysfs_opt_##_name,
+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
+
+int bch2_opts_create_sysfs_files(struct kobject *kobj)
+{
+       const struct bch_option *i;
+       int ret;
 
-       BCH_VISIBLE_OPTS()
-#undef BCH_OPT
+       for (i = bch2_opt_table;
+            i < bch2_opt_table + bch2_opts_nr;
+            i++) {
+               if (!(i->flags & OPT_FS))
+                       continue;
 
-       NULL
-};
+               ret = sysfs_create_file(kobj, &i->attr);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
 
 /* time stats */
 
@@ -691,224 +677,148 @@ SHOW(bch2_fs_time_stats)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
 
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_print_time_stats(&c->name##_time, name,                   \
-                              frequency_units, duration_units);
+#define x(name)                                                                \
+       if (attr == &sysfs_time_stat_##name)                            \
+               bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
 
        return 0;
 }
 
 STORE(bch2_fs_time_stats)
 {
-       struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_clear_time_stats(&c->name##_time, name);
-       BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
        return size;
 }
 SYSFS_OPS(bch2_fs_time_stats);
 
 struct attribute *bch2_fs_time_stats_files[] = {
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
+#define x(name)                                                \
+       &sysfs_time_stat_##name,
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
+#undef x
        NULL
 };
 
-typedef unsigned (bucket_map_fn)(struct bch_dev *, struct bucket *, void *);
-
-static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g,
-                                  void *private)
+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
 {
-       int rw = (private ? 1 : 0);
-
-       return ca->fs->prio_clock[rw].hand - g->prio[rw];
-}
-
-static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g,
-                                      void *private)
-{
-       return bucket_sectors_used(g);
+       struct bch_fs *c = ca->fs;
+       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+       unsigned i, nr[BCH_DATA_NR];
+
+       memset(nr, 0, sizeof(nr));
+
+       for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+               nr[c->open_buckets[i].data_type]++;
+
+       pr_buf(out,
+              "\t\t buckets\t sectors      fragmented\n"
+              "capacity%16llu\n",
+              ca->mi.nbuckets - ca->mi.first_bucket);
+
+       for (i = 1; i < BCH_DATA_NR; i++)
+               pr_buf(out, "%-8s%16llu%16llu%16llu\n",
+                      bch2_data_types[i], stats.d[i].buckets,
+                      stats.d[i].sectors, stats.d[i].fragmented);
+
+       pr_buf(out,
+              "ec\t%16llu\n"
+              "available%15llu\n"
+              "\n"
+              "freelist_wait\t\t%s\n"
+              "open buckets allocated\t%u\n"
+              "open buckets this dev\t%u\n"
+              "open buckets total\t%u\n"
+              "open_buckets_wait\t%s\n"
+              "open_buckets_btree\t%u\n"
+              "open_buckets_user\t%u\n"
+              "btree reserve cache\t%u\n",
+              stats.buckets_ec,
+              __dev_buckets_available(ca, stats, RESERVE_none),
+              c->freelist_wait.list.first              ? "waiting" : "empty",
+              OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
+              ca->nr_open_buckets,
+              OPEN_BUCKETS_COUNT,
+              c->open_buckets_wait.list.first          ? "waiting" : "empty",
+              nr[BCH_DATA_btree],
+              nr[BCH_DATA_user],
+              c->btree_reserve_cache_nr);
 }
 
-static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g,
-                                    void *private)
-{
-       return bucket_gc_gen(ca, g);
-}
+static const char * const bch2_rw[] = {
+       "read",
+       "write",
+       NULL
+};
 
-static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
-                             bucket_map_fn *fn, void *private)
+static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
 {
-       int cmp(const void *l, const void *r)
-       {       return *((unsigned *) r) - *((unsigned *) l); }
-
-       size_t n = ca->mi.nbuckets, i;
-       /* Compute 31 quantiles */
-       unsigned q[31], *p;
-       ssize_t ret = 0;
-
-       p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
-       if (!p)
-               return -ENOMEM;
-
-       for (i = ca->mi.first_bucket; i < n; i++)
-               p[i] = fn(ca, &ca->buckets[i], private);
-
-       sort(p, n, sizeof(unsigned), cmp, NULL);
-
-       while (n &&
-              !p[n - 1])
-               --n;
+       int rw, i;
 
-       for (i = 0; i < ARRAY_SIZE(q); i++)
-               q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
-
-       vfree(p);
-
-       for (i = 0; i < ARRAY_SIZE(q); i++)
-               ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-                                "%u ", q[i]);
-       buf[ret - 1] = '\n';
-
-       return ret;
+       for (rw = 0; rw < 2; rw++) {
+               pr_buf(out, "%s:\n", bch2_rw[rw]);
 
+               for (i = 1; i < BCH_DATA_NR; i++)
+                       pr_buf(out, "%-12s:%12llu\n",
+                              bch2_data_types[i],
+                              percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
+       }
 }
 
-static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
+SHOW(bch2_dev)
 {
-       enum alloc_reserve i;
-       ssize_t ret;
-
-       spin_lock(&ca->freelist_lock);
+       struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+       struct bch_fs *c = ca->fs;
 
-       ret = scnprintf(buf, PAGE_SIZE,
-                       "free_inc:\t%zu\t%zu\n",
-                       fifo_used(&ca->free_inc),
-                       ca->free_inc.size);
+       sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
 
-       for (i = 0; i < RESERVE_NR; i++)
-               ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-                                "free[%u]:\t%zu\t%zu\n", i,
-                                fifo_used(&ca->free[i]),
-                                ca->free[i].size);
+       sysfs_print(bucket_size,        bucket_bytes(ca));
+       sysfs_print(first_bucket,       ca->mi.first_bucket);
+       sysfs_print(nbuckets,           ca->mi.nbuckets);
+       sysfs_print(durability,         ca->mi.durability);
+       sysfs_print(discard,            ca->mi.discard);
 
-       spin_unlock(&ca->freelist_lock);
+       if (attr == &sysfs_label) {
+               if (ca->mi.group) {
+                       mutex_lock(&c->sb_lock);
+                       bch2_disk_path_to_text(out, c->disk_sb.sb,
+                                              ca->mi.group - 1);
+                       mutex_unlock(&c->sb_lock);
+               }
 
-       return ret;
-}
+               pr_char(out, '\n');
+       }
 
-static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
-{
-       struct bch_fs *c = ca->fs;
-       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+       if (attr == &sysfs_has_data) {
+               bch2_flags_to_text(out, bch2_data_types,
+                                  bch2_dev_has_data(c, ca));
+               pr_char(out, '\n');
+       }
 
-       return scnprintf(buf, PAGE_SIZE,
-               "free_inc:               %zu/%zu\n"
-               "free[RESERVE_PRIO]:     %zu/%zu\n"
-               "free[RESERVE_BTREE]:    %zu/%zu\n"
-               "free[RESERVE_MOVINGGC]: %zu/%zu\n"
-               "free[RESERVE_NONE]:     %zu/%zu\n"
-               "alloc:                  %llu/%llu\n"
-               "meta:                   %llu/%llu\n"
-               "dirty:                  %llu/%llu\n"
-               "available:              %llu/%llu\n"
-               "freelist_wait:          %s\n"
-               "open buckets:           %u/%u (reserved %u)\n"
-               "open_buckets_wait:      %s\n",
-               fifo_used(&ca->free_inc),               ca->free_inc.size,
-               fifo_used(&ca->free[RESERVE_PRIO]),     ca->free[RESERVE_PRIO].size,
-               fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
-               fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
-               fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
-               stats.buckets_alloc,                    ca->mi.nbuckets - ca->mi.first_bucket,
-               stats.buckets_meta,                     ca->mi.nbuckets - ca->mi.first_bucket,
-               stats.buckets_dirty,                    ca->mi.nbuckets - ca->mi.first_bucket,
-               __dev_buckets_available(ca, stats),     ca->mi.nbuckets - ca->mi.first_bucket,
-               c->freelist_wait.list.first             ? "waiting" : "empty",
-               c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
-               c->open_buckets_wait.list.first         ? "waiting" : "empty");
-}
+       if (attr == &sysfs_state_rw) {
+               bch2_string_opt_to_text(out, bch2_member_states,
+                                       ca->mi.state);
+               pr_char(out, '\n');
+       }
 
-static u64 sectors_written(struct bch_dev *ca)
-{
-       u64 ret = 0;
-       int cpu;
+       if (attr == &sysfs_iodone)
+               dev_iodone_to_text(out, ca);
 
-       for_each_possible_cpu(cpu)
-               ret += *per_cpu_ptr(ca->sectors_written, cpu);
+       sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
+       sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
 
-       return ret;
-}
+       if (attr == &sysfs_io_latency_stats_read)
+               bch2_time_stats_to_text(out, &ca->io_latency[READ]);
 
-SHOW(bch2_dev)
-{
-       struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
-       struct bch_fs *c = ca->fs;
-       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+       if (attr == &sysfs_io_latency_stats_write)
+               bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
 
-       sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
+       sysfs_printf(congested,                 "%u%%",
+                    clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+                    * 100 / CONGESTED_MAX);
 
-       sysfs_hprint(bucket_size,       bucket_bytes(ca));
-       sysfs_print(bucket_size_bytes,  bucket_bytes(ca));
-       sysfs_hprint(block_size,        block_bytes(c));
-       sysfs_print(block_size_bytes,   block_bytes(c));
-       sysfs_print(first_bucket,       ca->mi.first_bucket);
-       sysfs_print(nbuckets,           ca->mi.nbuckets);
-       sysfs_print(discard,            ca->mi.discard);
-       sysfs_hprint(written, sectors_written(ca) << 9);
-       sysfs_hprint(btree_written,
-                    atomic64_read(&ca->btree_sectors_written) << 9);
-       sysfs_hprint(metadata_written,
-                    (atomic64_read(&ca->meta_sectors_written) +
-                     atomic64_read(&ca->btree_sectors_written)) << 9);
-
-       sysfs_hprint(dirty_data,        stats.sectors[S_DIRTY] << 9);
-       sysfs_print(dirty_bytes,        stats.sectors[S_DIRTY] << 9);
-       sysfs_print(dirty_buckets,      stats.buckets_dirty);
-       sysfs_hprint(cached_data,       stats.sectors[S_CACHED] << 9);
-       sysfs_print(cached_bytes,       stats.sectors[S_CACHED] << 9);
-       sysfs_print(cached_buckets,     stats.buckets_cached);
-       sysfs_print(meta_buckets,       stats.buckets_meta);
-       sysfs_print(alloc_buckets,      stats.buckets_alloc);
-       sysfs_print(available_buckets,  dev_buckets_available(ca));
-       sysfs_print(free_buckets,       dev_buckets_free(ca));
-       sysfs_print(has_data,           ca->mi.has_data);
-       sysfs_print(has_metadata,       ca->mi.has_metadata);
-
-       sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);
-
-       if (attr == &sysfs_cache_replacement_policy)
-               return bch2_snprint_string_list(buf, PAGE_SIZE,
-                                               bch2_cache_replacement_policies,
-                                               ca->mi.replacement);
-
-       sysfs_print(tier,               ca->mi.tier);
-
-       if (attr == &sysfs_state_rw)
-               return bch2_snprint_string_list(buf, PAGE_SIZE,
-                                               bch2_dev_state,
-                                               ca->mi.state);
-
-       if (attr == &sysfs_read_priority_stats)
-               return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
-       if (attr == &sysfs_write_priority_stats)
-               return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
-       if (attr == &sysfs_fragmentation_stats)
-               return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
-       if (attr == &sysfs_oldest_gen_stats)
-               return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
-       if (attr == &sysfs_reserve_stats)
-               return show_reserve_stats(ca, buf);
        if (attr == &sysfs_alloc_debug)
-               return show_dev_alloc_debug(ca, buf);
+               dev_alloc_debug_to_text(out, ca);
 
        return 0;
 }
@@ -919,13 +829,11 @@ STORE(bch2_dev)
        struct bch_fs *c = ca->fs;
        struct bch_member *mi;
 
-       sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
-
        if (attr == &sysfs_discard) {
                bool v = strtoul_or_return(buf);
 
                mutex_lock(&c->sb_lock);
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
 
                if (v != BCH_MEMBER_DISCARD(mi)) {
                        SET_BCH_MEMBER_DISCARD(mi, v);
@@ -934,45 +842,18 @@ STORE(bch2_dev)
                mutex_unlock(&c->sb_lock);
        }
 
-       if (attr == &sysfs_cache_replacement_policy) {
-               ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
+       if (attr == &sysfs_label) {
+               char *tmp;
+               int ret;
 
-               if (v < 0)
-                       return v;
+               tmp = kstrdup(buf, GFP_KERNEL);
+               if (!tmp)
+                       return -ENOMEM;
 
-               mutex_lock(&c->sb_lock);
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
-
-               if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
-                       SET_BCH_MEMBER_REPLACEMENT(mi, v);
-                       bch2_write_super(c);
-               }
-               mutex_unlock(&c->sb_lock);
-       }
-
-       if (attr == &sysfs_tier) {
-               unsigned prev_tier;
-               unsigned v = strtoul_restrict_or_return(buf,
-                                       0, BCH_TIER_MAX - 1);
-
-               mutex_lock(&c->sb_lock);
-               prev_tier = ca->mi.tier;
-
-               if (v == ca->mi.tier) {
-                       mutex_unlock(&c->sb_lock);
-                       return size;
-               }
-
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
-               SET_BCH_MEMBER_TIER(mi, v);
-               bch2_write_super(c);
-
-               bch2_dev_group_remove(&c->tiers[prev_tier].devs, ca);
-               bch2_dev_group_add(&c->tiers[ca->mi.tier].devs, ca);
-               mutex_unlock(&c->sb_lock);
-
-               bch2_recalc_capacity(c);
-               bch2_tiering_start(c);
+               ret = bch2_dev_group_set(c, ca, strim(tmp));
+               kfree(tmp);
+               if (ret)
+                       return ret;
        }
 
        return size;
@@ -982,37 +863,27 @@ SYSFS_OPS(bch2_dev);
 struct attribute *bch2_dev_files[] = {
        &sysfs_uuid,
        &sysfs_bucket_size,
-       &sysfs_bucket_size_bytes,
-       &sysfs_block_size,
-       &sysfs_block_size_bytes,
        &sysfs_first_bucket,
        &sysfs_nbuckets,
-       &sysfs_read_priority_stats,
-       &sysfs_write_priority_stats,
-       &sysfs_fragmentation_stats,
-       &sysfs_oldest_gen_stats,
-       &sysfs_reserve_stats,
-       &sysfs_available_buckets,
-       &sysfs_free_buckets,
-       &sysfs_dirty_data,
-       &sysfs_dirty_bytes,
-       &sysfs_dirty_buckets,
-       &sysfs_cached_data,
-       &sysfs_cached_bytes,
-       &sysfs_cached_buckets,
-       &sysfs_meta_buckets,
-       &sysfs_alloc_buckets,
-       &sysfs_has_data,
-       &sysfs_has_metadata,
+       &sysfs_durability,
+
+       /* settings: */
        &sysfs_discard,
-       &sysfs_written,
-       &sysfs_btree_written,
-       &sysfs_metadata_written,
-       &sysfs_cache_replacement_policy,
-       &sysfs_tier,
        &sysfs_state_rw,
-       &sysfs_alloc_debug,
+       &sysfs_label,
+
+       &sysfs_has_data,
+       &sysfs_iodone,
 
-       sysfs_pd_controller_files(copy_gc),
+       &sysfs_io_latency_read,
+       &sysfs_io_latency_write,
+       &sysfs_io_latency_stats_read,
+       &sysfs_io_latency_stats_write,
+       &sysfs_congested,
+
+       /* debug: */
+       &sysfs_alloc_debug,
        NULL
 };
+
+#endif  /* NO_BCACHEFS_SYSFS */