git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/sysfs.c
Update bcachefs sources to 9abf628c70 bcachefs: Fix a spurious error in fsck
[bcachefs-tools-debian] / libbcachefs / sysfs.c
index 1e8f8735a466d8ab65de1fb4e23e45226c622bb2..66b5b9f933bc4c562d9b0d295480531252030210 100644 (file)
 #include "compress.h"
 #include "sysfs.h"
 #include "btree_cache.h"
+#include "btree_io.h"
 #include "btree_iter.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
+#include "disk_groups.h"
 #include "inode.h"
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
 #include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
 #include "super-io.h"
-#include "tier.h"
 
 #include <linux/blkdev.h>
 #include <linux/sort.h>
@@ -136,16 +140,27 @@ read_attribute(block_size);
 read_attribute(btree_node_size);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
-read_attribute(iostats);
-read_attribute(read_priority_stats);
-read_attribute(write_priority_stats);
-read_attribute(fragmentation_stats);
-read_attribute(oldest_gen_stats);
+read_attribute(durability);
+read_attribute(iodone);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(bucket_quantiles_last_read);
+read_attribute(bucket_quantiles_last_write);
+read_attribute(bucket_quantiles_fragmentation);
+read_attribute(bucket_quantiles_oldest_gen);
+
 read_attribute(reserve_stats);
 read_attribute(btree_cache_size);
 read_attribute(compression_stats);
 read_attribute(journal_debug);
 read_attribute(journal_pins);
+read_attribute(btree_updates);
+read_attribute(dirty_btree_nodes);
 
 read_attribute(internal_uuid);
 
@@ -162,15 +177,15 @@ rw_attribute(journal_reclaim_delay_ms);
 
 rw_attribute(discard);
 rw_attribute(cache_replacement_policy);
+rw_attribute(label);
 
 rw_attribute(copy_gc_enabled);
 sysfs_pd_controller_attribute(copy_gc);
 
-rw_attribute(tier);
-rw_attribute(tiering_enabled);
-rw_attribute(tiering_percent);
-sysfs_pd_controller_attribute(tiering);
-
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_work);
+rw_attribute(promote_whole_extents);
 
 rw_attribute(pd_controllers_update_seconds);
 
@@ -183,10 +198,11 @@ read_attribute(data_replicas_have);
        BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_time_stats_attribute(name, frequency_units, duration_units);
+#define x(_name)                                               \
+       static struct attribute sysfs_time_stat_##_name =               \
+               { .name = #_name, .mode = S_IRUGO };
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
 
 static struct attribute sysfs_state_rw = {
        .name = "state",
@@ -324,10 +340,13 @@ SHOW(bch2_fs)
        sysfs_print(pd_controllers_update_seconds,
                    c->pd_controllers_update_seconds);
 
-       sysfs_printf(tiering_enabled,           "%i", c->tiering_enabled);
-       sysfs_print(tiering_percent,            c->tiering_percent);
+       sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
+       sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
 
-       sysfs_pd_controller_show(tiering,       &c->tiers[1].pd); /* XXX */
+       if (attr == &sysfs_rebalance_work)
+               return bch2_rebalance_work_show(c, buf);
+
+       sysfs_print(promote_whole_extents,      c->promote_whole_extents);
 
        sysfs_printf(meta_replicas_have, "%u",  bch2_replicas_online(c, true));
        sysfs_printf(data_replicas_have, "%u",  bch2_replicas_online(c, false));
@@ -343,6 +362,12 @@ SHOW(bch2_fs)
        if (attr == &sysfs_journal_pins)
                return bch2_journal_print_pins(&c->journal, buf);
 
+       if (attr == &sysfs_btree_updates)
+               return bch2_btree_updates_print(c, buf);
+
+       if (attr == &sysfs_dirty_btree_nodes)
+               return bch2_dirty_btree_nodes_print(c, buf);
+
        if (attr == &sysfs_compression_stats)
                return bch2_compression_stats(c, buf);
 
@@ -380,19 +405,19 @@ STORE(__bch2_fs)
                return ret;
        }
 
-       if (attr == &sysfs_tiering_enabled) {
-               ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
+       if (attr == &sysfs_rebalance_enabled) {
+               ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
                        ?: (ssize_t) size;
 
-               bch2_tiering_start(c); /* issue wakeups */
+               rebalance_wakeup(c);
                return ret;
        }
 
        sysfs_strtoul(pd_controllers_update_seconds,
                      c->pd_controllers_update_seconds);
+       sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
 
-       sysfs_strtoul(tiering_percent,          c->tiering_percent);
-       sysfs_pd_controller_store(tiering,      &c->tiers[1].pd); /* XXX */
+       sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
 
        /* Debugging: */
 
@@ -449,7 +474,7 @@ struct attribute *bch2_fs_files[] = {
        &sysfs_journal_write_delay_ms,
        &sysfs_journal_reclaim_delay_ms,
 
-       &sysfs_tiering_percent,
+       &sysfs_promote_whole_extents,
 
        &sysfs_compression_stats,
        NULL
@@ -474,6 +499,8 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_alloc_debug,
        &sysfs_journal_debug,
        &sysfs_journal_pins,
+       &sysfs_btree_updates,
+       &sysfs_dirty_btree_nodes,
 
        &sysfs_read_realloc_races,
        &sysfs_extent_migrate_done,
@@ -485,8 +512,11 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_prune_cache,
 
        &sysfs_copy_gc_enabled,
-       &sysfs_tiering_enabled,
-       sysfs_pd_controller_files(tiering),
+
+       &sysfs_rebalance_enabled,
+       &sysfs_rebalance_work,
+       sysfs_pd_controller_files(rebalance),
+
        &sysfs_internal_uuid,
 
 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
@@ -506,9 +536,7 @@ SHOW(bch2_fs_opts_dir)
        int id = opt - bch2_opt_table;
        u64 v = bch2_opt_get_by_id(&c->opts, id);
 
-       out += opt->type == BCH_OPT_STR
-               ? bch2_scnprint_string_list(out, end - out, opt->choices, v)
-               : scnprintf(out, end - out, "%lli", v);
+       out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST);
        out += scnprintf(out, end - out, "\n");
 
        return out - buf;
@@ -519,15 +547,21 @@ STORE(bch2_fs_opts_dir)
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
        const struct bch_option *opt = container_of(attr, struct bch_option, attr);
        int ret, id = opt - bch2_opt_table;
+       char *tmp;
        u64 v;
 
-       ret = bch2_opt_parse(opt, buf, &v);
+       tmp = kstrdup(buf, GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
+
+       ret = bch2_opt_parse(c, opt, strim(tmp), &v);
+       kfree(tmp);
+
        if (ret < 0)
                return ret;
 
-       mutex_lock(&c->sb_lock);
-
-       if (id == Opt_compression) {
+       if (id == Opt_compression ||
+           id == Opt_background_compression) {
                int ret = bch2_check_set_has_compressed_data(c, v);
                if (ret) {
                        mutex_unlock(&c->sb_lock);
@@ -536,13 +570,19 @@ STORE(bch2_fs_opts_dir)
        }
 
        if (opt->set_sb != SET_NO_SB_OPT) {
-               opt->set_sb(c->disk_sb, v);
+               mutex_lock(&c->sb_lock);
+               opt->set_sb(c->disk_sb.sb, v);
                bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
        }
 
        bch2_opt_set_by_id(&c->opts, id, v);
 
-       mutex_unlock(&c->sb_lock);
+       if ((id == Opt_background_target ||
+            id == Opt_background_compression) && v) {
+               bch2_rebalance_add_work(c, S64_MAX);
+               rebalance_wakeup(c);
+       }
 
        return size;
 }
@@ -575,67 +615,65 @@ SHOW(bch2_fs_time_stats)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
 
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_print_time_stats(&c->name##_time, name,                   \
-                              frequency_units, duration_units);
+#define x(name)                                                \
+       if (attr == &sysfs_time_stat_##name)                            \
+               return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
+                                            buf, PAGE_SIZE);
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
 
        return 0;
 }
 
 STORE(bch2_fs_time_stats)
 {
-       struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_clear_time_stats(&c->name##_time, name);
-       BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
        return size;
 }
 SYSFS_OPS(bch2_fs_time_stats);
 
 struct attribute *bch2_fs_time_stats_files[] = {
-#define BCH_TIME_STAT(name, frequency_units, duration_units)           \
-       sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
+#define x(name)                                                \
+       &sysfs_time_stat_##name,
        BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
+#undef x
        NULL
 };
 
-typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *);
+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
+                                size_t, void *);
 
-static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b,
-                                  void *private)
+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
+                                 size_t b, void *private)
 {
-       struct bucket *g = bucket(ca, b);
        int rw = (private ? 1 : 0);
 
-       return ca->fs->prio_clock[rw].hand - g->prio[rw];
+       return bucket_last_io(c, bucket(ca, b), rw);
 }
 
-static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b,
-                                      void *private)
+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
+                                      size_t b, void *private)
 {
        struct bucket *g = bucket(ca, b);
        return bucket_sectors_used(g->mark);
 }
 
-static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b,
-                                    void *private)
+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
+                                    size_t b, void *private)
 {
        return bucket_gc_gen(ca, b);
 }
 
-static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
-                             bucket_map_fn *fn, void *private)
+static int unsigned_cmp(const void *_l, const void *_r)
 {
-       int cmp(const void *l, const void *r)
-       {       return *((unsigned *) r) - *((unsigned *) l); }
+       unsigned l = *((unsigned *) _l);
+       unsigned r = *((unsigned *) _r);
 
+       return (l > r) - (l < r);
+}
+
+static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
+                             char *buf, bucket_map_fn *fn, void *private)
+{
        size_t i, n;
        /* Compute 31 quantiles */
        unsigned q[31], *p;
@@ -651,9 +689,9 @@ static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
        }
 
        for (i = ca->mi.first_bucket; i < n; i++)
-               p[i] = fn(ca, i, private);
+               p[i] = fn(c, ca, i, private);
 
-       sort(p, n, sizeof(unsigned), cmp, NULL);
+       sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
        up_read(&ca->bucket_lock);
 
        while (n &&
@@ -752,7 +790,7 @@ static const char * const bch2_rw[] = {
        NULL
 };
 
-static ssize_t show_dev_iostats(struct bch_dev *ca, char *buf)
+static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
 {
        char *out = buf, *end = buf + PAGE_SIZE;
        int rw, i, cpu;
@@ -786,8 +824,23 @@ SHOW(bch2_dev)
        sysfs_print(block_size,         block_bytes(c));
        sysfs_print(first_bucket,       ca->mi.first_bucket);
        sysfs_print(nbuckets,           ca->mi.nbuckets);
+       sysfs_print(durability,         ca->mi.durability);
        sysfs_print(discard,            ca->mi.discard);
 
+       if (attr == &sysfs_label) {
+               if (ca->mi.group) {
+                       mutex_lock(&c->sb_lock);
+                       out += bch2_disk_path_print(&c->disk_sb, out, end - out,
+                                                   ca->mi.group - 1);
+                       mutex_unlock(&c->sb_lock);
+               } else {
+                       out += scnprintf(out, end - out, "none");
+               }
+
+               out += scnprintf(out, end - out, "\n");
+               return out - buf;
+       }
+
        if (attr == &sysfs_has_data) {
                out += bch2_scnprint_flag_list(out, end - out,
                                               bch2_data_types,
@@ -806,8 +859,6 @@ SHOW(bch2_dev)
                return out - buf;
        }
 
-       sysfs_print(tier,               ca->mi.tier);
-
        if (attr == &sysfs_state_rw) {
                out += bch2_scnprint_string_list(out, end - out,
                                                 bch2_dev_state,
@@ -816,16 +867,30 @@ SHOW(bch2_dev)
                return out - buf;
        }
 
-       if (attr == &sysfs_iostats)
-               return show_dev_iostats(ca, buf);
-       if (attr == &sysfs_read_priority_stats)
-               return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
-       if (attr == &sysfs_write_priority_stats)
-               return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
-       if (attr == &sysfs_fragmentation_stats)
-               return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
-       if (attr == &sysfs_oldest_gen_stats)
-               return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
+       if (attr == &sysfs_iodone)
+               return show_dev_iodone(ca, buf);
+
+       sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
+       sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
+
+       if (attr == &sysfs_io_latency_stats_read)
+               return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
+       if (attr == &sysfs_io_latency_stats_write)
+               return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
+
+       sysfs_printf(congested,                 "%u%%",
+                    clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+                    * 100 / CONGESTED_MAX);
+
+       if (attr == &sysfs_bucket_quantiles_last_read)
+               return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
+       if (attr == &sysfs_bucket_quantiles_last_write)
+               return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
+       if (attr == &sysfs_bucket_quantiles_fragmentation)
+               return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
+       if (attr == &sysfs_bucket_quantiles_oldest_gen)
+               return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
+
        if (attr == &sysfs_reserve_stats)
                return show_reserve_stats(ca, buf);
        if (attr == &sysfs_alloc_debug)
@@ -846,7 +911,7 @@ STORE(bch2_dev)
                bool v = strtoul_or_return(buf);
 
                mutex_lock(&c->sb_lock);
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
 
                if (v != BCH_MEMBER_DISCARD(mi)) {
                        SET_BCH_MEMBER_DISCARD(mi, v);
@@ -856,13 +921,13 @@ STORE(bch2_dev)
        }
 
        if (attr == &sysfs_cache_replacement_policy) {
-               ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
+               ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
 
                if (v < 0)
                        return v;
 
                mutex_lock(&c->sb_lock);
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
 
                if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
                        SET_BCH_MEMBER_REPLACEMENT(mi, v);
@@ -871,29 +936,18 @@ STORE(bch2_dev)
                mutex_unlock(&c->sb_lock);
        }
 
-       if (attr == &sysfs_tier) {
-               unsigned prev_tier;
-               unsigned v = strtoul_restrict_or_return(buf,
-                                       0, BCH_TIER_MAX - 1);
+       if (attr == &sysfs_label) {
+               char *tmp;
+               int ret;
 
-               mutex_lock(&c->sb_lock);
-               prev_tier = ca->mi.tier;
-
-               if (v == ca->mi.tier) {
-                       mutex_unlock(&c->sb_lock);
-                       return size;
-               }
-
-               mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
-               SET_BCH_MEMBER_TIER(mi, v);
-               bch2_write_super(c);
+               tmp = kstrdup(buf, GFP_KERNEL);
+               if (!tmp)
+                       return -ENOMEM;
 
-               clear_bit(ca->dev_idx, c->tiers[prev_tier].devs.d);
-               set_bit(ca->dev_idx, c->tiers[ca->mi.tier].devs.d);
-               mutex_unlock(&c->sb_lock);
-
-               bch2_recalc_capacity(c);
-               bch2_tiering_start(c);
+               ret = bch2_dev_group_set(c, ca, strim(tmp));
+               kfree(tmp);
+               if (ret)
+                       return ret;
        }
 
        if (attr == &sysfs_wake_allocator)
@@ -909,21 +963,29 @@ struct attribute *bch2_dev_files[] = {
        &sysfs_block_size,
        &sysfs_first_bucket,
        &sysfs_nbuckets,
+       &sysfs_durability,
 
        /* settings: */
        &sysfs_discard,
        &sysfs_cache_replacement_policy,
-       &sysfs_tier,
        &sysfs_state_rw,
+       &sysfs_label,
 
        &sysfs_has_data,
-       &sysfs_iostats,
+       &sysfs_iodone,
+
+       &sysfs_io_latency_read,
+       &sysfs_io_latency_write,
+       &sysfs_io_latency_stats_read,
+       &sysfs_io_latency_stats_write,
+       &sysfs_congested,
 
        /* alloc info - other stats: */
-       &sysfs_read_priority_stats,
-       &sysfs_write_priority_stats,
-       &sysfs_fragmentation_stats,
-       &sysfs_oldest_gen_stats,
+       &sysfs_bucket_quantiles_last_read,
+       &sysfs_bucket_quantiles_last_write,
+       &sysfs_bucket_quantiles_fragmentation,
+       &sysfs_bucket_quantiles_oldest_gen,
+
        &sysfs_reserve_stats,
 
        /* debug: */