#include "compress.h"
#include "sysfs.h"
#include "btree_cache.h"
+#include "btree_io.h"
#include "btree_iter.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
+#include "disk_groups.h"
#include "inode.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
#include "super-io.h"
-#include "tier.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
read_attribute(btree_node_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
-read_attribute(iostats);
-read_attribute(read_priority_stats);
-read_attribute(write_priority_stats);
-read_attribute(fragmentation_stats);
-read_attribute(oldest_gen_stats);
+read_attribute(durability);
+read_attribute(iodone);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(bucket_quantiles_last_read);
+read_attribute(bucket_quantiles_last_write);
+read_attribute(bucket_quantiles_fragmentation);
+read_attribute(bucket_quantiles_oldest_gen);
+
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
read_attribute(journal_pins);
+read_attribute(btree_updates);
+read_attribute(dirty_btree_nodes);
read_attribute(internal_uuid);
rw_attribute(discard);
rw_attribute(cache_replacement_policy);
+rw_attribute(label);
rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);
-rw_attribute(tier);
-rw_attribute(tiering_enabled);
-rw_attribute(tiering_percent);
-sysfs_pd_controller_attribute(tiering);
-
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_work);
+rw_attribute(promote_whole_extents);
rw_attribute(pd_controllers_update_seconds);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_time_stats_attribute(name, frequency_units, duration_units);
+#define x(_name) \
+ static struct attribute sysfs_time_stat_##_name = \
+ { .name = #_name, .mode = S_IRUGO };
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
static struct attribute sysfs_state_rw = {
.name = "state",
sysfs_print(pd_controllers_update_seconds,
c->pd_controllers_update_seconds);
- sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
- sysfs_print(tiering_percent, c->tiering_percent);
+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
- sysfs_pd_controller_show(tiering, &c->tiers[1].pd); /* XXX */
+ if (attr == &sysfs_rebalance_work)
+ return bch2_rebalance_work_show(c, buf);
+
+ sysfs_print(promote_whole_extents, c->promote_whole_extents);
sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
if (attr == &sysfs_journal_pins)
return bch2_journal_print_pins(&c->journal, buf);
+ if (attr == &sysfs_btree_updates)
+ return bch2_btree_updates_print(c, buf);
+
+ if (attr == &sysfs_dirty_btree_nodes)
+ return bch2_dirty_btree_nodes_print(c, buf);
+
if (attr == &sysfs_compression_stats)
return bch2_compression_stats(c, buf);
return ret;
}
- if (attr == &sysfs_tiering_enabled) {
- ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
+ if (attr == &sysfs_rebalance_enabled) {
+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
?: (ssize_t) size;
- bch2_tiering_start(c); /* issue wakeups */
+ rebalance_wakeup(c);
return ret;
}
sysfs_strtoul(pd_controllers_update_seconds,
c->pd_controllers_update_seconds);
+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
- sysfs_strtoul(tiering_percent, c->tiering_percent);
- sysfs_pd_controller_store(tiering, &c->tiers[1].pd); /* XXX */
+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
/* Debugging: */
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
- &sysfs_tiering_percent,
+ &sysfs_promote_whole_extents,
&sysfs_compression_stats,
NULL
&sysfs_alloc_debug,
&sysfs_journal_debug,
&sysfs_journal_pins,
+ &sysfs_btree_updates,
+ &sysfs_dirty_btree_nodes,
&sysfs_read_realloc_races,
&sysfs_extent_migrate_done,
&sysfs_prune_cache,
&sysfs_copy_gc_enabled,
- &sysfs_tiering_enabled,
- sysfs_pd_controller_files(tiering),
+
+ &sysfs_rebalance_enabled,
+ &sysfs_rebalance_work,
+ sysfs_pd_controller_files(rebalance),
+
&sysfs_internal_uuid,
#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
int id = opt - bch2_opt_table;
u64 v = bch2_opt_get_by_id(&c->opts, id);
- out += opt->type == BCH_OPT_STR
- ? bch2_scnprint_string_list(out, end - out, opt->choices, v)
- : scnprintf(out, end - out, "%lli", v);
+ out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST);
out += scnprintf(out, end - out, "\n");
return out - buf;
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr);
int ret, id = opt - bch2_opt_table;
+ char *tmp;
u64 v;
- ret = bch2_opt_parse(opt, buf, &v);
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = bch2_opt_parse(c, opt, strim(tmp), &v);
+ kfree(tmp);
+
if (ret < 0)
return ret;
- mutex_lock(&c->sb_lock);
-
- if (id == Opt_compression) {
+ if (id == Opt_compression ||
+ id == Opt_background_compression) { /* sb_lock is NOT held here: this patch drops the mutex_lock() that used to precede this check and takes sb_lock only around the superblock write further down, so the error path must not unlock it */
int ret = bch2_check_set_has_compressed_data(c, v);
if (ret) {
- mutex_unlock(&c->sb_lock);
}
if (opt->set_sb != SET_NO_SB_OPT) {
- opt->set_sb(c->disk_sb, v);
+ mutex_lock(&c->sb_lock);
+ opt->set_sb(c->disk_sb.sb, v);
bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
}
bch2_opt_set_by_id(&c->opts, id, v);
- mutex_unlock(&c->sb_lock);
+ if ((id == Opt_background_target ||
+ id == Opt_background_compression) && v) {
+ bch2_rebalance_add_work(c, S64_MAX);
+ rebalance_wakeup(c);
+ }
return size;
}
{
struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_print_time_stats(&c->name##_time, name, \
- frequency_units, duration_units);
+#define x(name) \
+ if (attr == &sysfs_time_stat_##name) \
+ return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
+ buf, PAGE_SIZE);
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
return 0;
}
STORE(bch2_fs_time_stats)
{
- struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_clear_time_stats(&c->name##_time, name);
- BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
return size;
}
SYSFS_OPS(bch2_fs_time_stats);
struct attribute *bch2_fs_time_stats_files[] = {
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
+#define x(name) \
+ &sysfs_time_stat_##name, /* one array entry per BCH_TIME_STATS() item: the address of the matching sysfs_time_stat_##name attribute */
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
+#undef x
NULL
};
-typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *);
+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
+ size_t, void *); /* per-bucket callback for show_quantiles(), which invokes it as fn(c, ca, bucket index, private) */
-static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private) /* bucket_map_fn: returns bucket_last_io() for bucket b of ca; a non-NULL private selects rw = 1, NULL selects rw = 0 */
{
- struct bucket *g = bucket(ca, b);
int rw = (private ? 1 : 0);
- return ca->fs->prio_clock[rw].hand - g->prio[rw];
+ return bucket_last_io(c, bucket(ca, b), rw); /* the show path passes (void *) 0 for bucket_quantiles_last_read and (void *) 1 for bucket_quantiles_last_write */
}
-static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private) /* bucket_map_fn: returns bucket_sectors_used() of bucket b's mark; c and private are accepted only to match the callback signature and are unused */
{
struct bucket *g = bucket(ca, b);
return bucket_sectors_used(g->mark);
}
-static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private) /* bucket_map_fn: returns bucket_gc_gen() for bucket b of ca; c and private are accepted only to match the callback signature and are unused */
{
return bucket_gc_gen(ca, b);
}
-static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
- bucket_map_fn *fn, void *private)
+static int unsigned_cmp(const void *_l, const void *_r) /* comparator passed to sort() by show_quantiles(); file-scope replacement for the removed nested cmp() */
{
- int cmp(const void *l, const void *r)
- { return *((unsigned *) r) - *((unsigned *) l); }
+ unsigned l = *((unsigned *) _l);
+ unsigned r = *((unsigned *) _r);
+ return (l > r) - (l < r); /* yields -1, 0 or 1 with no subtraction that could wrap; NOTE(review): this orders ascending, whereas the removed cmp() returned r - l (descending) - confirm show_quantiles() is meant to see the opposite order */
+}
+
+static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
+ char *buf, bucket_map_fn *fn, void *private)
+{
size_t i, n;
/* Compute 31 quantiles */
unsigned q[31], *p;
}
for (i = ca->mi.first_bucket; i < n; i++)
- p[i] = fn(ca, i, private);
+ p[i] = fn(c, ca, i, private);
- sort(p, n, sizeof(unsigned), cmp, NULL);
+ sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
up_read(&ca->bucket_lock);
while (n &&
NULL
};
-static ssize_t show_dev_iostats(struct bch_dev *ca, char *buf)
+static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
{
char *out = buf, *end = buf + PAGE_SIZE;
int rw, i, cpu;
sysfs_print(block_size, block_bytes(c));
sysfs_print(first_bucket, ca->mi.first_bucket);
sysfs_print(nbuckets, ca->mi.nbuckets);
+ sysfs_print(durability, ca->mi.durability);
sysfs_print(discard, ca->mi.discard);
+ if (attr == &sysfs_label) {
+ if (ca->mi.group) {
+ mutex_lock(&c->sb_lock);
+ out += bch2_disk_path_print(&c->disk_sb, out, end - out,
+ ca->mi.group - 1);
+ mutex_unlock(&c->sb_lock);
+ } else {
+ out += scnprintf(out, end - out, "none");
+ }
+
+ out += scnprintf(out, end - out, "\n");
+ return out - buf;
+ }
+
if (attr == &sysfs_has_data) {
out += bch2_scnprint_flag_list(out, end - out,
bch2_data_types,
return out - buf;
}
- sysfs_print(tier, ca->mi.tier);
-
if (attr == &sysfs_state_rw) {
out += bch2_scnprint_string_list(out, end - out,
bch2_dev_state,
return out - buf;
}
- if (attr == &sysfs_iostats)
- return show_dev_iostats(ca, buf);
- if (attr == &sysfs_read_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
- if (attr == &sysfs_write_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
- if (attr == &sysfs_fragmentation_stats)
- return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
- if (attr == &sysfs_oldest_gen_stats)
- return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
+ if (attr == &sysfs_iodone)
+ return show_dev_iodone(ca, buf);
+
+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
+
+ if (attr == &sysfs_io_latency_stats_read)
+ return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
+ if (attr == &sysfs_io_latency_stats_write)
+ return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
+
+ sysfs_printf(congested, "%u%%",
+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+ * 100 / CONGESTED_MAX);
+
+ if (attr == &sysfs_bucket_quantiles_last_read)
+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
+ if (attr == &sysfs_bucket_quantiles_last_write)
+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
+ if (attr == &sysfs_bucket_quantiles_fragmentation)
+ return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
+ if (attr == &sysfs_bucket_quantiles_oldest_gen)
+ return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
+
if (attr == &sysfs_reserve_stats)
return show_reserve_stats(ca, buf);
if (attr == &sysfs_alloc_debug)
bool v = strtoul_or_return(buf);
mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
if (v != BCH_MEMBER_DISCARD(mi)) {
SET_BCH_MEMBER_DISCARD(mi, v);
}
if (attr == &sysfs_cache_replacement_policy) {
- ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
+ ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
if (v < 0)
return v;
mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
SET_BCH_MEMBER_REPLACEMENT(mi, v);
mutex_unlock(&c->sb_lock);
}
- if (attr == &sysfs_tier) {
- unsigned prev_tier;
- unsigned v = strtoul_restrict_or_return(buf,
- 0, BCH_TIER_MAX - 1);
+ if (attr == &sysfs_label) {
+ char *tmp;
+ int ret;
- mutex_lock(&c->sb_lock);
- prev_tier = ca->mi.tier;
-
- if (v == ca->mi.tier) {
- mutex_unlock(&c->sb_lock);
- return size;
- }
-
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
- SET_BCH_MEMBER_TIER(mi, v);
- bch2_write_super(c);
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
- clear_bit(ca->dev_idx, c->tiers[prev_tier].devs.d);
- set_bit(ca->dev_idx, c->tiers[ca->mi.tier].devs.d);
- mutex_unlock(&c->sb_lock);
-
- bch2_recalc_capacity(c);
- bch2_tiering_start(c);
+ ret = bch2_dev_group_set(c, ca, strim(tmp));
+ kfree(tmp);
+ if (ret)
+ return ret;
}
if (attr == &sysfs_wake_allocator)
&sysfs_block_size,
&sysfs_first_bucket,
&sysfs_nbuckets,
+ &sysfs_durability,
/* settings: */
&sysfs_discard,
&sysfs_cache_replacement_policy,
- &sysfs_tier,
&sysfs_state_rw,
+ &sysfs_label,
&sysfs_has_data,
- &sysfs_iostats,
+ &sysfs_iodone,
+
+ &sysfs_io_latency_read,
+ &sysfs_io_latency_write,
+ &sysfs_io_latency_stats_read,
+ &sysfs_io_latency_stats_write,
+ &sysfs_congested,
/* alloc info - other stats: */
- &sysfs_read_priority_stats,
- &sysfs_write_priority_stats,
- &sysfs_fragmentation_stats,
- &sysfs_oldest_gen_stats,
+ &sysfs_bucket_quantiles_last_read,
+ &sysfs_bucket_quantiles_last_write,
+ &sysfs_bucket_quantiles_fragmentation,
+ &sysfs_bucket_quantiles_oldest_gen,
+
&sysfs_reserve_stats,
/* debug: */