+// SPDX-License-Identifier: GPL-2.0
/*
* bcache sysfs interfaces
*
* Copyright 2012 Google, Inc.
*/
+#ifndef NO_BCACHEFS_SYSFS
+
#include "bcachefs.h"
-#include "alloc.h"
-#include "compress.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
#include "sysfs.h"
#include "btree_cache.h"
+#include "btree_io.h"
#include "btree_iter.h"
+#include "btree_key_cache.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "ec.h"
#include "inode.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
#include "super-io.h"
-#include "tier.h"
+#include "tests.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/sched/clock.h>
#include "util.h"
}
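+/*
+ * SHOW() is now built on printbufs: fn##_to_text() renders into a printbuf,
+ * and the fn##_show() wrapper copies at most PAGE_SIZE - 1 bytes of the
+ * rendered output into the sysfs buffer, returning -ENOMEM on allocation
+ * failure:
+ */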
#define SHOW(fn) \
+static ssize_t fn ## _to_text(struct printbuf *, \
+ struct kobject *, struct attribute *);\
+ \
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
char *buf) \
+{ \
+ struct printbuf out = PRINTBUF; \
+ ssize_t ret = fn ## _to_text(&out, kobj, attr); \
+ \
+ if (!ret && out.allocation_failure) \
+ ret = -ENOMEM; \
+ \
+ if (!ret) { \
+ ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
+ memcpy(buf, out.buf, ret); \
+ } \
+ printbuf_exit(&out); \
+ return ret; \
+} \
+ \
+static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
+ struct attribute *attr)
#define STORE(fn) \
static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
#define sysfs_printf(file, fmt, ...) \
do { \
if (attr == &sysfs_ ## file) \
- return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \
+ pr_buf(out, fmt "\n", __VA_ARGS__); \
} while (0)
#define sysfs_print(file, var) \
do { \
if (attr == &sysfs_ ## file) \
- return snprint(buf, PAGE_SIZE, var); \
+ snprint(out, var); \
} while (0)
#define sysfs_hprint(file, val) \
do { \
- if (attr == &sysfs_ ## file) { \
- ssize_t ret = bch2_hprint(buf, val); \
- strcat(buf, "\n"); \
- return ret + 1; \
- } \
+ if (attr == &sysfs_ ## file) \
+ bch2_hprint(out, val); \
} while (0)
#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var))
return strtoi_h(buf, &var) ?: (ssize_t) size; \
} while (0)
-write_attribute(trigger_journal_flush);
-write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
+rw_attribute(btree_gc_periodic);
+rw_attribute(gc_gens_pos);
read_attribute(uuid);
read_attribute(minor);
read_attribute(bucket_size);
-read_attribute(block_size);
-read_attribute(btree_node_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
-read_attribute(read_priority_stats);
-read_attribute(write_priority_stats);
-read_attribute(fragmentation_stats);
-read_attribute(oldest_gen_stats);
-read_attribute(reserve_stats);
+read_attribute(durability);
+read_attribute(iodone);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(btree_avg_write_size);
+
read_attribute(btree_cache_size);
read_attribute(compression_stats);
-read_attribute(written);
-read_attribute(btree_written);
-read_attribute(metadata_written);
read_attribute(journal_debug);
-read_attribute(journal_pins);
+read_attribute(btree_updates);
+read_attribute(btree_cache);
+read_attribute(btree_key_cache);
+read_attribute(btree_transactions);
+read_attribute(stripes_heap);
+read_attribute(open_buckets);
read_attribute(internal_uuid);
-read_attribute(available_buckets);
-read_attribute(free_buckets);
-read_attribute(dirty_data);
-read_attribute(dirty_bytes);
-read_attribute(dirty_buckets);
-read_attribute(cached_data);
-read_attribute(cached_bytes);
-read_attribute(cached_buckets);
-read_attribute(meta_buckets);
-read_attribute(alloc_buckets);
read_attribute(has_data);
-read_attribute(has_metadata);
read_attribute(alloc_debug);
read_attribute(read_realloc_races);
-
-rw_attribute(journal_write_delay_ms);
-rw_attribute(journal_reclaim_delay_ms);
+read_attribute(extent_migrate_done);
+read_attribute(extent_migrate_raced);
+read_attribute(bucket_alloc_fail);
rw_attribute(discard);
-rw_attribute(cache_replacement_policy);
+rw_attribute(label);
-rw_attribute(foreground_write_ratelimit_enabled);
rw_attribute(copy_gc_enabled);
-sysfs_pd_controller_attribute(copy_gc);
-
-rw_attribute(tier);
-rw_attribute(tiering_enabled);
-rw_attribute(tiering_percent);
-sysfs_pd_controller_attribute(tiering);
-
-sysfs_pd_controller_attribute(foreground_write);
-
-rw_attribute(pd_controllers_update_seconds);
+read_attribute(copy_gc_wait);
-rw_attribute(foreground_target_percent);
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_work);
+rw_attribute(promote_whole_extents);
-read_attribute(meta_replicas_have);
-read_attribute(data_replicas_have);
+read_attribute(new_stripes);
-#define BCH_DEBUG_PARAM(name, description) \
- rw_attribute(name);
+read_attribute(io_timers_read);
+read_attribute(io_timers_write);
- BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+read_attribute(data_jobs);
-#define BCH_OPT(_name, _mode, ...) \
- static struct attribute sysfs_opt_##_name = { \
- .name = #_name, .mode = _mode, \
- };
+#ifdef CONFIG_BCACHEFS_TESTS
+write_attribute(perf_test);
+#endif /* CONFIG_BCACHEFS_TESTS */
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
-
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_time_stats_attribute(name, frequency_units, duration_units);
+#define x(_name) \
+ static struct attribute sysfs_time_stat_##_name = \
+ { .name = #_name, .mode = S_IRUGO };
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
static struct attribute sysfs_state_rw = {
.name = "state",
size_t ret = 0;
struct btree *b;
- mutex_lock(&c->btree_cache_lock);
- list_for_each_entry(b, &c->btree_cache, list)
+ mutex_lock(&c->btree_cache.lock);
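+ /* all live btree nodes are the same size, so just count them: */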
+ list_for_each_entry(b, &c->btree_cache.live, list)
ret += btree_bytes(c);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&c->btree_cache.lock);
return ret;
}
-static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
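+/* mean size of a btree node write since mount, in sectors: */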
+static size_t bch2_btree_avg_write_size(struct bch_fs *c)
{
- struct bch_fs_usage stats = bch2_fs_usage_read(c);
-
- return scnprintf(buf, PAGE_SIZE,
- "capacity:\t\t%llu\n"
- "compressed:\n"
- "\tmeta:\t\t%llu\n"
- "\tdirty:\t\t%llu\n"
- "\tcached:\t\t%llu\n"
- "uncompressed:\n"
- "\tmeta:\t\t%llu\n"
- "\tdirty:\t\t%llu\n"
- "\tcached:\t\t%llu\n"
- "persistent reserved sectors:\t%llu\n"
- "online reserved sectors:\t%llu\n",
- c->capacity,
- stats.s[S_COMPRESSED][S_META],
- stats.s[S_COMPRESSED][S_DIRTY],
- stats.s[S_COMPRESSED][S_CACHED],
- stats.s[S_UNCOMPRESSED][S_META],
- stats.s[S_UNCOMPRESSED][S_DIRTY],
- stats.s[S_UNCOMPRESSED][S_CACHED],
- stats.persistent_reserved,
- stats.online_reserved);
+ u64 nr = atomic64_read(&c->btree_writes_nr);
+ u64 sectors = atomic64_read(&c->btree_writes_sectors);
+
+ return nr ? div64_u64(sectors, nr) : 0;
}
-static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
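+/* one line per in-flight data job, with its current btree position: */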
+static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
{
+ long ret = 0;
+ struct bch_move_stats *stats;
+
+ mutex_lock(&c->data_progress_lock);
+ list_for_each_entry(stats, &c->data_progress_list, list) {
+ pr_buf(out, "%s: data type %s btree_id %s position: ",
+ stats->name,
+ bch2_data_types[stats->data_type],
+ bch2_btree_ids[stats->btree_id]);
+ bch2_bpos_to_text(out, stats->pos);
+ pr_buf(out, "%s", "\n");
+ }
+
+ mutex_unlock(&c->data_progress_lock);
+ return ret;
+}
+
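+/*
+ * Walk every btree with pointers and tally extents and sectors as compressed,
+ * uncompressed or incompressible:
+ */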
+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
+ enum btree_id id;
+ u64 nr_uncompressed_extents = 0,
nr_compressed_extents = 0,
+ nr_incompressible_extents = 0,
+ uncompressed_sectors = 0,
+ incompressible_sectors = 0,
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
+ int ret;
- if (!bch2_fs_running(c))
+ if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
- if (k.k->type == BCH_EXTENT) {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const struct bch_extent_ptr *ptr;
- const union bch_extent_crc *crc;
-
- extent_for_each_ptr_crc(e, ptr, crc) {
- if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
- nr_uncompressed_extents++;
- uncompressed_sectors += e.k->size;
- } else {
- nr_compressed_extents++;
+ bch2_trans_init(&trans, c, 0, 0);
+
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ if (!((1U << id) & BTREE_ID_HAS_PTRS))
+ continue;
+
+ for_each_btree_key(&trans, iter, id, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ bool compressed = false, uncompressed = false, incompressible = false;
+
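+ /* an extent's pointers may be in more than one compression state: */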
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ switch (p.crc.compression_type) {
+ case BCH_COMPRESSION_TYPE_none:
+ uncompressed = true;
+ uncompressed_sectors += k.k->size;
+ break;
+ case BCH_COMPRESSION_TYPE_incompressible:
+ incompressible = true;
+ incompressible_sectors += k.k->size;
+ break;
+ default:
compressed_sectors_compressed +=
- crc_compressed_size(e.k, crc);
+ p.crc.compressed_size;
compressed_sectors_uncompressed +=
- crc_uncompressed_size(e.k, crc);
+ p.crc.uncompressed_size;
+ compressed = true;
+ break;
}
-
- /* only looking at the first ptr */
- break;
}
+
+ if (incompressible)
+ nr_incompressible_extents++;
+ else if (uncompressed)
+ nr_uncompressed_extents++;
+ else if (compressed)
+ nr_compressed_extents++;
}
- bch2_btree_iter_unlock(&iter);
-
- return snprintf(buf, PAGE_SIZE,
- "uncompressed data:\n"
- " nr extents: %llu\n"
- " size (bytes): %llu\n"
- "compressed data:\n"
- " nr extents: %llu\n"
- " compressed size (bytes): %llu\n"
- " uncompressed size (bytes): %llu\n",
- nr_uncompressed_extents,
- uncompressed_sectors << 9,
- nr_compressed_extents,
- compressed_sectors_compressed << 9,
- compressed_sectors_uncompressed << 9);
+ bch2_trans_iter_exit(&trans, &iter);
+ }
+
+ bch2_trans_exit(&trans);
+
+ if (ret)
+ return ret;
+
+ pr_buf(out, "uncompressed:\n");
+ pr_buf(out, " nr extents: %llu\n", nr_uncompressed_extents);
+ pr_buf(out, " size: ");
+ bch2_hprint(out, uncompressed_sectors << 9);
+ pr_buf(out, "\n");
+
+ pr_buf(out, "compressed:\n");
+ pr_buf(out, " nr extents: %llu\n", nr_compressed_extents);
+ pr_buf(out, " compressed size: ");
+ bch2_hprint(out, compressed_sectors_compressed << 9);
+ pr_buf(out, "\n");
+ pr_buf(out, " uncompressed size: ");
+ bch2_hprint(out, compressed_sectors_uncompressed << 9);
+ pr_buf(out, "\n");
+
+ pr_buf(out, "incompressible:\n");
+ pr_buf(out, " nr extents: %llu\n", nr_incompressible_extents);
+ pr_buf(out, " size: ");
+ bch2_hprint(out, incompressible_sectors << 9);
+ pr_buf(out, "\n");
+ return 0;
+}
+
+static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ pr_buf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
+ bch2_bpos_to_text(out, c->gc_gens_pos);
+ pr_buf(out, "\n");
}
SHOW(bch2_fs)
sysfs_print(minor, c->minor);
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
- sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
- sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
-
- sysfs_print(block_size, block_bytes(c));
- sysfs_print(btree_node_size, btree_bytes(c));
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
+ sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
sysfs_print(read_realloc_races,
atomic_long_read(&c->read_realloc_races));
+ sysfs_print(extent_migrate_done,
+ atomic_long_read(&c->extent_migrate_done));
+ sysfs_print(extent_migrate_raced,
+ atomic_long_read(&c->extent_migrate_raced));
+ sysfs_print(bucket_alloc_fail,
+ atomic_long_read(&c->bucket_alloc_fail));
- sysfs_printf(foreground_write_ratelimit_enabled, "%i",
- c->foreground_write_ratelimit_enabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
- sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);
+ sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
+
+ if (attr == &sysfs_gc_gens_pos)
+ bch2_gc_gens_pos_to_text(out, c);
- sysfs_print(pd_controllers_update_seconds,
- c->pd_controllers_update_seconds);
- sysfs_print(foreground_target_percent, c->foreground_target_percent);
+ sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
- sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
- sysfs_print(tiering_percent, c->tiering_percent);
+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
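+ /* io_clock time is in sectors; << 9 converts the remaining wait to bytes: */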
+ sysfs_hprint(copy_gc_wait,
+ max(0LL, c->copygc_wait -
+ atomic64_read(&c->io_clock[WRITE].now)) << 9);
- sysfs_pd_controller_show(tiering, &c->tiers[1].pd); /* XXX */
+ if (attr == &sysfs_rebalance_work)
+ bch2_rebalance_work_to_text(out, c);
- sysfs_printf(meta_replicas_have, "%u", c->sb.meta_replicas_have);
- sysfs_printf(data_replicas_have, "%u", c->sb.data_replicas_have);
+ sysfs_print(promote_whole_extents, c->promote_whole_extents);
/* Debugging: */
- if (attr == &sysfs_alloc_debug)
- return show_fs_alloc_debug(c, buf);
-
if (attr == &sysfs_journal_debug)
- return bch2_journal_print_debug(&c->journal, buf);
+ bch2_journal_debug_to_text(out, &c->journal);
+
+ if (attr == &sysfs_btree_updates)
+ bch2_btree_updates_to_text(out, c);
+
+ if (attr == &sysfs_btree_cache)
+ bch2_btree_cache_to_text(out, c);
+
+ if (attr == &sysfs_btree_key_cache)
+ bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
- if (attr == &sysfs_journal_pins)
- return bch2_journal_print_pins(&c->journal, buf);
+ if (attr == &sysfs_btree_transactions)
+ bch2_btree_trans_to_text(out, c);
+
+ if (attr == &sysfs_stripes_heap)
+ bch2_stripes_heap_to_text(out, c);
+
+ if (attr == &sysfs_open_buckets)
+ bch2_open_buckets_to_text(out, c);
if (attr == &sysfs_compression_stats)
- return bch2_compression_stats(c, buf);
+ bch2_compression_stats_to_text(out, c);
+
+ if (attr == &sysfs_new_stripes)
+ bch2_new_stripes_to_text(out, c);
+
+ if (attr == &sysfs_io_timers_read)
+ bch2_io_timers_to_text(out, &c->io_clock[READ]);
+
+ if (attr == &sysfs_io_timers_write)
+ bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
-#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
- BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+ if (attr == &sysfs_data_jobs)
+ data_progress_to_text(out, c);
return 0;
}
-STORE(__bch2_fs)
+STORE(bch2_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
- sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
- sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
+ if (attr == &sysfs_btree_gc_periodic) {
+ ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
+ ?: (ssize_t) size;
- sysfs_strtoul(foreground_write_ratelimit_enabled,
- c->foreground_write_ratelimit_enabled);
+ wake_up_process(c->gc_thread);
+ return ret;
+ }
if (attr == &sysfs_copy_gc_enabled) {
- struct bch_dev *ca;
- unsigned i;
ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
?: (ssize_t) size;
- for_each_member_device(ca, c, i)
- if (ca->moving_gc_read)
- wake_up_process(ca->moving_gc_read);
+ if (c->copygc_thread)
+ wake_up_process(c->copygc_thread);
return ret;
}
- if (attr == &sysfs_tiering_enabled) {
- ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
+ if (attr == &sysfs_rebalance_enabled) {
+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
?: (ssize_t) size;
- bch2_tiering_start(c); /* issue wakeups */
+ rebalance_wakeup(c);
return ret;
}
- sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
-
- sysfs_strtoul(pd_controllers_update_seconds,
- c->pd_controllers_update_seconds);
- sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
- sysfs_strtoul(tiering_percent, c->tiering_percent);
- sysfs_pd_controller_store(tiering, &c->tiers[1].pd); /* XXX */
+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
/* Debugging: */
-#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
- BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
-
- if (!bch2_fs_running(c))
+ if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
/* Debugging: */
- if (attr == &sysfs_trigger_journal_flush)
- bch2_journal_meta_async(&c->journal, NULL);
-
- if (attr == &sysfs_trigger_btree_coalesce)
- bch2_coalesce(c);
-
- if (attr == &sysfs_trigger_gc)
- bch2_gc(c);
+ if (!test_bit(BCH_FS_RW, &c->flags))
+ return -EROFS;
if (attr == &sysfs_prune_cache) {
struct shrink_control sc;
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
+ c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
}
- return size;
-}
-
-STORE(bch2_fs)
-{
- struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-
- mutex_lock(&c->state_lock);
- size = __bch2_fs_store(kobj, attr, buf, size);
- mutex_unlock(&c->state_lock);
+ if (attr == &sysfs_trigger_gc) {
+ /*
+ * Full gc is currently incompatible with btree key cache:
+ */
+#if 0
+ down_read(&c->state_lock);
+ bch2_gc(c, false, false);
+ up_read(&c->state_lock);
+#else
+ bch2_gc_gens(c);
+#endif
+ }
+#ifdef CONFIG_BCACHEFS_TESTS
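+ /* expects "<test name> <nr iterations> <nr threads>": */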
+ if (attr == &sysfs_perf_test) {
+ char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
+ char *test, *nr_str, *threads_str;
+ unsigned threads;
+ u64 nr;
+ int ret = -EINVAL;
+
+ if (!tmp)
+ return -ENOMEM;
+
+ test = strsep(&p, " \t\n");
+ nr_str = strsep(&p, " \t\n");
+ threads_str = strsep(&p, " \t\n");
+
+ if (threads_str &&
+ !(ret = kstrtouint(threads_str, 10, &threads)) &&
+ !(ret = bch2_strtoull_h(nr_str, &nr)))
+ ret = bch2_btree_perf_test(c, test, nr, threads);
+ kfree(tmp);
+
+ if (ret)
+ size = ret;
+ }
+#endif
return size;
}
SYSFS_OPS(bch2_fs);
struct attribute *bch2_fs_files[] = {
&sysfs_minor,
- &sysfs_block_size,
- &sysfs_btree_node_size,
&sysfs_btree_cache_size,
+ &sysfs_btree_avg_write_size,
- &sysfs_meta_replicas_have,
- &sysfs_data_replicas_have,
-
- &sysfs_journal_write_delay_ms,
- &sysfs_journal_reclaim_delay_ms,
-
- &sysfs_foreground_target_percent,
- &sysfs_tiering_percent,
+ &sysfs_promote_whole_extents,
&sysfs_compression_stats,
+
+#ifdef CONFIG_BCACHEFS_TESTS
+ &sysfs_perf_test,
+#endif
NULL
};
SHOW(bch2_fs_internal)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
- return bch2_fs_show(&c->kobj, attr, buf);
+ return bch2_fs_to_text(out, &c->kobj, attr);
}
STORE(bch2_fs_internal)
SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
- &sysfs_alloc_debug,
&sysfs_journal_debug,
- &sysfs_journal_pins,
-
- &sysfs_read_realloc_races,
+ &sysfs_btree_updates,
+ &sysfs_btree_cache,
+ &sysfs_btree_key_cache,
+ &sysfs_btree_transactions,
+ &sysfs_new_stripes,
+ &sysfs_stripes_heap,
+ &sysfs_open_buckets,
+ &sysfs_io_timers_read,
+ &sysfs_io_timers_write,
- &sysfs_trigger_journal_flush,
- &sysfs_trigger_btree_coalesce,
&sysfs_trigger_gc,
&sysfs_prune_cache,
- &sysfs_foreground_write_ratelimit_enabled,
+ &sysfs_read_realloc_races,
+ &sysfs_extent_migrate_done,
+ &sysfs_extent_migrate_raced,
+ &sysfs_bucket_alloc_fail,
+
+ &sysfs_gc_gens_pos,
+
&sysfs_copy_gc_enabled,
- &sysfs_tiering_enabled,
- sysfs_pd_controller_files(tiering),
- sysfs_pd_controller_files(foreground_write),
- &sysfs_internal_uuid,
+ &sysfs_copy_gc_wait,
+
+ &sysfs_rebalance_enabled,
+ &sysfs_rebalance_work,
+ sysfs_pd_controller_files(rebalance),
-#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
- BCH_DEBUG_PARAMS()
-#undef BCH_DEBUG_PARAM
+ &sysfs_data_jobs,
+ &sysfs_internal_uuid,
NULL
};
SHOW(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+ const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+ int id = opt - bch2_opt_table;
+ u64 v = bch2_opt_get_by_id(&c->opts, id);
+
+ bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
+ pr_char(out, '\n');
- return bch2_opt_show(&c->opts, attr->name, buf, PAGE_SIZE);
+ return 0;
}
STORE(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
- const struct bch_option *opt;
- int id;
+ const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+ int ret = size, id = opt - bch2_opt_table;
+ char *tmp;
u64 v;
- id = bch2_parse_sysfs_opt(attr->name, buf, &v);
- if (id < 0)
- return id;
-
- opt = &bch2_opt_table[id];
-
- mutex_lock(&c->sb_lock);
-
- if (id == Opt_compression) {
- int ret = bch2_check_set_has_compressed_data(c, v);
- if (ret) {
- mutex_unlock(&c->sb_lock);
- return ret;
- }
+ /*
+ * We don't need to take c->writes for correctness, but it eliminates an
+ * unsightly error message in the dmesg log when we're RO:
+ */
+ if (unlikely(!percpu_ref_tryget(&c->writes)))
+ return -EROFS;
+
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto err;
}
- if (opt->set_sb != SET_NO_SB_OPT) {
- opt->set_sb(c->disk_sb, v);
- bch2_write_super(c);
- }
+ ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
+ kfree(tmp);
- bch2_opt_set(&c->opts, id, v);
+ if (ret < 0)
+ goto err;
- mutex_unlock(&c->sb_lock);
+ ret = bch2_opt_check_may_set(c, id, v);
+ if (ret < 0)
+ goto err;
- return size;
+ bch2_opt_set_sb(c, opt, v);
+ bch2_opt_set_by_id(&c->opts, id, v);
+
+ if ((id == Opt_background_target ||
+ id == Opt_background_compression) && v) {
+ bch2_rebalance_add_work(c, S64_MAX);
+ rebalance_wakeup(c);
+ }
+err:
+ percpu_ref_put(&c->writes);
+ return ret;
}
SYSFS_OPS(bch2_fs_opts_dir);
-struct attribute *bch2_fs_opts_dir_files[] = {
-#define BCH_OPT(_name, ...) \
- &sysfs_opt_##_name,
+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
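+/*
+ * Option files are created at runtime from bch2_opt_table instead of a static
+ * attribute list; only options flagged OPT_FS get a sysfs file:
+ */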
+int bch2_opts_create_sysfs_files(struct kobject *kobj)
+{
+ const struct bch_option *i;
+ int ret;
- NULL
-};
+ for (i = bch2_opt_table;
+ i < bch2_opt_table + bch2_opts_nr;
+ i++) {
+ if (!(i->flags & OPT_FS))
+ continue;
+
+ ret = sysfs_create_file(kobj, &i->attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
/* time stats */
{
struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_print_time_stats(&c->name##_time, name, \
- frequency_units, duration_units);
+#define x(name) \
+ if (attr == &sysfs_time_stat_##name) \
+ bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+#undef x
return 0;
}
STORE(bch2_fs_time_stats)
{
- struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_clear_time_stats(&c->name##_time, name);
- BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
return size;
}
SYSFS_OPS(bch2_fs_time_stats);
struct attribute *bch2_fs_time_stats_files[] = {
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
+#define x(name) \
+ &sysfs_time_stat_##name,
BCH_TIME_STATS()
-#undef BCH_TIME_STAT
-
+#undef x
NULL
};
-typedef unsigned (bucket_map_fn)(struct bch_dev *, struct bucket *, void *);
-
-static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g,
- void *private)
-{
- int rw = (private ? 1 : 0);
-
- return ca->fs->prio_clock[rw].hand - g->prio[rw];
-}
-
-static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g,
- void *private)
+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
{
- return bucket_sectors_used(g->mark);
+ struct bch_fs *c = ca->fs;
+ struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+ unsigned i, nr[BCH_DATA_NR];
+
+ memset(nr, 0, sizeof(nr));
+
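+ /* tally open buckets by data type, across the whole filesystem: */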
+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+ nr[c->open_buckets[i].data_type]++;
+
+ pr_buf(out,
+ "\t\t buckets\t sectors fragmented\n"
+ "capacity%16llu\n",
+ ca->mi.nbuckets - ca->mi.first_bucket);
+
+ for (i = 1; i < BCH_DATA_NR; i++)
+ pr_buf(out, "%-8s%16llu%16llu%16llu\n",
+ bch2_data_types[i], stats.d[i].buckets,
+ stats.d[i].sectors, stats.d[i].fragmented);
+
+ pr_buf(out,
+ "ec\t%16llu\n"
+ "available%15llu\n"
+ "\n"
+ "freelist_wait\t\t%s\n"
+ "open buckets allocated\t%u\n"
+ "open buckets this dev\t%u\n"
+ "open buckets total\t%u\n"
+ "open_buckets_wait\t%s\n"
+ "open_buckets_btree\t%u\n"
+ "open_buckets_user\t%u\n"
+ "btree reserve cache\t%u\n",
+ stats.buckets_ec,
+ __dev_buckets_available(ca, stats, RESERVE_none),
+ c->freelist_wait.list.first ? "waiting" : "empty",
+ OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
+ ca->nr_open_buckets,
+ OPEN_BUCKETS_COUNT,
+ c->open_buckets_wait.list.first ? "waiting" : "empty",
+ nr[BCH_DATA_btree],
+ nr[BCH_DATA_user],
+ c->btree_reserve_cache_nr);
}
-static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g,
- void *private)
-{
- return bucket_gc_gen(ca, g);
-}
+static const char * const bch2_rw[] = {
+ "read",
+ "write",
+ NULL
+};
-static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
- bucket_map_fn *fn, void *private)
+static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
{
- int cmp(const void *l, const void *r)
- { return *((unsigned *) r) - *((unsigned *) l); }
-
- size_t n = ca->mi.nbuckets, i;
- /* Compute 31 quantiles */
- unsigned q[31], *p;
- ssize_t ret = 0;
-
- p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
- if (!p)
- return -ENOMEM;
-
- for (i = ca->mi.first_bucket; i < n; i++)
- p[i] = fn(ca, &ca->buckets[i], private);
-
- sort(p, n, sizeof(unsigned), cmp, NULL);
-
- while (n &&
- !p[n - 1])
- --n;
+ int rw, i;
- for (i = 0; i < ARRAY_SIZE(q); i++)
- q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
-
- vfree(p);
-
- for (i = 0; i < ARRAY_SIZE(q); i++)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "%u ", q[i]);
- buf[ret - 1] = '\n';
-
- return ret;
+ for (rw = 0; rw < 2; rw++) {
+ pr_buf(out, "%s:\n", bch2_rw[rw]);
+ for (i = 1; i < BCH_DATA_NR; i++)
+ pr_buf(out, "%-12s:%12llu\n",
+ bch2_data_types[i],
+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
+ }
}
-static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
+SHOW(bch2_dev)
{
- enum alloc_reserve i;
- ssize_t ret;
-
- spin_lock(&ca->freelist_lock);
+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+ struct bch_fs *c = ca->fs;
- ret = scnprintf(buf, PAGE_SIZE,
- "free_inc:\t%zu\t%zu\n",
- fifo_used(&ca->free_inc),
- ca->free_inc.size);
+ sysfs_printf(uuid, "%pU\n", ca->uuid.b);
- for (i = 0; i < RESERVE_NR; i++)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "free[%u]:\t%zu\t%zu\n", i,
- fifo_used(&ca->free[i]),
- ca->free[i].size);
+ sysfs_print(bucket_size, bucket_bytes(ca));
+ sysfs_print(first_bucket, ca->mi.first_bucket);
+ sysfs_print(nbuckets, ca->mi.nbuckets);
+ sysfs_print(durability, ca->mi.durability);
+ sysfs_print(discard, ca->mi.discard);
- spin_unlock(&ca->freelist_lock);
+ if (attr == &sysfs_label) {
+ if (ca->mi.group) {
+ mutex_lock(&c->sb_lock);
+ bch2_disk_path_to_text(out, c->disk_sb.sb,
+ ca->mi.group - 1);
+ mutex_unlock(&c->sb_lock);
+ }
- return ret;
-}
+ pr_char(out, '\n');
+ }
-static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
-{
- struct bch_fs *c = ca->fs;
- struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+ if (attr == &sysfs_has_data) {
+ bch2_flags_to_text(out, bch2_data_types,
+ bch2_dev_has_data(c, ca));
+ pr_char(out, '\n');
+ }
- return scnprintf(buf, PAGE_SIZE,
- "free_inc: %zu/%zu\n"
- "free[RESERVE_PRIO]: %zu/%zu\n"
- "free[RESERVE_BTREE]: %zu/%zu\n"
- "free[RESERVE_MOVINGGC]: %zu/%zu\n"
- "free[RESERVE_NONE]: %zu/%zu\n"
- "alloc: %llu/%llu\n"
- "meta: %llu/%llu\n"
- "dirty: %llu/%llu\n"
- "available: %llu/%llu\n"
- "freelist_wait: %s\n"
- "open buckets: %u/%u (reserved %u)\n"
- "open_buckets_wait: %s\n",
- fifo_used(&ca->free_inc), ca->free_inc.size,
- fifo_used(&ca->free[RESERVE_PRIO]), ca->free[RESERVE_PRIO].size,
- fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
- fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
- fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
- stats.buckets_alloc, ca->mi.nbuckets - ca->mi.first_bucket,
- stats.buckets_meta, ca->mi.nbuckets - ca->mi.first_bucket,
- stats.buckets_dirty, ca->mi.nbuckets - ca->mi.first_bucket,
- __dev_buckets_available(ca, stats), ca->mi.nbuckets - ca->mi.first_bucket,
- c->freelist_wait.list.first ? "waiting" : "empty",
- c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
- c->open_buckets_wait.list.first ? "waiting" : "empty");
-}
+ if (attr == &sysfs_state_rw) {
+ bch2_string_opt_to_text(out, bch2_member_states,
+ ca->mi.state);
+ pr_char(out, '\n');
+ }
-static u64 sectors_written(struct bch_dev *ca)
-{
- u64 ret = 0;
- int cpu;
+ if (attr == &sysfs_iodone)
+ dev_iodone_to_text(out, ca);
- for_each_possible_cpu(cpu)
- ret += *per_cpu_ptr(ca->sectors_written, cpu);
+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
- return ret;
-}
+ if (attr == &sysfs_io_latency_stats_read)
+ bch2_time_stats_to_text(out, &ca->io_latency[READ]);
-SHOW(bch2_dev)
-{
- struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
- struct bch_fs *c = ca->fs;
- struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+ if (attr == &sysfs_io_latency_stats_write)
+ bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
- sysfs_printf(uuid, "%pU\n", ca->uuid.b);
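+ /* ca->congested is clamped to [0, CONGESTED_MAX] and scaled to a percent: */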
+ sysfs_printf(congested, "%u%%",
+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+ * 100 / CONGESTED_MAX);
- sysfs_print(bucket_size, bucket_bytes(ca));
- sysfs_print(block_size, block_bytes(c));
- sysfs_print(first_bucket, ca->mi.first_bucket);
- sysfs_print(nbuckets, ca->mi.nbuckets);
- sysfs_print(discard, ca->mi.discard);
- sysfs_hprint(written, sectors_written(ca) << 9);
- sysfs_hprint(btree_written,
- atomic64_read(&ca->btree_sectors_written) << 9);
- sysfs_hprint(metadata_written,
- (atomic64_read(&ca->meta_sectors_written) +
- atomic64_read(&ca->btree_sectors_written)) << 9);
-
- sysfs_hprint(dirty_data, stats.sectors[S_DIRTY] << 9);
- sysfs_print(dirty_bytes, stats.sectors[S_DIRTY] << 9);
- sysfs_print(dirty_buckets, stats.buckets_dirty);
- sysfs_hprint(cached_data, stats.sectors[S_CACHED] << 9);
- sysfs_print(cached_bytes, stats.sectors[S_CACHED] << 9);
- sysfs_print(cached_buckets, stats.buckets_cached);
- sysfs_print(meta_buckets, stats.buckets_meta);
- sysfs_print(alloc_buckets, stats.buckets_alloc);
- sysfs_print(available_buckets, dev_buckets_available(ca));
- sysfs_print(free_buckets, dev_buckets_free(ca));
- sysfs_print(has_data, ca->mi.has_data);
- sysfs_print(has_metadata, ca->mi.has_metadata);
-
- sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);
-
- if (attr == &sysfs_cache_replacement_policy)
- return bch2_snprint_string_list(buf, PAGE_SIZE,
- bch2_cache_replacement_policies,
- ca->mi.replacement);
-
- sysfs_print(tier, ca->mi.tier);
-
- if (attr == &sysfs_state_rw)
- return bch2_snprint_string_list(buf, PAGE_SIZE,
- bch2_dev_state,
- ca->mi.state);
-
- if (attr == &sysfs_read_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
- if (attr == &sysfs_write_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
- if (attr == &sysfs_fragmentation_stats)
- return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
- if (attr == &sysfs_oldest_gen_stats)
- return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
- if (attr == &sysfs_reserve_stats)
- return show_reserve_stats(ca, buf);
if (attr == &sysfs_alloc_debug)
- return show_dev_alloc_debug(ca, buf);
+ dev_alloc_debug_to_text(out, ca);
return 0;
}
struct bch_fs *c = ca->fs;
struct bch_member *mi;
- sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
-
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
if (v != BCH_MEMBER_DISCARD(mi)) {
SET_BCH_MEMBER_DISCARD(mi, v);
mutex_unlock(&c->sb_lock);
}
- if (attr == &sysfs_cache_replacement_policy) {
- ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
-
- if (v < 0)
- return v;
-
- mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+ if (attr == &sysfs_label) {
+ char *tmp;
+ int ret;
- if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
- SET_BCH_MEMBER_REPLACEMENT(mi, v);
- bch2_write_super(c);
- }
- mutex_unlock(&c->sb_lock);
- }
-
- if (attr == &sysfs_tier) {
- unsigned prev_tier;
- unsigned v = strtoul_restrict_or_return(buf,
- 0, BCH_TIER_MAX - 1);
-
- mutex_lock(&c->sb_lock);
- prev_tier = ca->mi.tier;
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
- if (v == ca->mi.tier) {
- mutex_unlock(&c->sb_lock);
- return size;
- }
-
- mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
- SET_BCH_MEMBER_TIER(mi, v);
- bch2_write_super(c);
-
- bch2_dev_group_remove(&c->tiers[prev_tier].devs, ca);
- bch2_dev_group_add(&c->tiers[ca->mi.tier].devs, ca);
- mutex_unlock(&c->sb_lock);
-
- bch2_recalc_capacity(c);
- bch2_tiering_start(c);
+ ret = bch2_dev_group_set(c, ca, strim(tmp));
+ kfree(tmp);
+ if (ret)
+ return ret;
}
return size;
struct attribute *bch2_dev_files[] = {
&sysfs_uuid,
&sysfs_bucket_size,
- &sysfs_block_size,
&sysfs_first_bucket,
&sysfs_nbuckets,
+ &sysfs_durability,
/* settings: */
&sysfs_discard,
- &sysfs_cache_replacement_policy,
- &sysfs_tier,
&sysfs_state_rw,
+ &sysfs_label,
&sysfs_has_data,
- &sysfs_has_metadata,
-
- /* io stats: */
- &sysfs_written,
- &sysfs_btree_written,
- &sysfs_metadata_written,
-
- /* alloc info - data: */
- &sysfs_dirty_data,
- &sysfs_dirty_bytes,
- &sysfs_cached_data,
- &sysfs_cached_bytes,
-
- /* alloc info - buckets: */
- &sysfs_available_buckets,
- &sysfs_free_buckets,
- &sysfs_dirty_buckets,
- &sysfs_cached_buckets,
- &sysfs_meta_buckets,
- &sysfs_alloc_buckets,
-
- /* alloc info - other stats: */
- &sysfs_read_priority_stats,
- &sysfs_write_priority_stats,
- &sysfs_fragmentation_stats,
- &sysfs_oldest_gen_stats,
- &sysfs_reserve_stats,
+ &sysfs_iodone,
+
+ &sysfs_io_latency_read,
+ &sysfs_io_latency_write,
+ &sysfs_io_latency_stats_read,
+ &sysfs_io_latency_stats_write,
+ &sysfs_congested,
/* debug: */
&sysfs_alloc_debug,
-
- sysfs_pd_controller_files(copy_gc),
NULL
};
+
+#endif /* NO_BCACHEFS_SYSFS */