#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "movinggc.h"
+#include "nocow_locking.h"
#include "opts.h"
#include "rebalance.h"
#include "replicas.h"
#include "tests.h"
#include <linux/blkdev.h>
-#include <linux/pretty-printers.h>
#include <linux/sort.h>
#include <linux/sched/clock.h>
#include "util.h"
#define SYSFS_OPS(type) \
-const struct sysfs_ops type ## _sysfs_ops = { \
+const struct sysfs_ops type ## _sysfs_ops = { \
.show = type ## _show, \
.store = type ## _store \
}
#define SHOW(fn) \
static ssize_t fn ## _to_text(struct printbuf *, \
- struct kobject *, struct attribute *);\
+ struct kobject *, struct attribute *); \
\
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
char *buf) \
memcpy(buf, out.buf, ret); \
} \
printbuf_exit(&out); \
- return ret; \
+ return bch2_err_class(ret); \
} \
\
static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
struct attribute *attr)
#define STORE(fn) \
+static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\
+ const char *, size_t); \
+ \
static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
const char *buf, size_t size) \
+{ \
+ return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \
+} \
+ \
+static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\
+ const char *buf, size_t size)
#define __sysfs_attribute(_name, _mode) \
static struct attribute sysfs_##_name = \
{ .name = #_name, .mode = _mode }
-#define write_attribute(n) __sysfs_attribute(n, S_IWUSR)
-#define read_attribute(n) __sysfs_attribute(n, S_IRUGO)
-#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR)
+#define write_attribute(n) __sysfs_attribute(n, 0200) /* owner write-only */
+#define read_attribute(n) __sysfs_attribute(n, 0444) /* read-only for everyone */
+#define rw_attribute(n) __sysfs_attribute(n, 0644) /* owner read/write, others read-only */
#define sysfs_printf(file, fmt, ...) \
do { \
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
write_attribute(prune_cache);
+write_attribute(btree_wakeup);
rw_attribute(btree_gc_periodic);
rw_attribute(gc_gens_pos);
read_attribute(bucket_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
-read_attribute(durability);
+rw_attribute(durability);
read_attribute(iodone);
read_attribute(io_latency_read);
read_attribute(io_latency_stats_write);
read_attribute(congested);
-read_attribute(btree_avg_write_size);
+read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
read_attribute(open_buckets);
+read_attribute(open_buckets_partial);
+read_attribute(write_points);
+read_attribute(nocow_lock_table);
+
+#ifdef BCH_WRITE_REF_DEBUG
+read_attribute(write_refs);
+
+static const char * const bch2_write_refs[] = { /* one stringified name per BCH_WRITE_REFS() entry, in list order */
+#define x(n) #n,
+ BCH_WRITE_REFS()
+#undef x
+ NULL /* extra trailing entry after the generated names */
+};
+
+/*
+ * Print one line per element of c->writes: the matching name from
+ * bch2_write_refs[], a tab, then the counter's current value.
+ */
+static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	unsigned ref;
+
+	/* one tabstop (24) so the values line up after the names */
+	bch2_printbuf_tabstop_push(out, 24);
+
+	for (ref = 0; ref < ARRAY_SIZE(c->writes); ref++) {
+		long held = atomic_long_read(&c->writes[ref]);
+
+		prt_str(out, bch2_write_refs[ref]);
+		prt_tab(out);
+		prt_printf(out, "%li", held);
+		prt_newline(out);
+	}
+}
+#endif
read_attribute(internal_uuid);
+read_attribute(disk_groups);
read_attribute(has_data);
read_attribute(alloc_debug);
read_attribute(io_timers_write);
read_attribute(data_jobs);
+read_attribute(moving_ctxts);
#ifdef CONFIG_BCACHEFS_TESTS
write_attribute(perf_test);
#define x(_name) \
static struct attribute sysfs_time_stat_##_name = \
- { .name = #_name, .mode = S_IRUGO };
+ { .name = #_name, .mode = 0444 };
BCH_TIME_STATS()
#undef x
static struct attribute sysfs_state_rw = {
.name = "state",
- .mode = S_IRUGO
+ .mode = 0444,
};
static size_t bch2_btree_cache_size(struct bch_fs *c)
return ret;
}
-static size_t bch2_btree_avg_write_size(struct bch_fs *c)
-{
- u64 nr = atomic64_read(&c->btree_writes_nr);
- u64 sectors = atomic64_read(&c->btree_writes_sectors);
-
- return nr ? div64_u64(sectors, nr) : 0;
-}
-
-static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
-{
- long ret = 0;
- struct bch_move_stats *stats;
-
- mutex_lock(&c->data_progress_lock);
- list_for_each_entry(stats, &c->data_progress_list, list) {
- prt_printf(out, "%s: data type %s btree_id %s position: ",
- stats->name,
- bch2_data_types[stats->data_type],
- bch2_btree_ids[stats->btree_id]);
- bch2_bpos_to_text(out, stats->pos);
- prt_printf(out, "%s", "\n");
- }
-
- mutex_unlock(&c->data_progress_lock);
- return ret;
-}
-
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans trans;
bch2_trans_init(&trans, c, 0, 0);
for (id = 0; id < BTREE_ID_NR; id++) {
- if (!((1U << id) & BTREE_ID_HAS_PTRS))
+ if (!btree_type_has_ptrs(id))
continue;
for_each_btree_key(&trans, iter, id, POS_MIN,
prt_printf(out, "\n");
}
+/*
+ * Walk c->btree_trans_list under c->btree_trans_lock and, for every
+ * transaction whose ->locking pointer is non-NULL, call
+ * six_lock_wakeup_all() on that object's lock.
+ */
+static void bch2_btree_wakeup_all(struct bch_fs *c)
+{
+	struct btree_trans *trans;
+
+	seqmutex_lock(&c->btree_trans_lock);
+	list_for_each_entry(trans, &c->btree_trans_list, list) {
+		/* sample once so the NULL check and the use see the same pointer */
+		struct btree_bkey_cached_common *obj = READ_ONCE(trans->locking);
+
+		if (!obj)
+			continue;
+
+		six_lock_wakeup_all(&obj->lock);
+	}
+	seqmutex_unlock(&c->btree_trans_lock);
+}
+
SHOW(bch2_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
- sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
+
+ if (attr == &sysfs_btree_write_stats)
+ bch2_btree_write_stats_to_text(out, c);
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
- sysfs_hprint(copy_gc_wait,
- max(0LL, c->copygc_wait -
- atomic64_read(&c->io_clock[WRITE].now)) << 9);
+
+ if (attr == &sysfs_copy_gc_wait)
+ bch2_copygc_wait_to_text(out, c);
if (attr == &sysfs_rebalance_work)
bch2_rebalance_work_to_text(out, c);
bch2_btree_updates_to_text(out, c);
if (attr == &sysfs_btree_cache)
- bch2_btree_cache_to_text(out, c);
+ bch2_btree_cache_to_text(out, &c->btree_cache);
if (attr == &sysfs_btree_key_cache)
bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
+ if (attr == &sysfs_open_buckets_partial)
+ bch2_open_buckets_partial_to_text(out, c);
+
+ if (attr == &sysfs_write_points)
+ bch2_write_points_to_text(out, c);
+
if (attr == &sysfs_compression_stats)
bch2_compression_stats_to_text(out, c);
bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
if (attr == &sysfs_data_jobs)
- data_progress_to_text(out, c);
+ bch2_data_jobs_to_text(out, c);
+
+ if (attr == &sysfs_moving_ctxts)
+ bch2_fs_moving_ctxts_to_text(out, c);
+
+#ifdef BCH_WRITE_REF_DEBUG
+ if (attr == &sysfs_write_refs)
+ bch2_write_refs_to_text(out, c);
+#endif
+
+ if (attr == &sysfs_nocow_lock_table)
+ bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
+ if (attr == &sysfs_disk_groups)
+ bch2_disk_groups_to_text(out, c);
return 0;
}
c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
}
+ if (attr == &sysfs_btree_wakeup)
+ bch2_btree_wakeup_all(c);
+
if (attr == &sysfs_trigger_gc) {
/*
* Full gc is currently incompatible with btree key cache:
struct attribute *bch2_fs_files[] = {
&sysfs_minor,
&sysfs_btree_cache_size,
- &sysfs_btree_avg_write_size,
+ &sysfs_btree_write_stats,
&sysfs_promote_whole_extents,
counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
prt_printf(out, "since mount:"); \
prt_tab(out); \
- prt_human_readable_u64(out, counter_since_mount << 9); \
+ prt_human_readable_u64(out, counter_since_mount); \
prt_newline(out); \
\
prt_printf(out, "since filesystem creation:"); \
prt_tab(out); \
- prt_human_readable_u64(out, counter << 9); \
+ prt_human_readable_u64(out, counter); \
prt_newline(out); \
}
BCH_PERSISTENT_COUNTERS()
SHOW(bch2_fs_internal)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+
return bch2_fs_to_text(out, &c->kobj, attr);
}
STORE(bch2_fs_internal)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+
return bch2_fs_store(&c->kobj, attr, buf, size);
}
SYSFS_OPS(bch2_fs_internal);
&sysfs_new_stripes,
&sysfs_stripes_heap,
&sysfs_open_buckets,
+ &sysfs_open_buckets_partial,
+ &sysfs_write_points,
+#ifdef BCH_WRITE_REF_DEBUG
+ &sysfs_write_refs,
+#endif
+ &sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
&sysfs_prune_cache,
+ &sysfs_btree_wakeup,
&sysfs_gc_gens_pos,
sysfs_pd_controller_files(rebalance),
&sysfs_data_jobs,
+ &sysfs_moving_ctxts,
&sysfs_internal_uuid,
+
+ &sysfs_disk_groups,
NULL
};
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
- if (unlikely(!percpu_ref_tryget_live(&c->writes)))
+ if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS;
tmp = kstrdup(buf, GFP_KERNEL);
ret = size;
err:
- percpu_ref_put(&c->writes);
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret;
}
SYSFS_OPS(bch2_fs_opts_dir);
for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
- prt_printf(out,
- "\t\t\t buckets\t sectors fragmented\n"
- "capacity\t%16llu\n",
- ca->mi.nbuckets - ca->mi.first_bucket);
-
- for (i = 0; i < BCH_DATA_NR; i++)
- prt_printf(out, "%-16s%16llu%16llu%16llu\n",
- bch2_data_types[i], stats.d[i].buckets,
- stats.d[i].sectors, stats.d[i].fragmented);
-
- prt_printf(out,
- "ec\t\t%16llu\n"
- "\n"
- "freelist_wait\t\t%s\n"
- "open buckets allocated\t%u\n"
- "open buckets this dev\t%u\n"
- "open buckets total\t%u\n"
- "open_buckets_wait\t%s\n"
- "open_buckets_btree\t%u\n"
- "open_buckets_user\t%u\n"
- "buckets_to_invalidate\t%llu\n"
- "btree reserve cache\t%u\n",
- stats.buckets_ec,
- c->freelist_wait.list.first ? "waiting" : "empty",
- OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
- ca->nr_open_buckets,
- OPEN_BUCKETS_COUNT,
- c->open_buckets_wait.list.first ? "waiting" : "empty",
- nr[BCH_DATA_btree],
- nr[BCH_DATA_user],
- should_invalidate_buckets(ca, stats),
- c->btree_reserve_cache_nr);
+ printbuf_tabstop_push(out, 8);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+
+ prt_tab(out);
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+ prt_str(out, "sectors");
+ prt_tab_rjust(out);
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ prt_str(out, bch2_data_types[i]);
+ prt_tab(out);
+ prt_u64(out, stats.d[i].buckets);
+ prt_tab_rjust(out);
+ prt_u64(out, stats.d[i].sectors);
+ prt_tab_rjust(out);
+ prt_u64(out, stats.d[i].fragmented);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_str(out, "ec");
+ prt_tab(out);
+ prt_u64(out, stats.buckets_ec);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_newline(out);
+
+ prt_printf(out, "reserves:");
+ prt_newline(out);
+ for (i = 0; i < BCH_WATERMARK_NR; i++) {
+ prt_str(out, bch2_watermarks[i]);
+ prt_tab(out);
+ prt_u64(out, bch2_dev_buckets_reserved(ca, i));
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 24);
+
+ prt_str(out, "freelist_wait");
+ prt_tab(out);
+ prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open buckets allocated");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
+ prt_newline(out);
+
+ prt_str(out, "open buckets this dev");
+ prt_tab(out);
+ prt_u64(out, ca->nr_open_buckets);
+ prt_newline(out);
+
+ prt_str(out, "open buckets total");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_wait");
+ prt_tab(out);
+ prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_btree");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_btree]);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_user");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_user]);
+ prt_newline(out);
+
+ prt_str(out, "buckets_to_invalidate");
+ prt_tab(out);
+ prt_u64(out, should_invalidate_buckets(ca, stats));
+ prt_newline(out);
+
+ prt_str(out, "btree reserve cache");
+ prt_tab(out);
+ prt_u64(out, c->btree_reserve_cache_nr);
+ prt_newline(out);
}
static const char * const bch2_rw[] = {
mutex_unlock(&c->sb_lock);
}
+	/*
+	 * Store handler for the per-device "durability" attribute: parse the
+	 * new value and, if it differs from what the superblock member entry
+	 * holds, update the entry and write the superblock, all under
+	 * c->sb_lock.
+	 */
+	if (attr == &sysfs_durability) {
+		u64 v = strtoul_or_return(buf);
+
+		mutex_lock(&c->sb_lock);
+		mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+
+		/*
+		 * The member field is written as v + 1 below, so the change
+		 * check must use the same encoding. Comparing the raw v
+		 * dropped real changes and rewrote the superblock on no-op
+		 * stores.
+		 * NOTE(review): presumably the +1 bias reserves 0 for "unset";
+		 * confirm against the field's definition.
+		 */
+		if (v + 1 != BCH_MEMBER_DURABILITY(mi)) {
+			SET_BCH_MEMBER_DURABILITY(mi, v + 1);
+			bch2_write_super(c);
+		}
+		mutex_unlock(&c->sb_lock);
+	}
+
if (attr == &sysfs_label) {
char *tmp;
int ret;