#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "movinggc.h"
#include "nocow_locking.h"
#include "opts.h"
#include "rebalance.h"
#include "tests.h"
#include <linux/blkdev.h>
-#include <linux/pretty-printers.h>
#include <linux/sort.h>
#include <linux/sched/clock.h>
prt_human_readable_s64(out, val); \
} while (0)
-#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var))
-#define var_print(_var) sysfs_print(_var, var(_var))
-#define var_hprint(_var) sysfs_hprint(_var, var(_var))
-
#define sysfs_strtoul(file, var) \
do { \
if (attr == &sysfs_ ## file) \
_v; \
})
-#define strtoul_restrict_or_return(cp, min, max) \
-({ \
- unsigned long __v = 0; \
- int _r = strtoul_safe_restrict(cp, __v, min, max); \
- if (_r) \
- return _r; \
- __v; \
-})
-
-#define strtoi_h_or_return(cp) \
-({ \
- u64 _v; \
- int _r = strtoi_h(cp, &_v); \
- if (_r) \
- return _r; \
- _v; \
-})
-
-#define sysfs_hatoi(file, var) \
-do { \
- if (attr == &sysfs_ ## file) \
- return strtoi_h(buf, &var) ?: (ssize_t) size; \
-} while (0)
-
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
read_attribute(uuid);
read_attribute(minor);
+read_attribute(flags);
read_attribute(bucket_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
rw_attribute(durability);
-read_attribute(iodone);
+read_attribute(io_done);
+read_attribute(io_errors);
+write_attribute(io_errors_reset);
read_attribute(io_latency_read);
read_attribute(io_latency_write);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
read_attribute(open_buckets);
+read_attribute(open_buckets_partial);
+read_attribute(write_points);
read_attribute(nocow_lock_table);
+#ifdef BCH_WRITE_REF_DEBUG
+read_attribute(write_refs);
+
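+/* BCH_WRITE_REFS() x-macro expanded into a NULL-terminated array of ref names */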
+static const char * const bch2_write_refs[] = {
+#define x(n) #n,
+ BCH_WRITE_REFS()
+#undef x
+ NULL
+};
+
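+/* One line per write ref class: name and current count (BCH_WRITE_REF_DEBUG builds only) */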
+static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ bch2_printbuf_tabstop_push(out, 24);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++) {
+ prt_str(out, bch2_write_refs[i]);
+ prt_tab(out);
+ prt_printf(out, "%li", atomic_long_read(&c->writes[i]));
+ prt_newline(out);
+ }
+}
+#endif
+
read_attribute(internal_uuid);
+read_attribute(disk_groups);
read_attribute(has_data);
read_attribute(alloc_debug);
rw_attribute(rebalance_enabled);
sysfs_pd_controller_attribute(rebalance);
-read_attribute(rebalance_work);
+read_attribute(rebalance_status);
rw_attribute(promote_whole_extents);
read_attribute(new_stripes);
read_attribute(io_timers_read);
read_attribute(io_timers_write);
-read_attribute(data_jobs);
+read_attribute(moving_ctxts);
#ifdef CONFIG_BCACHEFS_TESTS
write_attribute(perf_test);
return ret;
}
-static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
-{
- long ret = 0;
- struct bch_move_stats *stats;
-
- mutex_lock(&c->data_progress_lock);
- list_for_each_entry(stats, &c->data_progress_list, list) {
- prt_printf(out, "%s: data type %s btree_id %s position: ",
- stats->name,
- bch2_data_types[stats->data_type],
- bch2_btree_ids[stats->btree_id]);
- bch2_bpos_to_text(out, stats->pos);
- prt_printf(out, "%s", "\n");
- }
-
- mutex_unlock(&c->data_progress_lock);
- return ret;
-}
-
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
+ struct btree_trans *trans;
enum btree_id id;
- u64 nr_uncompressed_extents = 0,
- nr_compressed_extents = 0,
- nr_incompressible_extents = 0,
- uncompressed_sectors = 0,
- incompressible_sectors = 0,
- compressed_sectors_compressed = 0,
- compressed_sectors_uncompressed = 0;
- int ret;
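+	/* running totals, one slot per BCH_COMPRESSION_TYPE_* value */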
+ struct compression_type_stats {
+ u64 nr_extents;
+ u64 sectors_compressed;
+ u64 sectors_uncompressed;
+ } s[BCH_COMPRESSION_TYPE_NR];
+ u64 compressed_incompressible = 0;
+ int ret = 0;
+
+ memset(s, 0, sizeof(s));
- if (!test_bit(BCH_FS_STARTED, &c->flags))
+ if (!test_bit(BCH_FS_started, &c->flags))
return -EPERM;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
for (id = 0; id < BTREE_ID_NR; id++) {
if (!btree_type_has_ptrs(id))
continue;
- for_each_btree_key(&trans, iter, id, POS_MIN,
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ ret = for_each_btree_key(trans, iter, id, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- bool compressed = false, uncompressed = false, incompressible = false;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- switch (p.crc.compression_type) {
- case BCH_COMPRESSION_TYPE_none:
- uncompressed = true;
- uncompressed_sectors += k.k->size;
- break;
- case BCH_COMPRESSION_TYPE_incompressible:
- incompressible = true;
- incompressible_sectors += k.k->size;
- break;
- default:
- compressed_sectors_compressed +=
- p.crc.compressed_size;
- compressed_sectors_uncompressed +=
- p.crc.uncompressed_size;
- compressed = true;
- break;
+ bool compressed = false, incompressible = false;
+
+ bkey_for_each_crc(k.k, ptrs, crc, entry) {
+ incompressible |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+ compressed |= crc_is_compressed(crc);
+
+ if (crc_is_compressed(crc)) {
+ s[crc.compression_type].nr_extents++;
+ s[crc.compression_type].sectors_compressed += crc.compressed_size;
+ s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
}
}
- if (incompressible)
- nr_incompressible_extents++;
- else if (uncompressed)
- nr_uncompressed_extents++;
- else if (compressed)
- nr_compressed_extents++;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ compressed_incompressible += compressed && incompressible;
+
+ if (!compressed) {
+ unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;
+
+ s[t].nr_extents++;
+ s[t].sectors_compressed += k.k->size;
+ s[t].sectors_uncompressed += k.k->size;
+ }
+		0; /* loop body is a statement expression; it must evaluate to 0 to keep iterating */
+ }));
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
return ret;
- prt_printf(out, "uncompressed:\n");
- prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents);
- prt_printf(out, " size: ");
- prt_human_readable_u64(out, uncompressed_sectors << 9);
- prt_printf(out, "\n");
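+	/* header row: type, compressed size, uncompressed size, average extent size */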
+ prt_str(out, "type");
+ printbuf_tabstop_push(out, 12);
+ prt_tab(out);
- prt_printf(out, "compressed:\n");
- prt_printf(out, " nr extents: %llu\n", nr_compressed_extents);
- prt_printf(out, " compressed size: ");
- prt_human_readable_u64(out, compressed_sectors_compressed << 9);
- prt_printf(out, "\n");
- prt_printf(out, " uncompressed size: ");
- prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
- prt_printf(out, "\n");
+ prt_str(out, "compressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "uncompressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "average extent size");
+ printbuf_tabstop_push(out, 24);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
+ prt_str(out, bch2_compression_types[i]);
+ prt_tab(out);
+
+ prt_human_readable_u64(out, s[i].sectors_compressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].nr_extents
+ ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
+ : 0);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ if (compressed_incompressible) {
+ prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
+ prt_newline(out);
+ }
- prt_printf(out, "incompressible:\n");
- prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents);
- prt_printf(out, " size: ");
- prt_human_readable_u64(out, incompressible_sectors << 9);
- prt_printf(out, "\n");
return 0;
}
static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
{
- prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
+ prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree));
bch2_bpos_to_text(out, c->gc_gens_pos);
prt_printf(out, "\n");
}
{
struct btree_trans *trans;
- mutex_lock(&c->btree_trans_lock);
+ seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
six_lock_wakeup_all(&b->lock);
}
- mutex_unlock(&c->btree_trans_lock);
+ seqmutex_unlock(&c->btree_trans_lock);
}
SHOW(bch2_fs)
sysfs_print(minor, c->minor);
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
+ if (attr == &sysfs_flags)
+ prt_bitflags(out, bch2_fs_flag_strs, c->flags);
+
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
if (attr == &sysfs_btree_write_stats)
sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
- sysfs_hprint(copy_gc_wait,
- max(0LL, c->copygc_wait -
- atomic64_read(&c->io_clock[WRITE].now)) << 9);
- if (attr == &sysfs_rebalance_work)
- bch2_rebalance_work_to_text(out, c);
+ if (attr == &sysfs_copy_gc_wait)
+ bch2_copygc_wait_to_text(out, c);
+
+ if (attr == &sysfs_rebalance_status)
+ bch2_rebalance_status_to_text(out, c);
sysfs_print(promote_whole_extents, c->promote_whole_extents);
bch2_btree_updates_to_text(out, c);
if (attr == &sysfs_btree_cache)
- bch2_btree_cache_to_text(out, &c->btree_cache);
+ bch2_btree_cache_to_text(out, c);
if (attr == &sysfs_btree_key_cache)
bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
+ if (attr == &sysfs_open_buckets_partial)
+ bch2_open_buckets_partial_to_text(out, c);
+
+ if (attr == &sysfs_write_points)
+ bch2_write_points_to_text(out, c);
+
if (attr == &sysfs_compression_stats)
bch2_compression_stats_to_text(out, c);
if (attr == &sysfs_io_timers_write)
bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
- if (attr == &sysfs_data_jobs)
- data_progress_to_text(out, c);
+ if (attr == &sysfs_moving_ctxts)
+ bch2_fs_moving_ctxts_to_text(out, c);
+
+#ifdef BCH_WRITE_REF_DEBUG
+ if (attr == &sysfs_write_refs)
+ bch2_write_refs_to_text(out, c);
+#endif
if (attr == &sysfs_nocow_lock_table)
bch2_nocow_locks_to_text(out, &c->nocow_locks);
+ if (attr == &sysfs_disk_groups)
+ bch2_disk_groups_to_text(out, c);
+
return 0;
}
/* Debugging: */
- if (!test_bit(BCH_FS_STARTED, &c->flags))
+ if (!test_bit(BCH_FS_started, &c->flags))
return -EPERM;
/* Debugging: */
- if (!test_bit(BCH_FS_RW, &c->flags))
+ if (!test_bit(BCH_FS_rw, &c->flags))
return -EROFS;
if (attr == &sysfs_prune_cache) {
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
+ c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
}
if (attr == &sysfs_btree_wakeup)
counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
prt_printf(out, "since mount:"); \
prt_tab(out); \
- prt_human_readable_u64(out, counter_since_mount << 9); \
+ prt_human_readable_u64(out, counter_since_mount); \
prt_newline(out); \
\
prt_printf(out, "since filesystem creation:"); \
prt_tab(out); \
- prt_human_readable_u64(out, counter << 9); \
+ prt_human_readable_u64(out, counter); \
prt_newline(out); \
}
BCH_PERSISTENT_COUNTERS()
SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
+ &sysfs_flags,
&sysfs_journal_debug,
&sysfs_btree_updates,
&sysfs_btree_cache,
&sysfs_new_stripes,
&sysfs_stripes_heap,
&sysfs_open_buckets,
+ &sysfs_open_buckets_partial,
+ &sysfs_write_points,
+#ifdef BCH_WRITE_REF_DEBUG
+ &sysfs_write_refs,
+#endif
&sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
&sysfs_copy_gc_wait,
&sysfs_rebalance_enabled,
- &sysfs_rebalance_work,
+ &sysfs_rebalance_status,
sysfs_pd_controller_files(rebalance),
- &sysfs_data_jobs,
+ &sysfs_moving_ctxts,
&sysfs_internal_uuid,
+
+ &sysfs_disk_groups,
NULL
};
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
- if (unlikely(!percpu_ref_tryget_live(&c->writes)))
+ if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS;
tmp = kstrdup(buf, GFP_KERNEL);
bch2_opt_set_by_id(&c->opts, id, v);
if ((id == Opt_background_target ||
- id == Opt_background_compression) && v) {
- bch2_rebalance_add_work(c, S64_MAX);
- rebalance_wakeup(c);
- }
+ id == Opt_background_compression) && v)
+ bch2_set_rebalance_needs_scan(c, 0);
ret = size;
err:
- percpu_ref_put(&c->writes);
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret;
}
SYSFS_OPS(bch2_fs_opts_dir);
for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
- prt_printf(out,
- "\t\t\t buckets\t sectors fragmented\n"
- "capacity\t%16llu\n",
- ca->mi.nbuckets - ca->mi.first_bucket);
-
- for (i = 0; i < BCH_DATA_NR; i++)
- prt_printf(out, "%-16s%16llu%16llu%16llu\n",
- bch2_data_types[i], stats.d[i].buckets,
- stats.d[i].sectors, stats.d[i].fragmented);
-
- prt_printf(out,
- "ec\t\t%16llu\n"
- "\n"
- "freelist_wait\t\t%s\n"
- "open buckets allocated\t%u\n"
- "open buckets this dev\t%u\n"
- "open buckets total\t%u\n"
- "open_buckets_wait\t%s\n"
- "open_buckets_btree\t%u\n"
- "open_buckets_user\t%u\n"
- "buckets_to_invalidate\t%llu\n"
- "btree reserve cache\t%u\n",
- stats.buckets_ec,
- c->freelist_wait.list.first ? "waiting" : "empty",
- OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
- ca->nr_open_buckets,
- OPEN_BUCKETS_COUNT,
- c->open_buckets_wait.list.first ? "waiting" : "empty",
- nr[BCH_DATA_btree],
- nr[BCH_DATA_user],
- should_invalidate_buckets(ca, stats),
- c->btree_reserve_cache_nr);
+ printbuf_tabstop_push(out, 8);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+
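+	/* per-data-type usage table, then reserved buckets per watermark below */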
+ bch2_dev_usage_to_text(out, &stats);
+
+ prt_newline(out);
+
+ prt_printf(out, "reserves:");
+ prt_newline(out);
+ for (i = 0; i < BCH_WATERMARK_NR; i++) {
+ prt_str(out, bch2_watermarks[i]);
+ prt_tab(out);
+ prt_u64(out, bch2_dev_buckets_reserved(ca, i));
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 24);
+
+ prt_str(out, "freelist_wait");
+ prt_tab(out);
+ prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open buckets allocated");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
+ prt_newline(out);
+
+ prt_str(out, "open buckets this dev");
+ prt_tab(out);
+ prt_u64(out, ca->nr_open_buckets);
+ prt_newline(out);
+
+ prt_str(out, "open buckets total");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_wait");
+ prt_tab(out);
+ prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_btree");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_btree]);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_user");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_user]);
+ prt_newline(out);
+
+ prt_str(out, "buckets_to_invalidate");
+ prt_tab(out);
+ prt_u64(out, should_invalidate_buckets(ca, stats));
+ prt_newline(out);
+
+ prt_str(out, "btree reserve cache");
+ prt_tab(out);
+ prt_u64(out, c->btree_reserve_cache_nr);
+ prt_newline(out);
}
static const char * const bch2_rw[] = {
NULL
};
-static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
+static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
{
int rw, i;
sysfs_print(discard, ca->mi.discard);
if (attr == &sysfs_label) {
- if (ca->mi.group) {
- mutex_lock(&c->sb_lock);
- bch2_disk_path_to_text(out, c->disk_sb.sb,
- ca->mi.group - 1);
- mutex_unlock(&c->sb_lock);
- }
-
+ if (ca->mi.group)
+ bch2_disk_path_to_text(out, c, ca->mi.group - 1);
prt_char(out, '\n');
}
prt_char(out, '\n');
}
- if (attr == &sysfs_iodone)
- dev_iodone_to_text(out, ca);
+ if (attr == &sysfs_io_done)
+ dev_io_done_to_text(out, ca);
+
+ if (attr == &sysfs_io_errors)
+ bch2_dev_io_errors_to_text(out, ca);
sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
bool v = strtoul_or_return(buf);
mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
if (v != BCH_MEMBER_DISCARD(mi)) {
SET_BCH_MEMBER_DISCARD(mi, v);
u64 v = strtoul_or_return(buf);
mutex_lock(&c->sb_lock);
- mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
- if (v != BCH_MEMBER_DURABILITY(mi)) {
+ if (v + 1 != BCH_MEMBER_DURABILITY(mi)) {
SET_BCH_MEMBER_DURABILITY(mi, v + 1);
bch2_write_super(c);
}
return ret;
}
+ if (attr == &sysfs_io_errors_reset)
+ bch2_dev_errors_reset(ca);
+
return size;
}
SYSFS_OPS(bch2_dev);
&sysfs_label,
&sysfs_has_data,
- &sysfs_iodone,
+ &sysfs_io_done,
+ &sysfs_io_errors,
+ &sysfs_io_errors_reset,
&sysfs_io_latency_read,
&sysfs_io_latency_write,