#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
+#include "btree_locking.h"
#include "btree_update.h"
#include "buckets.h"
#include "debug.h"
#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
-#include <linux/pretty-printers.h>
#include <linux/random.h>
#include <linux/seq_file.h>
struct bset *sorted, *inmemory = &b->data->keys;
struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
struct bio *bio;
- bool failed = false;
+ bool failed = false, saw_error = false;
if (!bch2_dev_get_ioref(ca, READ))
return false;
memcpy(n_ondisk, n_sorted, btree_bytes(c));
v->written = 0;
- if (bch2_btree_node_read_done(c, ca, v, false))
+ if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
return false;
n_sorted = c->verify_data->data;
struct bch_fs *c;
enum btree_id id;
struct bpos from;
+ struct bpos prev_node;
u64 iter;
struct printbuf buf;
ssize_t ret; /* bytes read so far */
};
-static int flush_buf(struct dump_iter *i)
+static ssize_t flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
}
- return 0;
+ return i->size ? 0 : i->ret;
}
static int bch2_dump_open(struct inode *inode, struct file *file)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- return i->ret;
-
bch2_trans_init(&trans, i->c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, i->id, i->from,
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS);
- k = bch2_btree_iter_peek(&iter);
+ ret = for_each_btree_key2(&trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ ret = flush_buf(i);
+ if (ret)
+ break;
- while (k.k && !(err = bkey_err(k))) {
bch2_bkey_val_to_text(&i->buf, i->c, k);
- prt_char(&i->buf, '\n');
-
- k = bch2_btree_iter_next(&iter);
- i->from = iter.pos;
-
- err = flush_buf(i);
- if (err)
- break;
+ prt_newline(&i->buf);
+ 0;
+ }));
+ i->from = iter.pos;
- if (!i->size)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ if (!ret)
+ ret = flush_buf(i);
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ return ret ?: i->ret;
}
static const struct file_operations btree_debug_ops = {
struct btree_trans trans;
struct btree_iter iter;
struct btree *b;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
- if (!i->size || !bpos_cmp(SPOS_MAX, i->from))
+ if (bpos_eq(SPOS_MAX, i->from))
return i->ret;
bch2_trans_init(&trans, i->c, 0, 0);
- for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) {
- bch2_btree_node_to_text(&i->buf, i->c, b);
- err = flush_buf(i);
- if (err)
+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
+ ret = flush_buf(i);
+ if (ret)
break;
- /*
- * can't easily correctly restart a btree node traversal across
- * all nodes, meh
- */
- i->from = bpos_cmp(SPOS_MAX, b->key.k.p)
+ bch2_btree_node_to_text(&i->buf, i->c, b);
+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
-
- if (!i->size)
- break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations btree_format_debug_ops = {
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct btree *prev_node = NULL;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- return i->ret;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
bch2_trans_init(&trans, i->c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, i->id, i->from,
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS);
-
- while ((k = bch2_btree_iter_peek(&iter)).k &&
- !(err = bkey_err(k))) {
+ ret = for_each_btree_key2(&trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
struct bkey_packed *_k =
bch2_btree_node_iter_peek(&l->iter, l->b);
- if (l->b != prev_node) {
+ ret = flush_buf(i);
+ if (ret)
+ break;
+
+ if (bpos_gt(l->b->key.k.p, i->prev_node)) {
bch2_btree_node_to_text(&i->buf, i->c, l->b);
- err = flush_buf(i);
- if (err)
- break;
+ i->prev_node = l->b->key.k.p;
}
- prev_node = l->b;
bch2_bfloat_to_text(&i->buf, l->b, _k);
- err = flush_buf(i);
- if (err)
- break;
-
- bch2_btree_iter_advance(&iter);
- i->from = iter.pos;
-
- err = flush_buf(i);
- if (err)
- break;
-
- if (!i->size)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ 0;
+ }));
+ i->from = iter.pos;
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations bfloat_failed_debug_ops = {
static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
struct btree *b)
{
- out->tabstops[0] = 32;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
prt_printf(out, "%px btree=%s l=%u ",
b,
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
bool done = false;
- int err;
+ ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
struct rhash_head *pos;
struct btree *b;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- break;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
rcu_read_lock();
i->buf.atomic++;
if (i->iter < tbl->size) {
rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
bch2_cached_btree_node_to_text(&i->buf, c, b);
- i->iter++;;
+ i->iter++;
} else {
done = true;
}
} while (!done);
if (i->buf.allocation_failure)
- return -ENOMEM;
+ ret = -ENOMEM;
- return i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
+/*
+ * debugfs read handler for "btree_transactions".
+ *
+ * Walks c->btree_trans_list with c->btree_trans_lock held.  Every transaction
+ * whose waiter task pid is greater than the cursor kept in i->iter gets its
+ * description and a task backtrace appended to i->buf; the cursor is then
+ * moved to that pid so the entry is skipped on the next pass.
+ *
+ * Returns -ENOMEM if the printbuf recorded an allocation failure, otherwise
+ * the first non-zero flush_buf() result, otherwise i->ret.
+ */
+static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
+					    size_t size, loff_t *ppos)
+{
+	struct dump_iter *i = file->private_data;
+	struct printbuf *out = &i->buf;
+	struct bch_fs *c = i->c;
+	struct btree_trans *trans;
+	ssize_t ret = 0;
+
+	i->ubuf	= buf;
+	i->size	= size;
+	i->ret	= 0;
+
+	mutex_lock(&c->btree_trans_lock);
+	list_for_each_entry(trans, &c->btree_trans_list, list) {
+		/* at or below the cursor: already handled */
+		if (trans->locking_wait.task->pid <= i->iter)
+			continue;
+
+		/* push out what is pending before generating more text */
+		ret = flush_buf(i);
+		if (ret)
+			break;
+
+		bch2_btree_trans_to_text(out, trans);
+
+		prt_printf(out, "backtrace:");
+		prt_newline(out);
+		printbuf_indent_add(out, 2);
+		bch2_prt_task_backtrace(out, trans->locking_wait.task);
+		printbuf_indent_sub(out, 2);
+		prt_newline(out);
+
+		i->iter = trans->locking_wait.task->pid;
+	}
+	mutex_unlock(&c->btree_trans_lock);
+
+	/* an allocation failure overrides whatever the loop produced */
+	if (out->allocation_failure)
+		return -ENOMEM;
+
+	if (ret)
+		return ret;
+
+	ret = flush_buf(i);
+	return ret ? ret : i->ret;
+}
+
+/* file operations backing the "btree_transactions" debugfs file */
+static const struct file_operations btree_transactions_ops = {
+	.owner		= THIS_MODULE,
+	.open		= bch2_dump_open,
+	.read		= bch2_btree_transactions_read,
+	.release	= bch2_dump_release,
+};
+#endif /* CONFIG_BCACHEFS_DEBUG_TRANSACTIONS */
+
static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
.read = bch2_journal_pins_read,
};
+/*
+ * open() for the lock_held_stats file operations: allocates a zeroed
+ * dump_iter, points it at the bch_fs stored in inode->i_private, gives it a
+ * fresh printbuf and hangs it off file->private_data.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int lock_held_stats_open(struct inode *inode, struct file *file)
+{
+	struct dump_iter *i = kzalloc(sizeof(*i), GFP_KERNEL);
+
+	if (!i)
+		return -ENOMEM;
+
+	/* kzalloc() zeroed the structure, so i->iter already starts at 0 */
+	i->c	= inode->i_private;
+	i->buf	= PRINTBUF;
+
+	file->private_data = i;
+	return 0;
+}
+
+/*
+ * release() counterpart of lock_held_stats_open(): tears down the printbuf
+ * and frees the dump_iter stored in file->private_data.  Always returns 0.
+ */
+static int lock_held_stats_release(struct inode *inode, struct file *file)
+{
+	struct dump_iter *iter = file->private_data;
+
+	printbuf_exit(&iter->buf);
+	kfree(iter);
+	return 0;
+}
+
+/*
+ * read() handler for the lock_held_stats file operations.
+ *
+ * i->iter is the index of the next entry of bch2_btree_transaction_fns[] to
+ * report.  For each entry the function name, the maximum memory used,
+ * optionally the lock hold time statistics and, when recorded, the text of
+ * the maximum allocated btree paths are appended to i->buf; the per-entry
+ * numbers are read with s->lock held.
+ *
+ * Returns the first non-zero flush_buf() result, -ENOMEM if the printbuf
+ * recorded an allocation failure, otherwise i->ret.
+ */
+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
+				    size_t size, loff_t *ppos)
+{
+	struct dump_iter *i = file->private_data;
+	struct bch_fs *c = i->c;
+	/* flush_buf() returns ssize_t; keep the full width like the other readers */
+	ssize_t ret;
+
+	i->ubuf	= buf;
+	i->size	= size;
+	i->ret	= 0;
+
+	while (1) {
+		struct btree_transaction_stats *s;
+
+		ret = flush_buf(i);
+		if (ret)
+			return ret;
+
+		/* no room left in the caller's buffer (also covers size == 0) */
+		if (!i->size)
+			break;
+
+		if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
+		    !bch2_btree_transaction_fns[i->iter])
+			break;
+
+		/* only index the stats array once i->iter is known to be in range */
+		s = &c->btree_transaction_stats[i->iter];
+
+		prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
+		prt_newline(&i->buf);
+		printbuf_indent_add(&i->buf, 2);
+
+		mutex_lock(&s->lock);
+
+		prt_printf(&i->buf, "Max mem used: %u", s->max_mem);
+		prt_newline(&i->buf);
+
+		if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+			prt_printf(&i->buf, "Lock hold times:");
+			prt_newline(&i->buf);
+
+			printbuf_indent_add(&i->buf, 2);
+			bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
+			printbuf_indent_sub(&i->buf, 2);
+		}
+
+		if (s->max_paths_text) {
+			prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
+			prt_newline(&i->buf);
+
+			printbuf_indent_add(&i->buf, 2);
+			prt_str_indented(&i->buf, s->max_paths_text);
+			printbuf_indent_sub(&i->buf, 2);
+		}
+
+		mutex_unlock(&s->lock);
+
+		printbuf_indent_sub(&i->buf, 2);
+		prt_newline(&i->buf);
+		i->iter++;
+	}
+
+	if (i->buf.allocation_failure)
+		return -ENOMEM;
+
+	return i->ret;
+}
+
+/* file operations built from the lock_held_stats_* handlers */
+static const struct file_operations lock_held_stats_op = {
+	.owner		= THIS_MODULE,
+	.open		= lock_held_stats_open,
+	.read		= lock_held_stats_read,
+	.release	= lock_held_stats_release,
+};
+
+/*
+ * debugfs read handler for "btree_deadlock".
+ *
+ * The transaction list is only walked while the cursor i->iter is still 0.
+ * During that walk, with c->btree_trans_lock held, bch2_check_for_deadlock()
+ * is run for each transaction whose waiter task pid is above the cursor, and
+ * the cursor is moved to that pid.  Once the cursor is non-zero a read only
+ * flushes what is already sitting in i->buf.
+ *
+ * Returns -ENOMEM if the printbuf recorded an allocation failure, otherwise
+ * the first non-zero flush_buf() result, otherwise i->ret.
+ */
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+					size_t size, loff_t *ppos)
+{
+	struct dump_iter *i = file->private_data;
+	struct printbuf *out = &i->buf;
+	struct bch_fs *c = i->c;
+	struct btree_trans *trans;
+	ssize_t ret = 0;
+
+	i->ubuf	= buf;
+	i->size	= size;
+	i->ret	= 0;
+
+	if (!i->iter) {
+		mutex_lock(&c->btree_trans_lock);
+		list_for_each_entry(trans, &c->btree_trans_list, list) {
+			/* at or below the cursor: already handled */
+			if (trans->locking_wait.task->pid <= i->iter)
+				continue;
+
+			ret = flush_buf(i);
+			if (ret)
+				break;
+
+			bch2_check_for_deadlock(trans, out);
+
+			i->iter = trans->locking_wait.task->pid;
+		}
+		mutex_unlock(&c->btree_trans_lock);
+	}
+
+	/* an allocation failure overrides whatever the loop produced */
+	if (out->allocation_failure)
+		return -ENOMEM;
+
+	if (ret)
+		return ret;
+
+	ret = flush_buf(i);
+	return ret ? ret : i->ret;
+}
+
+/* file operations backing the "btree_deadlock" debugfs file */
+static const struct file_operations btree_deadlock_ops = {
+	.owner		= THIS_MODULE,
+	.open		= bch2_dump_open,
+	.read		= bch2_btree_deadlock_read,
+	.release	= bch2_dump_release,
+};
+
void bch2_fs_debug_exit(struct bch_fs *c)
{
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
c->btree_debug, &cached_btree_nodes_ops);
+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
+ debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_transactions_ops);
+#endif
+
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
+ c, &lock_held_stats_op);
+
+ debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_deadlock_ops);
+
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;