X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fdebug.c;h=de5bfc0d46844166b3543b4bf3ee1a7a841bbc3a;hb=1f79cf3825e94fcb146d417b6dda9b94c93c7a53;hp=05cae0ed41ae7c07361e0b512c6fb61e5464891e;hpb=bad0c8c50758b4447d529f61017c1a8c85976a3e;p=bcachefs-tools-debian diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 05cae0e..de5bfc0 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -11,6 +11,7 @@ #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" +#include "btree_locking.h" #include "btree_update.h" #include "buckets.h" #include "debug.h" @@ -18,13 +19,11 @@ #include "extents.h" #include "fsck.h" #include "inode.h" -#include "io.h" #include "super.h" #include #include #include -#include #include #include @@ -39,7 +38,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, struct bset *sorted, *inmemory = &b->data->keys; struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev); struct bio *bio; - bool failed = false; + bool failed = false, saw_error = false; if (!bch2_dev_get_ioref(ca, READ)) return false; @@ -47,7 +46,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, bio = bio_alloc_bioset(ca->disk_sb.bdev, buf_pages(n_sorted, btree_bytes(c)), REQ_OP_READ|REQ_META, - GFP_NOIO, + GFP_NOFS, &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; bch2_bio_map(bio, n_sorted, btree_bytes(c)); @@ -60,7 +59,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, memcpy(n_ondisk, n_sorted, btree_bytes(c)); v->written = 0; - if (bch2_btree_node_read_done(c, ca, v, false)) + if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) return false; n_sorted = c->verify_data->data; @@ -153,11 +152,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) BUG_ON(b->nsets != 1); - for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_next(k)) - if (k->type == KEY_TYPE_btree_ptr_v2) { - struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k); - v->mem_ptr = 0; - } + for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k)) + if (k->type == KEY_TYPE_btree_ptr_v2) + ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0; v = c->verify_data; bkey_copy(&v->key, &b->key); @@ -181,6 +178,125 @@ out: bch2_btree_node_io_unlock(b); } +void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, + const struct btree *b) +{ + struct btree_node *n_ondisk = NULL; + struct extent_ptr_decoded pick; + struct bch_dev *ca; + struct bio *bio = NULL; + unsigned offset = 0; + int ret; + + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) { + prt_printf(out, "error getting device to read from: invalid device\n"); + return; + } + + ca = bch_dev_bkey_exists(c, pick.ptr.dev); + if (!bch2_dev_get_ioref(ca, READ)) { + prt_printf(out, "error getting device to read from: not online\n"); + return; + } + + n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); + if (!n_ondisk) { + prt_printf(out, "memory allocation failure\n"); + goto out; + } + + bio = bio_alloc_bioset(ca->disk_sb.bdev, + buf_pages(n_ondisk, btree_bytes(c)), + REQ_OP_READ|REQ_META, + GFP_NOFS, + &c->btree_bio); + bio->bi_iter.bi_sector = pick.ptr.offset; + bch2_bio_map(bio, n_ondisk, btree_bytes(c)); + + ret = submit_bio_wait(bio); + if (ret) { + prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret)); + goto out; + } + + while (offset < btree_sectors(c)) { + struct bset *i; + struct nonce nonce; + struct bch_csum csum; + struct bkey_packed *k; + unsigned sectors; + + if (!offset) { + i = &n_ondisk->keys; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk); + + if (bch2_crc_cmp(csum, n_ondisk->csum)) { + prt_printf(out, "invalid checksum\n"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(n_ondisk, c->block_bits); + } else { + struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9); + + i = &bne->keys; + + if (i->seq != n_ondisk->keys.seq) + break; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); + + if (bch2_crc_cmp(csum, bne->csum)) { + prt_printf(out, "invalid checksum"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(bne, c->block_bits); + } + + prt_printf(out, " offset %u version %u, journal seq %llu\n", + offset, + le16_to_cpu(i->version), + le64_to_cpu(i->journal_seq)); + offset += sectors; + + printbuf_indent_add(out, 4); + + for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { + struct bkey u; + + bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); + prt_newline(out); + } + + printbuf_indent_sub(out, 4); + } +out: + if (bio) + bio_put(bio); + kvpfree(n_ondisk, btree_bytes(c)); + percpu_ref_put(&ca->io_ref); +} + #ifdef CONFIG_DEBUG_FS /* XXX: bch_fs refcounting */ @@ -189,6 +305,7 @@ struct dump_iter { struct bch_fs *c; enum btree_id id; struct bpos from; + struct bpos prev_node; u64 iter; struct printbuf buf; @@ -198,23 +315,23 @@ struct dump_iter { ssize_t ret; /* bytes read so far */ }; -static int flush_buf(struct dump_iter *i) +static ssize_t flush_buf(struct dump_iter *i) { if (i->buf.pos) { size_t bytes = min_t(size_t, i->buf.pos, i->size); - int err = copy_to_user(i->ubuf, i->buf.buf, bytes); + int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes); - if (err) - return err; + i->ret += copied; + i->ubuf += copied; + i->size -= copied; + i->buf.pos -= copied; + memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos); - i->ret += bytes; - i->ubuf += bytes; - i->size -= bytes; - i->buf.pos -= bytes; - memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); + if (copied != bytes) + return -EFAULT; } - return 0; + return i->size ? 0 : i->ret; } static int bch2_dump_open(struct inode *inode, struct file *file) @@ -249,48 +366,23 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - int err; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; - - bch2_trans_init(&trans, i->c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(&iter); - - while (k.k && !(err = bkey_err(k))) { - bch2_bkey_val_to_text(&i->buf, i->c, k); - prt_char(&i->buf, '\n'); - - k = bch2_btree_iter_next(&iter); - i->from = iter.pos; - - err = flush_buf(i); - if (err) - break; - - if (!i->size) - break; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - - return err < 0 ? err : i->ret; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; } static const struct file_operations btree_debug_ops = { @@ -304,46 +396,47 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct btree *b; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; + ret = flush_buf(i); + if (ret) + return ret; - if (!i->size || !bpos_cmp(SPOS_MAX, i->from)) + if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - bch2_trans_init(&trans, i->c, 0, 0); + trans = bch2_trans_get(i->c); +retry: + bch2_trans_begin(trans); - for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { bch2_btree_node_to_text(&i->buf, i->c, b); - err = flush_buf(i); - if (err) - break; - - /* - * can't easily correctly restart a btree node traversal across - * all nodes, meh - */ - i->from = bpos_cmp(SPOS_MAX, b->key.k.p) + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) ? bpos_successor(b->key.k.p) : b->key.k.p; - if (!i->size) + ret = drop_locks_do(trans, flush_buf(i)); + if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); - return err < 0 ? err : i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -357,63 +450,32 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct btree *prev_node = NULL; - int err; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; - - bch2_trans_init(&trans, i->c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - - while ((k = bch2_btree_iter_peek(&iter)).k && - !(err = bkey_err(k))) { - struct btree_path_level *l = &iter.path->l[0]; - struct bkey_packed *_k = - bch2_btree_node_iter_peek(&l->iter, l->b); - - if (l->b != prev_node) { - bch2_btree_node_to_text(&i->buf, i->c, l->b); - err = flush_buf(i); - if (err) - break; - } - prev_node = l->b; - - bch2_bfloat_to_text(&i->buf, l->b, _k); - err = flush_buf(i); - if (err) - break; - - bch2_btree_iter_advance(&iter); - i->from = iter.pos; - - err = flush_buf(i); - if (err) - break; - - if (!i->size) - break; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - - return err < 0 ? err : i->ret; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + struct btree_path_level *l = + &btree_iter_path(trans, &iter)->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); + + if (bpos_gt(l->b->key.k.p, i->prev_node)) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } + + bch2_bfloat_to_text(&i->buf, l->b, _k); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -426,11 +488,12 @@ static const struct file_operations bfloat_failed_debug_ops = { static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, struct btree *b) { - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); prt_printf(out, "%px btree=%s l=%u ", b, - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level); prt_newline(out); @@ -483,7 +546,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; bool done = false; - int err; + ssize_t ret = 0; i->ubuf = buf; i->size = size; @@ -494,12 +557,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct rhash_head *pos; struct btree *b; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - break; + ret = flush_buf(i); + if (ret) + return ret; rcu_read_lock(); i->buf.atomic++; @@ -508,7 +568,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, if (i->iter < tbl->size) { rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) bch2_cached_btree_node_to_text(&i->buf, c, b); - i->iter++;; + i->iter++; } else { done = true; } @@ -517,9 +577,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, } while (!done); if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations cached_btree_nodes_ops = { @@ -529,67 +592,61 @@ static const struct file_operations cached_btree_nodes_ops = { .read = bch2_cached_btree_nodes_read, }; -static int prt_backtrace(struct printbuf *out, struct task_struct *task) -{ - unsigned long entries[32]; - unsigned i, nr_entries; - int ret; - - ret = down_read_killable(&task->signal->exec_update_lock); - if (ret) - return ret; - - nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0); - for (i = 0; i < nr_entries; i++) { - prt_printf(out, "[<0>] %pB", (void *)entries[i]); - prt_newline(out); - } - - up_read(&task->signal->exec_update_lock); - return 0; -} - static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; struct btree_trans *trans; - int err; + ssize_t ret = 0; + u32 seq; i->ubuf = buf; i->size = size; i->ret = 0; - - mutex_lock(&c->btree_trans_lock); +restart: + seqmutex_lock(&c->btree_trans_lock); list_for_each_entry(trans, &c->btree_trans_list, list) { - if (trans->task->pid <= i->iter) + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) continue; - err = flush_buf(i); - if (err) - return err; + closure_get(&trans->ref); + seq = seqmutex_seq(&c->btree_trans_lock); + seqmutex_unlock(&c->btree_trans_lock); - if (!i->size) - break; + ret = flush_buf(i); + if (ret) { + closure_put(&trans->ref); + goto unlocked; + } bch2_btree_trans_to_text(&i->buf, trans); prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - prt_backtrace(&i->buf, trans->task); + bch2_prt_task_backtrace(&i->buf, task); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); - i->iter = trans->task->pid; - } - mutex_unlock(&c->btree_trans_lock); + i->iter = task->pid; + + closure_put(&trans->ref); + if (!seqmutex_relock(&c->btree_trans_lock, seq)) + goto restart; + } + seqmutex_unlock(&c->btree_trans_lock); +unlocked: if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_transactions_ops = { @@ -636,6 +693,173 @@ static const struct file_operations journal_pins_ops = { .read = bch2_journal_pins_read, }; +static int btree_transaction_stats_open(struct inode *inode, struct file *file) +{ + struct bch_fs *c = inode->i_private; + struct dump_iter *i; + + i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); + + if (!i) + return -ENOMEM; + + i->iter = 1; + i->c = c; + i->buf = PRINTBUF; + file->private_data = i; + + return 0; +} + +static int btree_transaction_stats_release(struct inode *inode, struct file *file) +{ + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); + + return 0; +} + +static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + while (1) { + struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; + + err = flush_buf(i); + if (err) + return err; + + if (!i->size) + break; + + if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) || + !bch2_btree_transaction_fns[i->iter]) + break; + + prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]); + prt_newline(&i->buf); + printbuf_indent_add(&i->buf, 2); + + mutex_lock(&s->lock); + + prt_printf(&i->buf, "Max mem used: %u", s->max_mem); + prt_newline(&i->buf); + + prt_printf(&i->buf, "Transaction duration:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->duration); + printbuf_indent_sub(&i->buf, 2); + + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { + prt_printf(&i->buf, "Lock hold times:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); + printbuf_indent_sub(&i->buf, 2); + } + + if (s->max_paths_text) { + prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + prt_str_indented(&i->buf, s->max_paths_text); + printbuf_indent_sub(&i->buf, 2); + } + + mutex_unlock(&s->lock); + + printbuf_indent_sub(&i->buf, 2); + prt_newline(&i->buf); + i->iter++; + } + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations btree_transaction_stats_op = { + .owner = THIS_MODULE, + .open = btree_transaction_stats_open, + .release = btree_transaction_stats_release, + .read = btree_transaction_stats_read, +}; + +static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + struct btree_trans *trans; + ssize_t ret = 0; + u32 seq; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (i->iter) + goto out; +restart: + seqmutex_lock(&c->btree_trans_lock); + list_for_each_entry(trans, &c->btree_trans_list, list) { + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) + continue; + + closure_get(&trans->ref); + seq = seqmutex_seq(&c->btree_trans_lock); + seqmutex_unlock(&c->btree_trans_lock); + + ret = flush_buf(i); + if (ret) { + closure_put(&trans->ref); + goto out; + } + + bch2_check_for_deadlock(trans, &i->buf); + + i->iter = task->pid; + + closure_put(&trans->ref); + + if (!seqmutex_relock(&c->btree_trans_lock, seq)) + goto restart; + } + seqmutex_unlock(&c->btree_trans_lock); +out: + if (i->buf.allocation_failure) + ret = -ENOMEM; + + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; +} + +static const struct file_operations btree_deadlock_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_btree_deadlock_read, +}; + void bch2_fs_debug_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->fs_debug_dir)) @@ -664,6 +888,12 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, + c, &btree_transaction_stats_op); + + debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir, + c->btree_debug, &btree_deadlock_ops); + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; @@ -672,18 +902,18 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_ids[bd->id], + debugfs_create_file(bch2_btree_id_str(bd->id), 0400, c->btree_debug_dir, bd, &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &bfloat_failed_debug_ops);