X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fdebug.c;h=b1f147e6be4d5cdd0ab491932db9c625b763e29e;hb=HEAD;hp=4e0d14e372874ffd621be7e020a17227c55b98b6;hpb=05408b6f8fea54bf53e68a4ef24291214970f6d0;p=bcachefs-tools-debian diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 4e0d14e..b1f147e 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -11,6 +11,7 @@ #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" +#include "btree_locking.h" #include "btree_update.h" #include "buckets.h" #include "debug.h" @@ -18,7 +19,6 @@ #include "extents.h" #include "fsck.h" #include "inode.h" -#include "io.h" #include "super.h" #include @@ -29,62 +29,41 @@ static struct dentry *bch_debug; -#ifdef CONFIG_BCACHEFS_DEBUG - -void __bch2_btree_verify(struct bch_fs *c, struct btree *b) +static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, + struct extent_ptr_decoded pick) { struct btree *v = c->verify_data; - struct btree_node *n_ondisk, *n_sorted, *n_inmemory; - struct bset *sorted, *inmemory; - struct extent_ptr_decoded pick; - struct bch_dev *ca; + struct btree_node *n_ondisk = c->verify_ondisk; + struct btree_node *n_sorted = c->verify_data->data; + struct bset *sorted, *inmemory = &b->data->keys; + struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev); struct bio *bio; + bool failed = false, saw_error = false; - if (c->opts.nochanges) - return; - - btree_node_io_lock(b); - mutex_lock(&c->verify_lock); - - n_ondisk = c->verify_ondisk; - n_sorted = c->verify_data->data; - n_inmemory = b->data; - - bkey_copy(&v->key, &b->key); - v->written = 0; - v->c.level = b->c.level; - v->c.btree_id = b->c.btree_id; - bch2_btree_keys_init(v, &c->expensive_debug_checks); - - if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - NULL, &pick) <= 0) - return; - - ca = bch_dev_bkey_exists(c, pick.ptr.dev); if (!bch2_dev_get_ioref(ca, READ)) - return; + return false; - bio = bio_alloc_bioset(GFP_NOIO, - buf_pages(n_sorted, btree_bytes(c)), - &c->btree_bio); - bio_set_dev(bio, ca->disk_sb.bdev); - bio->bi_opf = REQ_OP_READ|REQ_META; + bio = bio_alloc_bioset(ca->disk_sb.bdev, + buf_pages(n_sorted, btree_buf_bytes(b)), + REQ_OP_READ|REQ_META, + GFP_NOFS, + &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; - bch2_bio_map(bio, n_sorted, btree_bytes(c)); + bch2_bio_map(bio, n_sorted, btree_buf_bytes(b)); submit_bio_wait(bio); bio_put(bio); percpu_ref_put(&ca->io_ref); - memcpy(n_ondisk, n_sorted, btree_bytes(c)); + memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); - if (bch2_btree_node_read_done(c, v, false)) - goto out; + v->written = 0; + if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) + return false; n_sorted = c->verify_data->data; sorted = &n_sorted->keys; - inmemory = &n_inmemory->keys; if (inmemory->u64s != sorted->u64s || memcmp(inmemory->start, @@ -97,13 +76,13 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) console_lock(); printk(KERN_ERR "*** in memory:\n"); - bch2_dump_bset(b, inmemory, 0); + bch2_dump_bset(c, b, inmemory, 0); printk(KERN_ERR "*** read back in:\n"); - bch2_dump_bset(v, sorted, 0); + bch2_dump_bset(c, v, sorted, 0); - while (offset < b->written) { - if (!offset ) { + while (offset < v->written) { + if (!offset) { i = &n_ondisk->keys; sectors = vstruct_blocks(n_ondisk, c->block_bits) << c->block_bits; @@ -117,64 +96,242 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) } printk(KERN_ERR "*** on disk block %u:\n", offset); - bch2_dump_bset(b, i, offset); + bch2_dump_bset(c, b, i, offset); offset += sectors; } - printk(KERN_ERR "*** block %u/%u not written\n", - offset >> c->block_bits, btree_blocks(c)); - for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) if (inmemory->_data[j] != sorted->_data[j]) break; - printk(KERN_ERR "b->written %u\n", b->written); - console_unlock(); - panic("verify failed at %u\n", j); + bch_err(c, "verify failed at key %u", j); + + failed = true; + } + + if (v->written != b->written) { + bch_err(c, "written wrong: expected %u, got %u", + b->written, v->written); + failed = true; + } + + return failed; +} + +void __bch2_btree_verify(struct bch_fs *c, struct btree *b) +{ + struct bkey_ptrs_c ptrs; + struct extent_ptr_decoded p; + const union bch_extent_entry *entry; + struct btree *v; + struct bset *inmemory = &b->data->keys; + struct bkey_packed *k; + bool failed = false; + + if (c->opts.nochanges) + return; + + bch2_btree_node_io_lock(b); + mutex_lock(&c->verify_lock); + + if (!c->verify_ondisk) { + c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL); + if (!c->verify_ondisk) + goto out; + } + + if (!c->verify_data) { + c->verify_data = __bch2_btree_node_mem_alloc(c); + if (!c->verify_data) + goto out; + + list_del_init(&c->verify_data->list); + } + + BUG_ON(b->nsets != 1); + + for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k)) + if (k->type == KEY_TYPE_btree_ptr_v2) + ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0; + + v = c->verify_data; + bkey_copy(&v->key, &b->key); + v->c.level = b->c.level; + v->c.btree_id = b->c.btree_id; + bch2_btree_keys_init(v); + + ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)); + bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry) + failed |= bch2_btree_verify_replica(c, b, p); + + if (failed) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf); + printbuf_exit(&buf); } out: mutex_unlock(&c->verify_lock); - btree_node_io_unlock(b); + bch2_btree_node_io_unlock(b); } -#endif +void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, + const struct btree *b) +{ + struct btree_node *n_ondisk = NULL; + struct extent_ptr_decoded pick; + struct bch_dev *ca; + struct bio *bio = NULL; + unsigned offset = 0; + int ret; + + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) { + prt_printf(out, "error getting device to read from: invalid device\n"); + return; + } + + ca = bch_dev_bkey_exists(c, pick.ptr.dev); + if (!bch2_dev_get_ioref(ca, READ)) { + prt_printf(out, "error getting device to read from: not online\n"); + return; + } + + n_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL); + if (!n_ondisk) { + prt_printf(out, "memory allocation failure\n"); + goto out; + } + + bio = bio_alloc_bioset(ca->disk_sb.bdev, + buf_pages(n_ondisk, btree_buf_bytes(b)), + REQ_OP_READ|REQ_META, + GFP_NOFS, + &c->btree_bio); + bio->bi_iter.bi_sector = pick.ptr.offset; + bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b)); + + ret = submit_bio_wait(bio); + if (ret) { + prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret)); + goto out; + } + + while (offset < btree_sectors(c)) { + struct bset *i; + struct nonce nonce; + struct bch_csum csum; + struct bkey_packed *k; + unsigned sectors; + + if (!offset) { + i = &n_ondisk->keys; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk); + + if (bch2_crc_cmp(csum, n_ondisk->csum)) { + prt_printf(out, "invalid checksum\n"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(n_ondisk, c->block_bits); + } else { + struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9); + + i = &bne->keys; + + if (i->seq != n_ondisk->keys.seq) + break; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); + + if (bch2_crc_cmp(csum, bne->csum)) { + prt_printf(out, "invalid checksum"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(bne, c->block_bits); + } + + prt_printf(out, " offset %u version %u, journal seq %llu\n", + offset, + le16_to_cpu(i->version), + le64_to_cpu(i->journal_seq)); + offset += sectors; + + printbuf_indent_add(out, 4); + + for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { + struct bkey u; + + bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); + prt_newline(out); + } + + printbuf_indent_sub(out, 4); + } +out: + if (bio) + bio_put(bio); + kvfree(n_ondisk); + percpu_ref_put(&ca->io_ref); +} #ifdef CONFIG_DEBUG_FS /* XXX: bch_fs refcounting */ struct dump_iter { - struct bpos from; - struct bch_fs *c; + struct bch_fs *c; enum btree_id id; + struct bpos from; + struct bpos prev_node; + u64 iter; - char buf[PAGE_SIZE]; - size_t bytes; /* what's currently in buf */ + struct printbuf buf; char __user *ubuf; /* destination user buffer */ size_t size; /* size of requested read */ ssize_t ret; /* bytes read so far */ }; -static int flush_buf(struct dump_iter *i) +static ssize_t flush_buf(struct dump_iter *i) { - if (i->bytes) { - size_t bytes = min(i->bytes, i->size); - int err = copy_to_user(i->ubuf, i->buf, bytes); - - if (err) - return err; - - i->ret += bytes; - i->ubuf += bytes; - i->size -= bytes; - i->bytes -= bytes; - memmove(i->buf, i->buf + bytes, i->bytes); + if (i->buf.pos) { + size_t bytes = min_t(size_t, i->buf.pos, i->size); + int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes); + + i->ret += copied; + i->ubuf += copied; + i->size -= copied; + i->buf.pos -= copied; + memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos); + + if (copied != bytes) + return -EFAULT; } - return 0; + return i->size ? 0 : i->ret; } static int bch2_dump_open(struct inode *inode, struct file *file) @@ -188,15 +345,20 @@ static int bch2_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->from = POS_MIN; + i->iter = 0; i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); i->id = bd->id; + i->buf = PRINTBUF; return 0; } static int bch2_dump_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); return 0; } @@ -204,47 +366,23 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - int err; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; - - bch2_trans_init(&trans, i->c, 0, 0); - - iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); - k = bch2_btree_iter_peek(iter); - - while (k.k && !(err = bkey_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); - i->bytes = strlen(i->buf); - BUG_ON(i->bytes >= PAGE_SIZE); - i->buf[i->bytes] = '\n'; - i->bytes++; - - k = bch2_btree_iter_next(iter); - i->from = iter->pos; - - err = flush_buf(i); - if (err) - break; - - if (!i->size) - break; - } - bch2_trans_exit(&trans); - - return err < 0 ? err : i->ret; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; } static const struct file_operations btree_debug_ops = { @@ -258,45 +396,47 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; - struct btree_iter *iter; + struct btree_trans *trans; + struct btree_iter iter; struct btree *b; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; + ret = flush_buf(i); + if (ret) + return ret; - if (!i->size || !bkey_cmp(POS_MAX, i->from)) + if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - bch2_trans_init(&trans, i->c, 0, 0); + trans = bch2_trans_get(i->c); +retry: + bch2_trans_begin(trans); - for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); - i->bytes = strlen(i->buf); - err = flush_buf(i); - if (err) - break; - - /* - * can't easily correctly restart a btree node traversal across - * all nodes, meh - */ - i->from = bkey_cmp(POS_MAX, b->key.k.p) - ? bkey_successor(b->key.k.p) + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) : b->key.k.p; - if (!i->size) + ret = drop_locks_do(trans, flush_buf(i)); + if (ret) break; } - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + + bch2_trans_put(trans); - return err < 0 ? err : i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -310,74 +450,420 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - struct btree *prev_node = NULL; - int err; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + struct btree_path_level *l = + &btree_iter_path(trans, &iter)->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); + + if (bpos_gt(l->b->key.k.p, i->prev_node)) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } + + bch2_bfloat_to_text(&i->buf, l->b, _k); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; +} - if (!i->size) - return i->ret; +static const struct file_operations bfloat_failed_debug_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_read_bfloat_failed, +}; - bch2_trans_init(&trans, i->c, 0, 0); +static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, + struct btree *b) +{ + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); + + prt_printf(out, "%px btree=%s l=%u ", + b, + bch2_btree_id_str(b->c.btree_id), + b->c.level); + prt_newline(out); + + printbuf_indent_add(out, 2); + + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); + prt_newline(out); + + prt_printf(out, "flags: "); + prt_tab(out); + prt_bitflags(out, bch2_btree_node_flags, b->flags); + prt_newline(out); + + prt_printf(out, "pcpu read locks: "); + prt_tab(out); + prt_printf(out, "%u", b->c.lock.readers != NULL); + prt_newline(out); + + prt_printf(out, "written:"); + prt_tab(out); + prt_printf(out, "%u", b->written); + prt_newline(out); + + prt_printf(out, "writes blocked:"); + prt_tab(out); + prt_printf(out, "%u", !list_empty_careful(&b->write_blocked)); + prt_newline(out); + + prt_printf(out, "will make reachable:"); + prt_tab(out); + prt_printf(out, "%lx", b->will_make_reachable); + prt_newline(out); + + prt_printf(out, "journal pin %px:", &b->writes[0].journal); + prt_tab(out); + prt_printf(out, "%llu", b->writes[0].journal.seq); + prt_newline(out); + + prt_printf(out, "journal pin %px:", &b->writes[1].journal); + prt_tab(out); + prt_printf(out, "%llu", b->writes[1].journal.seq); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} - iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); +static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + bool done = false; + ssize_t ret = 0; - while ((k = bch2_btree_iter_peek(iter)).k && - !(err = bkey_err(k))) { - struct btree_iter_level *l = &iter->l[0]; - struct bkey_packed *_k = - bch2_btree_node_iter_peek(&l->iter, l->b); + i->ubuf = buf; + i->size = size; + i->ret = 0; - if (l->b != prev_node) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); - i->bytes = strlen(i->buf); - err = flush_buf(i); - if (err) - break; + do { + struct bucket_table *tbl; + struct rhash_head *pos; + struct btree *b; + + ret = flush_buf(i); + if (ret) + return ret; + + rcu_read_lock(); + i->buf.atomic++; + tbl = rht_dereference_rcu(c->btree_cache.table.tbl, + &c->btree_cache.table); + if (i->iter < tbl->size) { + rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) + bch2_cached_btree_node_to_text(&i->buf, c, b); + i->iter++; + } else { + done = true; } - prev_node = l->b; + --i->buf.atomic; + rcu_read_unlock(); + } while (!done); + + if (i->buf.allocation_failure) + ret = -ENOMEM; + + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; +} + +static const struct file_operations cached_btree_nodes_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_cached_btree_nodes_read, +}; + +static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + struct btree_trans *trans; + ssize_t ret = 0; + u32 seq; + + i->ubuf = buf; + i->size = size; + i->ret = 0; +restart: + seqmutex_lock(&c->btree_trans_lock); + list_for_each_entry(trans, &c->btree_trans_list, list) { + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) + continue; + + closure_get(&trans->ref); + seq = seqmutex_seq(&c->btree_trans_lock); + seqmutex_unlock(&c->btree_trans_lock); + + ret = flush_buf(i); + if (ret) { + closure_put(&trans->ref); + goto unlocked; + } + + bch2_btree_trans_to_text(&i->buf, trans); + + prt_printf(&i->buf, "backtrace:"); + prt_newline(&i->buf); + printbuf_indent_add(&i->buf, 2); + bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); + printbuf_indent_sub(&i->buf, 2); + prt_newline(&i->buf); + + i->iter = task->pid; + + closure_put(&trans->ref); + + if (!seqmutex_relock(&c->btree_trans_lock, seq)) + goto restart; + } + seqmutex_unlock(&c->btree_trans_lock); +unlocked: + if (i->buf.allocation_failure) + ret = -ENOMEM; + + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; +} + +static const struct file_operations btree_transactions_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_btree_transactions_read, +}; + +static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + bool done = false; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; - bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); - i->bytes = strlen(i->buf); + do { err = flush_buf(i); if (err) + return err; + + if (!i->size) break; - bch2_btree_iter_next(iter); - i->from = iter->pos; + done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); + i->iter++; + } while (!done); + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations journal_pins_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_journal_pins_read, +}; + +static int btree_transaction_stats_open(struct inode *inode, struct file *file) +{ + struct bch_fs *c = inode->i_private; + struct dump_iter *i; + + i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); + + if (!i) + return -ENOMEM; + + i->iter = 1; + i->c = c; + i->buf = PRINTBUF; + file->private_data = i; + + return 0; +} + +static int btree_transaction_stats_release(struct inode *inode, struct file *file) +{ + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); + + return 0; +} + +static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + while (1) { + struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; err = flush_buf(i); if (err) - break; + return err; if (!i->size) break; + + if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) || + !bch2_btree_transaction_fns[i->iter]) + break; + + prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]); + prt_newline(&i->buf); + printbuf_indent_add(&i->buf, 2); + + mutex_lock(&s->lock); + + prt_printf(&i->buf, "Max mem used: %u", s->max_mem); + prt_newline(&i->buf); + + prt_printf(&i->buf, "Transaction duration:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->duration); + printbuf_indent_sub(&i->buf, 2); + + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { + prt_printf(&i->buf, "Lock hold times:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); + printbuf_indent_sub(&i->buf, 2); + } + + if (s->max_paths_text) { + prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + prt_str_indented(&i->buf, s->max_paths_text); + printbuf_indent_sub(&i->buf, 2); + } + + mutex_unlock(&s->lock); + + printbuf_indent_sub(&i->buf, 2); + prt_newline(&i->buf); + i->iter++; } - bch2_trans_exit(&trans); - return err < 0 ? err : i->ret; + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; } -static const struct file_operations bfloat_failed_debug_ops = { +static const struct file_operations btree_transaction_stats_op = { + .owner = THIS_MODULE, + .open = btree_transaction_stats_open, + .release = btree_transaction_stats_release, + .read = btree_transaction_stats_read, +}; + +static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + struct btree_trans *trans; + ssize_t ret = 0; + u32 seq; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (i->iter) + goto out; +restart: + seqmutex_lock(&c->btree_trans_lock); + list_for_each_entry(trans, &c->btree_trans_list, list) { + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) + continue; + + closure_get(&trans->ref); + seq = seqmutex_seq(&c->btree_trans_lock); + seqmutex_unlock(&c->btree_trans_lock); + + ret = flush_buf(i); + if (ret) { + closure_put(&trans->ref); + goto out; + } + + bch2_check_for_deadlock(trans, &i->buf); + + i->iter = task->pid; + + closure_put(&trans->ref); + + if (!seqmutex_relock(&c->btree_trans_lock, seq)) + goto restart; + } + seqmutex_unlock(&c->btree_trans_lock); +out: + if (i->buf.allocation_failure) + ret = -ENOMEM; + + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; +} + +static const struct file_operations btree_deadlock_ops = { .owner = THIS_MODULE, .open = bch2_dump_open, .release = bch2_dump_release, - .read = bch2_read_bfloat_failed, + .read = bch2_btree_deadlock_read, }; void bch2_fs_debug_exit(struct bch_fs *c) { - if (!IS_ERR_OR_NULL(c->debug)) - debugfs_remove_recursive(c->debug); + if (!IS_ERR_OR_NULL(c->fs_debug_dir)) + debugfs_remove_recursive(c->fs_debug_dir); } void bch2_fs_debug_init(struct bch_fs *c) @@ -389,29 +875,48 @@ void bch2_fs_debug_init(struct bch_fs *c) return; snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); - c->debug = debugfs_create_dir(name, bch_debug); - if (IS_ERR_OR_NULL(c->debug)) + c->fs_debug_dir = debugfs_create_dir(name, bch_debug); + if (IS_ERR_OR_NULL(c->fs_debug_dir)) + return; + + debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir, + c->btree_debug, &cached_btree_nodes_ops); + + debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir, + c->btree_debug, &btree_transactions_ops); + + debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, + c->btree_debug, &journal_pins_ops); + + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, + c, &btree_transaction_stats_op); + + debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir, + c->btree_debug, &btree_deadlock_ops); + + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); + if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; for (bd = c->btree_debug; bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - bd->btree = debugfs_create_file(bch2_btree_ids[bd->id], - 0400, c->debug, bd, - &btree_debug_ops); + debugfs_create_file(bch2_btree_id_str(bd->id), + 0400, c->btree_debug_dir, bd, + &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); - bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd, - &btree_format_debug_ops); + debugfs_create_file(name, 0400, c->btree_debug_dir, bd, + &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); - bd->failed = debugfs_create_file(name, 0400, c->debug, bd, - &bfloat_failed_debug_ops); + debugfs_create_file(name, 0400, c->btree_debug_dir, bd, + &bfloat_failed_debug_ops); } } @@ -425,8 +930,6 @@ void bch2_debug_exit(void) int __init bch2_debug_init(void) { - int ret = 0; - bch_debug = debugfs_create_dir("bcachefs", NULL); - return ret; + return 0; }