X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_cache.c;h=8a4667ba6b189e41d39aea23edec58ebf3ad33a1;hb=a2094890a90a2f865e49f94e8448deca7e5852ef;hp=7366711128613d6d9ed614291645a2d0157c5a86;hpb=da730dc67c1bd10842c49a1534fe23e0d8fdb4be;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 7366711..8a4667b 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1,23 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "error.h"
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
-const char * const bch2_btree_ids[] = {
-#define x(kwd, val, name)	name,
-	BCH_BTREE_IDS()
-#undef x
-	NULL
-};
-
 void bch2_recalc_btree_reserve(struct bch_fs *c)
 {
 	unsigned i, reserve = 16;
@@ -151,6 +146,11 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
 	b->c.level	= level;
 	b->c.btree_id	= id;
 
+	if (level)
+		six_lock_pcpu_alloc(&b->c.lock);
+	else
+		six_lock_pcpu_free_rcu(&b->c.lock);
+
 	mutex_lock(&bc->lock);
 	ret = __bch2_btree_node_hash_insert(bc, b);
 	if (!ret)
@@ -211,7 +211,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 		 * - unless btree verify mode is enabled, since it runs out of
 		 * the post write cleanup:
 		 */
-		if (verify_btree_ondisk(c))
+		if (bch2_verify_btree_ondisk)
 			bch2_btree_node_write(c, b, SIX_LOCK_intent);
 		else
 			__bch2_btree_node_write(c, b, SIX_LOCK_read);
@@ -252,9 +252,9 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	unsigned long can_free;
 	unsigned long touched = 0;
 	unsigned long freed = 0;
-	unsigned i;
+	unsigned i, flags;
 
-	if (btree_shrinker_disabled(c))
+	if (bch2_btree_shrinker_disabled)
 		return SHRINK_STOP;
 
 	/* Return -1 if we can't do anything right now */
@@ -263,6 +263,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	else if (!mutex_trylock(&bc->lock))
 		return -1;
 
+	flags = memalloc_nofs_save();
+
 	/*
 	 * It's _really_ critical that we don't free too many btree nodes - we
 	 * have to always leave ourselves a reserve. The reserve is how we
@@ -328,6 +330,7 @@ restart:
 
 	mutex_unlock(&bc->lock);
 out:
+	memalloc_nofs_restore(flags);
 	return (unsigned long) freed * btree_pages(c);
 }
 
@@ -338,7 +341,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
 					btree_cache.shrink);
 	struct btree_cache *bc = &c->btree_cache;
 
-	if (btree_shrinker_disabled(c))
+	if (bch2_btree_shrinker_disabled)
 		return 0;
 
 	return btree_cache_can_free(bc) * btree_pages(c);
@@ -348,11 +351,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
-	unsigned i;
+	unsigned i, flags;
 
 	if (bc->shrink.list.next)
 		unregister_shrinker(&bc->shrink);
 
+	/* vfree() can allocate memory: */
+	flags = memalloc_nofs_save();
 	mutex_lock(&bc->lock);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -376,18 +381,22 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 		if (btree_node_dirty(b))
 			bch2_btree_complete_write(c, b, btree_current_write(b));
-		clear_btree_node_dirty(b);
+		clear_btree_node_dirty(c, b);
 
 		btree_node_data_free(c, b);
 	}
 
+	BUG_ON(atomic_read(&c->btree_cache.dirty));
+
 	while (!list_empty(&bc->freed)) {
 		b = list_first_entry(&bc->freed, struct btree, list);
 		list_del(&b->list);
+		six_lock_pcpu_free(&b->c.lock);
 		kfree(b);
 	}
 
 	mutex_unlock(&bc->lock);
+	memalloc_nofs_restore(flags);
 
 	if (bc->table_init_done)
 		rhashtable_destroy(&bc->table);
 
@@ -439,7 +448,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 	bc->shrink.scan_objects	= bch2_btree_cache_scan;
 	bc->shrink.seeks	= 4;
 	bc->shrink.batch	= btree_pages(c) * 2;
-	register_shrinker(&bc->shrink);
+	ret = register_shrinker(&bc->shrink);
 out:
 	pr_verbose_init(c->opts, "ret %i", ret);
 	return ret;
@@ -584,7 +593,7 @@ out:
 	b->sib_u64s[0]		= 0;
 	b->sib_u64s[1]		= 0;
 	b->whiteout_u64s	= 0;
-	bch2_btree_keys_init(b, &c->expensive_debug_checks);
+	bch2_btree_keys_init(b);
 
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
 			       start_time);
@@ -699,7 +708,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 				  const struct bkey_i *k, unsigned level,
-				  enum six_lock_type lock_type)
+				  enum six_lock_type lock_type,
+				  unsigned long trace_ip)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -761,7 +771,7 @@ lock_node:
 			btree_node_unlock(iter, level + 1);
 
 		if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-				     lock_node_check_fn, (void *) k)) {
+				     lock_node_check_fn, (void *) k, trace_ip)) {
 			if (b->hash_val != btree_ptr_hash_val(k))
 				goto retry;
 			return ERR_PTR(-EINTR);
 		}
@@ -802,9 +812,12 @@ lock_node:
 		return ERR_PTR(-EIO);
 	}
 
-	EBUG_ON(b->c.btree_id != iter->btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->c.btree_id != iter->btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bpos_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
 	return b;
 }
@@ -812,7 +825,8 @@ lock_node:
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 					 const struct bkey_i *k,
 					 enum btree_id btree_id,
-					 unsigned level)
+					 unsigned level,
+					 bool nofill)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -827,6 +841,9 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 retry:
 	b = btree_cache_find(bc, k);
 	if (unlikely(!b)) {
+		if (nofill)
+			goto out;
+
 		b = bch2_btree_node_fill(c, NULL, k, btree_id,
 					 level, SIX_LOCK_read, true);
 
@@ -834,8 +851,12 @@ retry:
 		if (!b)
 			goto retry;
 
+		if (IS_ERR(b) &&
+		    !bch2_btree_cache_cannibalize_lock(c, NULL))
+			goto retry;
+
 		if (IS_ERR(b))
-			return b;
+			goto out;
 	} else {
 lock_node:
 		ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
@@ -870,13 +891,18 @@ lock_node:
 
 	if (unlikely(btree_node_read_error(b))) {
 		six_unlock_read(&b->c.lock);
-		return ERR_PTR(-EIO);
+		b = ERR_PTR(-EIO);
+		goto out;
 	}
 
-	EBUG_ON(b->c.btree_id != btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
-
+	EBUG_ON(b->c.btree_id != btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bpos_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
+out:
+	bch2_btree_cache_cannibalize_unlock(c);
 	return b;
 }
 
@@ -889,10 +915,12 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 	struct btree *parent;
 	struct btree_node_iter node_iter;
 	struct bkey_packed *k;
-	BKEY_PADDED(k) tmp;
+	struct bkey_buf tmp;
 	struct btree *ret = NULL;
 	unsigned level = b->c.level;
 
+	bch2_bkey_buf_init(&tmp);
+
 	parent = btree_iter_node(iter, level + 1);
 	if (!parent)
 		return NULL;
@@ -926,10 +954,10 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 	if (!k)
 		goto out;
 
-	bch2_bkey_unpack(parent, &tmp.k, k);
+	bch2_bkey_buf_unpack(&tmp, c, parent, k);
 
-	ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-				  SIX_LOCK_intent);
+	ret = bch2_btree_node_get(c, iter, tmp.k, level,
+				  SIX_LOCK_intent, _THIS_IP_);
 
 	if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
 		struct btree_iter *linked;
@@ -942,14 +970,14 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 		 * holding other locks that would cause us to deadlock:
 		 */
 		trans_for_each_iter(trans, linked)
-			if (btree_iter_cmp(iter, linked) < 0)
+			if (btree_iter_lock_cmp(iter, linked) < 0)
 				__bch2_btree_iter_unlock(linked);
 
 		if (sib == btree_prev_sib)
 			btree_node_unlock(iter, level);
 
-		ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-					  SIX_LOCK_intent);
+		ret = bch2_btree_node_get(c, iter, tmp.k, level,
+					  SIX_LOCK_intent, _THIS_IP_);
 
 		/*
 		 * before btree_iter_relock() calls btree_iter_verify_locks():
@@ -983,30 +1011,46 @@ out:
 		if (sib != btree_prev_sib)
 			swap(n1, n2);
 
-		BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
-				n2->data->min_key));
+		if (bpos_cmp(bpos_successor(n1->key.k.p),
+			     n2->data->min_key)) {
+			char buf1[200], buf2[200];
+
+			bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+			bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+			bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+					     "prev: %s\n"
+					     "next: %s\n",
+					     bch2_btree_ids[iter->btree_id], level,
+					     buf1, buf2);
+
+			six_unlock_intent(&ret->c.lock);
+			ret = NULL;
+		}
 	}
 
 	bch2_btree_trans_verify_locks(trans);
 
+	bch2_bkey_buf_exit(&tmp, c);
+
 	return ret;
 }
 
 void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
-			      const struct bkey_i *k, unsigned level)
+			      const struct bkey_i *k,
+			      enum btree_id btree_id, unsigned level)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
 
-	BUG_ON(!btree_node_locked(iter, level + 1));
+	BUG_ON(iter && !btree_node_locked(iter, level + 1));
 	BUG_ON(level >= BTREE_MAX_DEPTH);
 
 	b = btree_cache_find(bc, k);
 	if (b)
 		return;
 
-	bch2_btree_node_fill(c, iter, k, iter->btree_id,
-			     level, SIX_LOCK_read, false);
+	bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
 }
 
 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
@@ -1019,15 +1063,14 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
 
 	bch2_btree_keys_stats(b, &stats);
 
-	pr_buf(out,
-	       "l %u %llu:%llu - %llu:%llu:\n"
-	       "    ptrs: ",
-	       b->c.level,
-	       b->data->min_key.inode,
-	       b->data->min_key.offset,
-	       b->data->max_key.inode,
-	       b->data->max_key.offset);
+	pr_buf(out, "l %u ", b->c.level);
+	bch2_bpos_to_text(out, b->data->min_key);
+	pr_buf(out, " - ");
+	bch2_bpos_to_text(out, b->data->max_key);
+	pr_buf(out, ":\n"
+	       "    ptrs: ");
 	bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+	pr_buf(out, "\n"
 	       "    format: u64s %u fields %u %u %u %u %u\n"
 	       "    unpack fn len: %u\n"
@@ -1055,3 +1098,10 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
 	       stats.floats,
 	       stats.failed);
 }
+
+void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	pr_buf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+	pr_buf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+	pr_buf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+}
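
For reference, the memalloc_nofs_save()/memalloc_nofs_restore() pairs added above in bch2_btree_cache_scan() and bch2_fs_btree_cache_exit() use the kernel's scoped GFP_NOFS mechanism from <linux/sched/mm.h>: every allocation between save and restore behaves as if GFP_NOFS had been passed, so memory reclaim cannot re-enter the filesystem while btree cache locks are held; the exit path needs this because vfree() can itself allocate. Below is a minimal sketch of that pattern, separate from the patch; example_cache_scan() and example_cache_lock are made-up names, not bcachefs functions.

#include <linux/mutex.h>
#include <linux/sched/mm.h>

static DEFINE_MUTEX(example_cache_lock);

static unsigned long example_cache_scan(void)
{
	unsigned long freed = 0;
	unsigned int flags;

	if (!mutex_trylock(&example_cache_lock))
		return 0;

	/*
	 * From here until memalloc_nofs_restore(), all allocations are
	 * implicitly GFP_NOFS: reclaim triggered by an allocation (or by
	 * vfree(), which can allocate) cannot recurse back into the
	 * filesystem and deadlock on example_cache_lock.
	 */
	flags = memalloc_nofs_save();

	/* ... evict cache entries here, counting them in 'freed' ... */

	memalloc_nofs_restore(flags);
	mutex_unlock(&example_cache_lock);

	return freed;
}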