Update bcachefs sources to 18686af684 bcachefs: Inode backpointers
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 7366711128613d6d9ed614291645a2d0157c5a86..8a4667ba6b189e41d39aea23edec58ebf3ad33a1 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1,23 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "error.h"
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
-const char * const bch2_btree_ids[] = {
-#define x(kwd, val, name) name,
-       BCH_BTREE_IDS()
-#undef x
-       NULL
-};
-
 void bch2_recalc_btree_reserve(struct bch_fs *c)
 {
        unsigned i, reserve = 16;
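
The bch2_btree_ids[] table removed above is generated with the X-macro idiom: the single BCH_BTREE_IDS() list expands once into the btree_id enum and once into this string table, so the table is being relocated (presumably next to the other generated name tables in opts.c, which this diff doesn't show), not dropped. A minimal sketch of the idiom, with hypothetical entries:

    /* X-macro sketch: entries here are illustrative, not the real list */
    #define BTREE_IDS_SKETCH()                  \
            x(extents, 0, "extents")            \
            x(inodes,  1, "inodes")

    enum btree_id_sketch {
    #define x(kwd, val, name) BTREE_ID_##kwd = val,
            BTREE_IDS_SKETCH()
    #undef x
    };

    const char * const btree_id_names[] = {
    #define x(kwd, val, name) name,
            BTREE_IDS_SKETCH()
    #undef x
            NULL
    };
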
@@ -151,6 +146,11 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
        b->c.level      = level;
        b->c.btree_id   = id;
 
+       if (level)
+               six_lock_pcpu_alloc(&b->c.lock);
+       else
+               six_lock_pcpu_free_rcu(&b->c.lock);
+
        mutex_lock(&bc->lock);
        ret = __bch2_btree_node_hash_insert(bc, b);
        if (!ret)
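
Interior nodes (level > 0) are read-mostly, so their six locks are switched here to per-CPU reader counts, while a node rehashed as a leaf drops back to a plain atomic; the _rcu variant presumably defers freeing the per-CPU counts past any concurrent lock attempt. The idea behind per-CPU readers, as a conceptual sketch only (the real implementation is bcachefs' six.c, not this struct):

    /* conceptual sketch, not the six lock's actual layout */
    struct pcpu_reader_lock_sketch {
            unsigned __percpu       *readers;   /* read lock/unlock touch only the
                                                 * local CPU's counter: no cacheline
                                                 * bouncing on the read-mostly path */
            atomic_t                writer;
    };
    /*
     * read_lock:  check !writer, then this_cpu_inc(readers)
     * write_lock: set writer, then wait for the readers counters,
     *             summed over all CPUs, to drain to zero
     */
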
@@ -211,7 +211,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
                 * - unless btree verify mode is enabled, since it runs out of
                 * the post write cleanup:
                 */
-               if (verify_btree_ondisk(c))
+               if (bch2_verify_btree_ondisk)
                        bch2_btree_node_write(c, b, SIX_LOCK_intent);
                else
                        __bch2_btree_node_write(c, b, SIX_LOCK_read);
@@ -252,9 +252,9 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
        unsigned long can_free;
        unsigned long touched = 0;
        unsigned long freed = 0;
-       unsigned i;
+       unsigned i, flags;
 
-       if (btree_shrinker_disabled(c))
+       if (bch2_btree_shrinker_disabled)
                return SHRINK_STOP;
 
        /* Return -1 if we can't do anything right now */
@@ -263,6 +263,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
        else if (!mutex_trylock(&bc->lock))
                return -1;
 
+       flags = memalloc_nofs_save();
+
        /*
         * It's _really_ critical that we don't free too many btree nodes - we
         * have to always leave ourselves a reserve. The reserve is how we
@@ -328,6 +330,7 @@ restart:
 
        mutex_unlock(&bc->lock);
 out:
+       memalloc_nofs_restore(flags);
        return (unsigned long) freed * btree_pages(c);
 }
 
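
The scan now runs inside a memalloc_nofs_save()/restore() section: any allocation made while it holds bc->lock is implicitly GFP_NOFS, so the page allocator cannot recurse back into filesystem reclaim and deadlock on locks this path already holds. The scoped-allocation pattern (these are the stock <linux/sched/mm.h> helpers, already included at the top of this file):

    #include <linux/sched/mm.h>

    static void reclaim_path_sketch(void)
    {
            unsigned flags = memalloc_nofs_save();

            /* allocations here behave as if GFP_NOFS were passed */

            memalloc_nofs_restore(flags);
    }
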
@@ -338,7 +341,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
                                        btree_cache.shrink);
        struct btree_cache *bc = &c->btree_cache;
 
-       if (btree_shrinker_disabled(c))
+       if (bch2_btree_shrinker_disabled)
                return 0;
 
        return btree_cache_can_free(bc) * btree_pages(c);
@@ -348,11 +351,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
-       unsigned i;
+       unsigned i, flags;
 
        if (bc->shrink.list.next)
                unregister_shrinker(&bc->shrink);
 
+       /* vfree() can allocate memory: */
+       flags = memalloc_nofs_save();
        mutex_lock(&bc->lock);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -376,18 +381,22 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
                if (btree_node_dirty(b))
                        bch2_btree_complete_write(c, b, btree_current_write(b));
-               clear_btree_node_dirty(b);
+               clear_btree_node_dirty(c, b);
 
                btree_node_data_free(c, b);
        }
 
+       BUG_ON(atomic_read(&c->btree_cache.dirty));
+
        while (!list_empty(&bc->freed)) {
                b = list_first_entry(&bc->freed, struct btree, list);
                list_del(&b->list);
+               six_lock_pcpu_free(&b->c.lock);
                kfree(b);
        }
 
        mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
 
        if (bc->table_init_done)
                rhashtable_destroy(&bc->table);
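
clear_btree_node_dirty() gains the bch_fs argument so the per-node dirty flag can be paired with the global counter that the new BUG_ON() checks has drained to zero before teardown. A sketch of what the paired helpers presumably look like (hypothetical bodies; the real ones live with the btree flag accessors):

    /* hypothetical sketch: flag transitions keep the counter exact */
    static inline void set_btree_node_dirty_sketch(struct bch_fs *c, struct btree *b)
    {
            if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
                    atomic_inc(&c->btree_cache.dirty);
    }

    static inline void clear_btree_node_dirty_sketch(struct bch_fs *c, struct btree *b)
    {
            if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
                    atomic_dec(&c->btree_cache.dirty);
    }
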
@@ -439,7 +448,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
        bc->shrink.scan_objects         = bch2_btree_cache_scan;
        bc->shrink.seeks                = 4;
        bc->shrink.batch                = btree_pages(c) * 2;
-       register_shrinker(&bc->shrink);
+       ret = register_shrinker(&bc->shrink);
 out:
        pr_verbose_init(c->opts, "ret %i", ret);
        return ret;
@@ -584,7 +593,7 @@ out:
        b->sib_u64s[0]          = 0;
        b->sib_u64s[1]          = 0;
        b->whiteout_u64s        = 0;
-       bch2_btree_keys_init(b, &c->expensive_debug_checks);
+       bch2_btree_keys_init(b);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
                               start_time);
@@ -699,7 +708,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
                                  const struct bkey_i *k, unsigned level,
-                                 enum six_lock_type lock_type)
+                                 enum six_lock_type lock_type,
+                                 unsigned long trace_ip)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
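
The new trace_ip parameter threads the caller's return address down to btree_node_lock(), letting lock-contention tracepoints attribute a wait to the call site rather than to this helper. Callers pass the standard kernel macro, as the later hunks of this diff do:

    /* _THIS_IP_ (linux/kernel.h) captures the caller's instruction pointer */
    b = bch2_btree_node_get(c, iter, k, level, SIX_LOCK_intent, _THIS_IP_);
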
@@ -761,7 +771,7 @@ lock_node:
                        btree_node_unlock(iter, level + 1);
 
                if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-                                    lock_node_check_fn, (void *) k)) {
+                                    lock_node_check_fn, (void *) k, trace_ip)) {
                        if (b->hash_val != btree_ptr_hash_val(k))
                                goto retry;
                        return ERR_PTR(-EINTR);
@@ -802,9 +812,12 @@ lock_node:
                return ERR_PTR(-EIO);
        }
 
-       EBUG_ON(b->c.btree_id != iter->btree_id ||
-               BTREE_NODE_LEVEL(b->data) != level ||
-               bkey_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->c.btree_id != iter->btree_id);
+       EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+       EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+               bpos_cmp(b->data->min_key,
+                        bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
        return b;
 }
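
Two things happen to the assertion: splitting the single EBUG_ON() into one per condition makes a failure identify exactly which invariant broke, and position comparisons move from bkey_cmp() to bpos_cmp(), which also compares the snapshot field this series starts using; the extra check validates that a btree_ptr_v2 key's stored min_key matches the node's header. My reading of the comparison helper, as a sketch (the real one is in bkey.h):

    /* sketch; cmp_int(l, r) is ((l > r) - (l < r)) */
    static inline int bpos_cmp_sketch(struct bpos l, struct bpos r)
    {
            return  cmp_int(l.inode,    r.inode) ?:
                    cmp_int(l.offset,   r.offset) ?:
                    cmp_int(l.snapshot, r.snapshot);    /* not compared by bkey_cmp() */
    }
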
@@ -812,7 +825,8 @@ lock_node:
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
                                         const struct bkey_i *k,
                                         enum btree_id btree_id,
-                                        unsigned level)
+                                        unsigned level,
+                                        bool nofill)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
@@ -827,6 +841,9 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 retry:
        b = btree_cache_find(bc, k);
        if (unlikely(!b)) {
+               if (nofill)
+                       goto out;
+
                b = bch2_btree_node_fill(c, NULL, k, btree_id,
                                         level, SIX_LOCK_read, true);
 
@@ -834,8 +851,12 @@ retry:
                if (!b)
                        goto retry;
 
+               if (IS_ERR(b) &&
+                   !bch2_btree_cache_cannibalize_lock(c, NULL))
+                       goto retry;
+
                if (IS_ERR(b))
-                       return b;
+                       goto out;
        } else {
 lock_node:
                ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
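
The added branch handles allocation failure by taking the cannibalize lock (permission to evict another cached node and reuse its memory) and retrying; every exit then funnels through out:, where the unlock is a no-op unless this thread actually holds the lock. The shape of the pattern, with try_fill() as a hypothetical stand-in for bch2_btree_node_fill():

    struct btree *try_fill(struct bch_fs *c);   /* hypothetical stand-in */

    static struct btree *get_node_sketch(struct bch_fs *c)
    {
            struct btree *b;
    retry:
            b = try_fill(c);
            if (IS_ERR(b) &&
                !bch2_btree_cache_cannibalize_lock(c, NULL))
                    goto retry;     /* may now evict another node for its memory;
                                     * the real fill path shouldn't fail again
                                     * once the lock is held */

            bch2_btree_cache_cannibalize_unlock(c); /* no-op if we never took it */
            return b;
    }
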
@@ -870,13 +891,18 @@ lock_node:
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_read(&b->c.lock);
-               return ERR_PTR(-EIO);
+               b = ERR_PTR(-EIO);
+               goto out;
        }
 
-       EBUG_ON(b->c.btree_id != btree_id ||
-               BTREE_NODE_LEVEL(b->data) != level ||
-               bkey_cmp(b->data->max_key, k->k.p));
-
+       EBUG_ON(b->c.btree_id != btree_id);
+       EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+       EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+               bpos_cmp(b->data->min_key,
+                        bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
+out:
+       bch2_btree_cache_cannibalize_unlock(c);
        return b;
 }
 
@@ -889,10 +915,12 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        struct btree *parent;
        struct btree_node_iter node_iter;
        struct bkey_packed *k;
-       BKEY_PADDED(k) tmp;
+       struct bkey_buf tmp;
        struct btree *ret = NULL;
        unsigned level = b->c.level;
 
+       bch2_bkey_buf_init(&tmp);
+
        parent = btree_iter_node(iter, level + 1);
        if (!parent)
                return NULL;
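
BKEY_PADDED(k) reserved worst-case stack space for every unpacked key; struct bkey_buf starts with a small on-stack buffer and falls back to a heap allocation only when a key doesn't fit, which is why the function now needs an init/exit bracket around the key's lifetime and passes tmp.k (already a pointer) instead of &tmp.k. The lifecycle as I understand the bkey_buf.h helpers (comments are assumptions):

    struct bkey_buf tmp;

    bch2_bkey_buf_init(&tmp);                   /* point at the on-stack space */
    bch2_bkey_buf_unpack(&tmp, c, parent, k);   /* may kmalloc for oversized keys */

    /* ... use tmp.k, a struct bkey_i * ... */

    bch2_bkey_buf_exit(&tmp, c);                /* frees the heap copy, if any */
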
@@ -926,10 +954,10 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        if (!k)
                goto out;
 
-       bch2_bkey_unpack(parent, &tmp.k, k);
+       bch2_bkey_buf_unpack(&tmp, c, parent, k);
 
-       ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                 SIX_LOCK_intent);
+       ret = bch2_btree_node_get(c, iter, tmp.k, level,
+                                 SIX_LOCK_intent, _THIS_IP_);
 
        if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
                struct btree_iter *linked;
@@ -942,14 +970,14 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                 * holding other locks that would cause us to deadlock:
                 */
                trans_for_each_iter(trans, linked)
-                       if (btree_iter_cmp(iter, linked) < 0)
+                       if (btree_iter_lock_cmp(iter, linked) < 0)
                                __bch2_btree_iter_unlock(linked);
 
                if (sib == btree_prev_sib)
                        btree_node_unlock(iter, level);
 
-               ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                         SIX_LOCK_intent);
+               ret = bch2_btree_node_get(c, iter, tmp.k, level,
+                                         SIX_LOCK_intent, _THIS_IP_);
 
                /*
                 * before btree_iter_relock() calls btree_iter_verify_locks():
@@ -983,30 +1011,46 @@ out:
                if (sib != btree_prev_sib)
                        swap(n1, n2);
 
-               BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
-                               n2->data->min_key));
+               if (bpos_cmp(bpos_successor(n1->key.k.p),
+                            n2->data->min_key)) {
+                       char buf1[200], buf2[200];
+
+                       bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+                       bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+                       bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+                                            "prev: %s\n"
+                                            "next: %s\n",
+                                            bch2_btree_ids[iter->btree_id], level,
+                                            buf1, buf2);
+
+                       six_unlock_intent(&ret->c.lock);
+                       ret = NULL;
+               }
        }
 
        bch2_btree_trans_verify_locks(trans);
 
+       bch2_bkey_buf_exit(&tmp, c);
+
        return ret;
 }
 
 void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
-                             const struct bkey_i *k, unsigned level)
+                             const struct bkey_i *k,
+                             enum btree_id btree_id, unsigned level)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 
-       BUG_ON(!btree_node_locked(iter, level + 1));
+       BUG_ON(iter && !btree_node_locked(iter, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
        b = btree_cache_find(bc, k);
        if (b)
                return;
 
-       bch2_btree_node_fill(c, iter, k, iter->btree_id,
-                            level, SIX_LOCK_read, false);
+       bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
 }
 
 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
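
In the hunk above, a hard BUG_ON() becomes bch2_fs_inconsistent(): a gap or overlap between siblings is logged with both bounding keys printed, the sibling is unlocked and dropped, and the caller continues without it, so a damaged topology degrades service instead of crashing. bkey_successor() also becomes bpos_successor(), which with this era's three-field bpos should increment the snapshot field first; my reading as a sketch (the real helper is in bkey.h):

    /* sketch: next position in inode:offset:snapshot order */
    static inline struct bpos bpos_successor_sketch(struct bpos p)
    {
            if (!++p.snapshot &&
                !++p.offset &&
                !++p.inode)
                    BUG();  /* overflowed the entire key space */
            return p;
    }
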
@@ -1019,15 +1063,14 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
 
        bch2_btree_keys_stats(b, &stats);
 
-       pr_buf(out,
-              "l %u %llu:%llu - %llu:%llu:\n"
-              "    ptrs: ",
-              b->c.level,
-              b->data->min_key.inode,
-              b->data->min_key.offset,
-              b->data->max_key.inode,
-              b->data->max_key.offset);
+       pr_buf(out, "l %u ", b->c.level);
+       bch2_bpos_to_text(out, b->data->min_key);
+       pr_buf(out, " - ");
+       bch2_bpos_to_text(out, b->data->max_key);
+       pr_buf(out, ":\n"
+              "    ptrs: ");
        bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+
        pr_buf(out, "\n"
               "    format: u64s %u fields %u %u %u %u %u\n"
               "    unpack fn len: %u\n"
@@ -1055,3 +1098,10 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
               stats.floats,
               stats.failed);
 }
+
+void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       pr_buf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+       pr_buf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+       pr_buf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+}
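
The new bch2_btree_cache_to_text() renders the cache counters through the printbuf interface for sysfs/debugfs. Reading it back might look like this, assuming this era's PBUF()/_PBUF() wrappers around a plain char array (the same PBUF() used earlier in this diff):

    char buf[256];
    struct printbuf out = _PBUF(buf, sizeof(buf));

    bch2_btree_cache_to_text(&out, c);
    pr_info("%s", buf);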