git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 60fbf06f49 bcachefs: Fix an unhandled transaction restart
author    Kent Overstreet <kent.overstreet@gmail.com>
          Thu, 5 Aug 2021 17:11:04 +0000 (13:11 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Thu, 5 Aug 2021 17:11:04 +0000 (13:11 -0400)
.bcachefs_revision
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update_leaf.c
libbcachefs/fs-io.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/io.c
libbcachefs/recovery.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index f6620a212e9ee434900645e683a1d84b70b27292..ab237af5c1f6263e5c5277bfbe84fb43a43be42a 100644
@@ -1 +1 @@
-b964c6cba873eb5d2ebd0174876b664730e69a73
+60fbf06f49679fdb2b37e1e863c321dfddfc3a4a
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 3af00e24a84c14856add9a336534e251c73b68a9..fe710d19ca1994d5149f2496dc500704b4c10a42 100644
 #include <trace/events/bcachefs.h>
 
 static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
+static void btree_trans_sort_iters(struct btree_trans *);
+static void btree_iter_check_sort(struct btree_trans *, struct btree_iter *);
 static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long);
-static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *);
+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *,
+                                                struct btree_iter *);
 static void btree_iter_copy(struct btree_iter *, struct btree_iter *);
 
+static inline int btree_iter_cmp(const struct btree_iter *l,
+                                const struct btree_iter *r)
+{
+       return   cmp_int(l->btree_id, r->btree_id) ?:
+               -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?:
+                bkey_cmp(l->real_pos, r->real_pos);
+}
+
 static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
 {
        EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
@@ -1260,8 +1271,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
 {
        struct bch_fs *c = trans->c;
        struct btree_iter *iter;
-       u8 sorted[BTREE_ITER_MAX];
-       int i, nr_sorted = 0;
+       int i;
 
        if (trans->in_traverse_all)
                return -EINTR;
@@ -1270,22 +1280,14 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
 retry_all:
        trans->restarted = false;
 
-       nr_sorted = 0;
-
-       trans_for_each_iter(trans, iter) {
-               sorted[nr_sorted++] = iter->idx;
+       trans_for_each_iter(trans, iter)
                iter->should_be_locked = false;
-       }
-
-#define btree_iter_cmp_by_idx(_l, _r)                          \
-               btree_iter_lock_cmp(&trans->iters[_l], &trans->iters[_r])
 
-       bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
-#undef btree_iter_cmp_by_idx
+       btree_trans_sort_iters(trans);
 
-       for (i = nr_sorted - 2; i >= 0; --i) {
-               struct btree_iter *iter1 = trans->iters + sorted[i];
-               struct btree_iter *iter2 = trans->iters + sorted[i + 1];
+       for (i = trans->nr_sorted - 2; i >= 0; --i) {
+               struct btree_iter *iter1 = trans->iters + trans->sorted[i];
+               struct btree_iter *iter2 = trans->iters + trans->sorted[i + 1];
 
                if (iter1->btree_id == iter2->btree_id &&
                    iter1->locks_want < iter2->locks_want)
@@ -1316,20 +1318,18 @@ retry_all:
        BUG_ON(ret && ret != -EINTR);
 
        /* Now, redo traversals in correct order: */
-       for (i = 0; i < nr_sorted; i++) {
-               unsigned idx = sorted[i];
-
-               /*
-                * sucessfully traversing one iterator can cause another to be
-                * unlinked, in btree_key_cache_fill()
-                */
-               if (!(trans->iters_linked & (1ULL << idx)))
-                       continue;
+       trans_for_each_iter_inorder(trans, iter) {
+               EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
 
-               ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
+               ret = btree_iter_traverse_one(iter, _THIS_IP_);
                if (ret)
                        goto retry_all;
+
+               EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
        }
+
+       trans_for_each_iter(trans, iter)
+               BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
 out:
        bch2_btree_cache_cannibalize_unlock(c);
 
@@ -1613,6 +1613,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
        iter->real_pos = new_pos;
        iter->should_be_locked = false;
 
+       btree_iter_check_sort(iter->trans, iter);
+
        if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
                btree_node_unlock(iter, 0);
                iter->l[0].b = BTREE_ITER_NO_NODE_CACHED;
@@ -2018,6 +2020,151 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
 
 /* new transactional stuff: */
 
+static inline void btree_iter_verify_sorted_ref(struct btree_trans *trans,
+                                               struct btree_iter *iter)
+{
+       EBUG_ON(iter->sorted_idx >= trans->nr_sorted);
+       EBUG_ON(trans->sorted[iter->sorted_idx] != iter->idx);
+       EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+}
+
+static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       unsigned i;
+
+       for (i = 0; i < trans->nr_sorted; i++)
+               btree_iter_verify_sorted_ref(trans, trans->iters + trans->sorted[i]);
+#endif
+}
+
+static inline void btree_trans_verify_sorted(struct btree_trans *trans)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct btree_iter *iter, *prev = NULL;
+
+       trans_for_each_iter_inorder(trans, iter)
+               BUG_ON(prev && btree_iter_cmp(prev, iter) > 0);
+#endif
+}
+
+static inline void btree_iter_swap(struct btree_trans *trans,
+                                  struct btree_iter *l, struct btree_iter *r)
+{
+       swap(l->sorted_idx, r->sorted_idx);
+       swap(trans->sorted[l->sorted_idx],
+            trans->sorted[r->sorted_idx]);
+
+       btree_iter_verify_sorted_ref(trans, l);
+       btree_iter_verify_sorted_ref(trans, r);
+}
+
+static void btree_trans_sort_iters(struct btree_trans *trans)
+{
+       bool swapped = false;
+       int i, l = 0, r = trans->nr_sorted;
+
+       while (1) {
+               for (i = l; i + 1 < r; i++) {
+                       if (btree_iter_cmp(trans->iters + trans->sorted[i],
+                                          trans->iters + trans->sorted[i + 1]) > 0) {
+                               swap(trans->sorted[i], trans->sorted[i + 1]);
+                               trans->iters[trans->sorted[i]].sorted_idx = i;
+                               trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1;
+                               swapped = true;
+                       }
+               }
+
+               if (!swapped)
+                       break;
+
+               r--;
+               swapped = false;
+
+               for (i = r - 2; i >= l; --i) {
+                       if (btree_iter_cmp(trans->iters + trans->sorted[i],
+                                          trans->iters + trans->sorted[i + 1]) > 0) {
+                               swap(trans->sorted[i],
+                                    trans->sorted[i + 1]);
+                               trans->iters[trans->sorted[i]].sorted_idx = i;
+                               trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1;
+                               swapped = true;
+                       }
+               }
+
+               if (!swapped)
+                       break;
+
+               l++;
+               swapped = false;
+       }
+
+       btree_trans_verify_sorted_refs(trans);
+       btree_trans_verify_sorted(trans);
+}
+
+static void btree_iter_check_sort(struct btree_trans *trans, struct btree_iter *iter)
+{
+       struct btree_iter *n;
+
+       EBUG_ON(iter->sorted_idx == U8_MAX);
+
+       n = next_btree_iter(trans, iter);
+       if (n && btree_iter_cmp(iter, n) > 0) {
+               do {
+                       btree_iter_swap(trans, iter, n);
+                       n = next_btree_iter(trans, iter);
+               } while (n && btree_iter_cmp(iter, n) > 0);
+
+               return;
+       }
+
+       n = prev_btree_iter(trans, iter);
+       if (n && btree_iter_cmp(n, iter) > 0) {
+               do {
+                       btree_iter_swap(trans, n, iter);
+                       n = prev_btree_iter(trans, iter);
+               } while (n && btree_iter_cmp(n, iter) > 0);
+       }
+
+       btree_trans_verify_sorted(trans);
+}
+
+static inline void btree_iter_list_remove(struct btree_trans *trans,
+                                         struct btree_iter *iter)
+{
+       unsigned i;
+
+       EBUG_ON(iter->sorted_idx >= trans->nr_sorted);
+
+       array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx);
+
+       for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
+               trans->iters[trans->sorted[i]].sorted_idx = i;
+
+       iter->sorted_idx = U8_MAX;
+
+       btree_trans_verify_sorted_refs(trans);
+}
+
+static inline void btree_iter_list_add(struct btree_trans *trans,
+                                      struct btree_iter *pos,
+                                      struct btree_iter *iter)
+{
+       unsigned i;
+
+       btree_trans_verify_sorted_refs(trans);
+
+       iter->sorted_idx = pos ? pos->sorted_idx : trans->nr_sorted;
+
+       array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx);
+
+       for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
+               trans->iters[trans->sorted[i]].sorted_idx = i;
+
+       btree_trans_verify_sorted_refs(trans);
+}
+
 static void btree_iter_child_free(struct btree_iter *iter)
 {
        struct btree_iter *child = btree_iter_child(iter);
@@ -2035,7 +2182,7 @@ static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter,
        struct btree_iter *child = btree_iter_child(iter);
 
        if (!child) {
-               child = btree_trans_iter_alloc(trans);
+               child = btree_trans_iter_alloc(trans, iter);
                child->ip_allocated     = ip;
                iter->child_idx         = child->idx;
 
@@ -2051,6 +2198,8 @@ static inline void __bch2_trans_iter_free(struct btree_trans *trans,
 {
        btree_iter_child_free(&trans->iters[idx]);
 
+       btree_iter_list_remove(trans, &trans->iters[idx]);
+
        __bch2_btree_iter_unlock(&trans->iters[idx]);
        trans->iters_linked             &= ~(1ULL << idx);
        trans->iters_live               &= ~(1ULL << idx);
@@ -2097,10 +2246,12 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
        struct btree_insert_entry *i;
        char buf[100];
 
-       trans_for_each_iter(trans, iter)
+       btree_trans_sort_iters(trans);
+
+       trans_for_each_iter_inorder(trans, iter)
                printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n",
                       bch2_btree_ids[iter->btree_id],
-                      (bch2_bpos_to_text(&PBUF(buf), iter->pos), buf),
+                      (bch2_bpos_to_text(&PBUF(buf), iter->real_pos), buf),
                       btree_iter_live(trans, iter) ? " live" : "",
                       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
                       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
@@ -2116,7 +2267,8 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
        panic("trans iter oveflow\n");
 }
 
-static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans,
+                                                struct btree_iter *pos)
 {
        struct btree_iter *iter;
        unsigned idx;
@@ -2131,10 +2283,13 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
        iter->trans             = trans;
        iter->idx               = idx;
        iter->child_idx         = U8_MAX;
+       iter->sorted_idx        = U8_MAX;
        iter->flags             = 0;
        iter->nodes_locked      = 0;
        iter->nodes_intent_locked = 0;
        trans->iters_linked     |= 1ULL << idx;
+
+       btree_iter_list_add(trans, pos, iter);
        return iter;
 }
 
@@ -2155,6 +2310,8 @@ static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
 
        dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
        dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT;
+
+       btree_iter_check_sort(dst->trans, dst);
 }
 
 struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
@@ -2208,10 +2365,10 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        }
 
        if (!best) {
-               iter = btree_trans_iter_alloc(trans);
+               iter = btree_trans_iter_alloc(trans, NULL);
                bch2_btree_iter_init(trans, iter, btree_id);
        } else if (btree_iter_keep(trans, best)) {
-               iter = btree_trans_iter_alloc(trans);
+               iter = btree_trans_iter_alloc(trans, best);
                btree_iter_copy(iter, best);
        } else {
                iter = best;
@@ -2292,7 +2449,7 @@ struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
 {
        struct btree_iter *iter;
 
-       iter = btree_trans_iter_alloc(trans);
+       iter = btree_trans_iter_alloc(trans, src);
        btree_iter_copy(iter, src);
 
        trans->iters_live |= 1ULL << iter->idx;
@@ -2407,6 +2564,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
 {
        size_t iters_bytes      = sizeof(struct btree_iter) * BTREE_ITER_MAX;
        size_t updates_bytes    = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
+       size_t sorted_bytes     = sizeof(u8) * BTREE_ITER_MAX;
        void *p = NULL;
 
        BUG_ON(trans->used_mempool);
@@ -2419,6 +2577,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
 
        trans->iters            = p; p += iters_bytes;
        trans->updates          = p; p += updates_bytes;
+       trans->sorted           = p; p += sorted_bytes;
 }
 
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
@@ -2621,6 +2780,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
 
        return  init_srcu_struct(&c->btree_trans_barrier) ?:
                mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+                       sizeof(u8) * nr +
                        sizeof(struct btree_iter) * nr +
                        sizeof(struct btree_insert_entry) * nr) ?:
                mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
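
The sorting machinery added above keeps trans->sorted[] ordered by btree_iter_cmp() while each iterator's sorted_idx points back at its slot, so the list can be walked in lock order via trans_for_each_iter_inorder(). A self-contained sketch of that two-way bookkeeping, using plain integers in place of btree iterators (struct elem, list_add_and_fix() and everything else below are illustrative names, not bcachefs code):

#include <assert.h>
#include <stdio.h>

#define NR_MAX 8

struct elem {
	int		key;		/* stand-in for (btree_id, pos) */
	unsigned	sorted_idx;	/* where this element sits in sorted[] */
};

static struct elem	elems[NR_MAX];
static unsigned char	sorted[NR_MAX];	/* permutation of element indices */
static unsigned		nr_sorted;

/* mirrors btree_trans_verify_sorted_refs(): both directions must agree */
static void verify_sorted_refs(void)
{
	unsigned i;

	for (i = 0; i < nr_sorted; i++)
		assert(elems[sorted[i]].sorted_idx == i);
}

/* append element idx at the end of the sort order, then bubble it into place */
static void list_add_and_fix(unsigned idx)
{
	unsigned i;

	elems[idx].sorted_idx = nr_sorted;
	sorted[nr_sorted++] = idx;

	i = elems[idx].sorted_idx;
	while (i && elems[sorted[i - 1]].key > elems[sorted[i]].key) {
		unsigned char tmp = sorted[i - 1];

		/* swap the slots and keep both back pointers in sync */
		sorted[i - 1] = sorted[i];
		sorted[i] = tmp;
		elems[sorted[i - 1]].sorted_idx = i - 1;
		elems[sorted[i]].sorted_idx = i;
		i--;
	}

	verify_sorted_refs();
}

int main(void)
{
	int keys[] = { 5, 1, 4, 2 };
	unsigned i;

	for (i = 0; i < 4; i++) {
		elems[i].key = keys[i];
		list_add_and_fix(i);
	}

	/* walk "in order", like trans_for_each_iter_inorder() */
	for (i = 0; i < nr_sorted; i++)
		printf("%d ", elems[sorted[i]].key);
	printf("\n");	/* prints: 1 2 4 5 */
	return 0;
}
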
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index aeabc07d2c9ca5b2f7e29352511f8906b25b6959..39124e68e48828f3870522b625d8a46cc5b0a244 100644
@@ -71,6 +71,30 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx)
             (_iter);                                                   \
             _iter = __trans_next_iter((_trans), (_iter)->idx + 1))
 
+static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
+{
+       unsigned idx = iter ? iter->sorted_idx + 1 : 0;
+
+       EBUG_ON(idx > trans->nr_sorted);
+
+       return idx < trans->nr_sorted
+               ? trans->iters + trans->sorted[idx]
+               : NULL;
+}
+
+static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
+{
+       EBUG_ON(iter->sorted_idx >= trans->nr_sorted);
+       return iter->sorted_idx
+               ? trans->iters + trans->sorted[iter->sorted_idx - 1]
+               : NULL;
+}
+
+#define trans_for_each_iter_inorder(_trans, _iter)                     \
+       for (_iter = next_btree_iter(trans, NULL);                      \
+            (_iter);                                                   \
+            _iter = next_btree_iter((_trans), (_iter)))
+
 static inline bool __iter_has_node(const struct btree_iter *iter,
                                   const struct btree *b)
 {
@@ -191,19 +215,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
        iter->pos = bkey_start_pos(&iter->k);
 }
 
-static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
+static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx)
 {
-       return iter->child_idx == U8_MAX ? NULL
-               : iter->trans->iters + iter->child_idx;
+       return idx != U8_MAX ? trans->iters + idx : NULL;
 }
 
-/* Sort order for locking btree iterators: */
-static inline int btree_iter_lock_cmp(const struct btree_iter *l,
-                                     const struct btree_iter *r)
+static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
 {
-       return   cmp_int(l->btree_id, r->btree_id) ?:
-               -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?:
-                bkey_cmp(l->real_pos, r->real_pos);
+       return idx_to_btree_iter(iter->trans, iter->child_idx);
 }
 
 /*
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 6882873d149ad9e674106e5035e3d6fd04bb3688..a1e5debf19f3623d10366b399fd944813caa71a4 100644
@@ -246,6 +246,7 @@ struct btree_iter {
 
        u8                      idx;
        u8                      child_idx;
+       u8                      sorted_idx;
 
        /* btree_iter_copy starts here: */
        u16                     flags;
@@ -379,6 +380,7 @@ struct btree_trans {
        unsigned long           ip;
        int                     srcu_idx;
 
+       u8                      nr_sorted;
        u8                      nr_updates;
        bool                    used_mempool:1;
        bool                    error:1;
@@ -398,6 +400,7 @@ struct btree_trans {
        unsigned                mem_bytes;
        void                    *mem;
 
+       u8                      *sorted;
        struct btree_iter       *iters;
        struct btree_insert_entry *updates;
 
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index e9e542260bff0b2c0ef4dabc0d8b2a4c7647378a..7e9909e2dcaf5ef305effe1bfedd9f74625ee710 100644
@@ -1041,7 +1041,19 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
        if (i < trans->updates + trans->nr_updates &&
            !btree_insert_entry_cmp(&n, i)) {
                BUG_ON(i->trans_triggers_run);
-               *i = n;
+
+               /*
+                * This is a hack to ensure that inode creates update the btree,
+                * not the key cache, which helps with cache coherency issues in
+                * other areas:
+                */
+               if (btree_iter_type(n.iter) == BTREE_ITER_CACHED &&
+                   btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
+                       i->k = n.k;
+                       i->flags = n.flags;
+               } else {
+                       *i = n;
+               }
        } else
                array_insert_item(trans->updates, trans->nr_updates,
                                  i - trans->updates, n);
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 1ac99f37abe2c397edac6082fd0b9be51cd31fe6..3333f6166bf2c13531b915b0f36024a8699f5de7 100644
@@ -808,6 +808,15 @@ retry:
                unsigned bytes, sectors, offset_into_extent;
                enum btree_id data_btree = BTREE_ID_extents;
 
+               /*
+                * read_extent -> io_time_reset may cause a transaction restart
+                * without returning an error, we need to check for that here:
+                */
+               if (!bch2_trans_relock(trans)) {
+                       ret = -EINTR;
+                       break;
+               }
+
                bch2_btree_iter_set_pos(iter,
                                POS(inum, rbio->bio.bi_iter.bi_sector));
 
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 25607b5848be8a73b5823694f662cfa80bc85cae..3b671082cd1e31f65bcf34be04f9afacd444d578 100644
@@ -371,6 +371,22 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
        return NULL;
 }
 
+static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+{
+       pr_buf(out, "mode %o flags %x ", inode->bi_mode, inode->bi_flags);
+
+#define x(_name, _bits)                                                \
+       pr_buf(out, #_name " %llu ", (u64) inode->_name);
+       BCH_INODE_FIELDS()
+#undef  x
+}
+
+void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+{
+       pr_buf(out, "inum: %llu ", inode->bi_inum);
+       __bch2_inode_unpacked_to_text(out, inode);
+}
+
 void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
                       struct bkey_s_c k)
 {
@@ -382,12 +398,7 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
                return;
        }
 
-       pr_buf(out, "mode: %o ", unpacked.bi_mode);
-
-#define x(_name, _bits)                                                \
-       pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
-       BCH_INODE_FIELDS()
-#undef  x
+       __bch2_inode_unpacked_to_text(out, &unpacked);
 }
 
 const char *bch2_inode_generation_invalid(const struct bch_fs *c,
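
bch2_inode_unpacked_to_text() above leans on the BCH_INODE_FIELDS() x-macro to stamp out one pr_buf() call per inode field. A self-contained illustration of the same x-macro technique, with a made-up EXAMPLE_FIELDS() list standing in for the real BCH_INODE_FIELDS():

#include <stdio.h>

/* one x() entry per field; re-expanded with different definitions of x */
#define EXAMPLE_FIELDS()		\
	x(bi_size,	64)		\
	x(bi_sectors,	64)

struct example_inode {
#define x(_name, _bits)	unsigned long long _name;
	EXAMPLE_FIELDS()
#undef x
};

static void example_to_text(const struct example_inode *inode)
{
	/* each expansion prints one "name value " pair, like the patch does */
#define x(_name, _bits)	printf(#_name " %llu ", inode->_name);
	EXAMPLE_FIELDS()
#undef x
	printf("\n");
}

int main(void)
{
	struct example_inode inode = { .bi_size = 4096, .bi_sectors = 8 };

	example_to_text(&inode);	/* prints: bi_size 4096 bi_sectors 8 */
	return 0;
}
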
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 2cb081ae44d92f1c0c589528836397f4a9e967f9..d67af4f56f05b1f7e366e5f4bb036efd54751ffe 100644
@@ -55,6 +55,8 @@ void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
                     const struct bch_inode_unpacked *);
 int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
 
+void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
+
 struct btree_iter *bch2_inode_peek(struct btree_trans *,
                        struct bch_inode_unpacked *, u64, unsigned);
 int bch2_inode_write(struct btree_trans *, struct btree_iter *,
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index a59b291a4806ce799bc8cf349362400adb710827..4585a4036f1b9948aff47f5d83f2266a98d6c977 100644
@@ -2284,6 +2284,15 @@ retry:
                unsigned bytes, sectors, offset_into_extent;
                enum btree_id data_btree = BTREE_ID_extents;
 
+               /*
+                * read_extent -> io_time_reset may cause a transaction restart
+                * without returning an error, we need to check for that here:
+                */
+               if (!bch2_trans_relock(&trans)) {
+                       ret = -EINTR;
+                       break;
+               }
+
                bch2_btree_iter_set_pos(iter,
                                POS(inode, bvec_iter.bi_sector));
 
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 84e224fb0d019c2e2eb02ded749279b4c53860cc..afb72648fe5416a0cba73f59548f1933ad78fc86 100644
@@ -39,6 +39,20 @@ static void drop_alloc_keys(struct journal_keys *keys)
        keys->nr = dst;
 }
 
+/*
+ * Btree node pointers have a field to stack a pointer to the in memory btree
+ * node; we need to zero out this field when reading in btree nodes, or when
+ * reading in keys from the journal:
+ */
+static void zero_out_btree_mem_ptr(struct journal_keys *keys)
+{
+       struct journal_key *i;
+
+       for (i = keys->d; i < keys->d + keys->nr; i++)
+               if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
+                       bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
+}
+
 /* iterate over keys read from the journal: */
 
 static int __journal_key_cmp(enum btree_id     l_btree_id,
@@ -1072,6 +1086,8 @@ use_clean:
                drop_alloc_keys(&c->journal_keys);
        }
 
+       zero_out_btree_mem_ptr(&c->journal_keys);
+
        ret = journal_replay_early(c, clean, &c->journal_entries);
        if (ret)
                goto err;
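
The zero_out_btree_mem_ptr() comment above covers the why; as a minimal model of the same idea (toy types only, the real field is bkey_i_to_btree_ptr_v2(k)->v.mem_ptr): a field that is only meaningful at runtime, used to cache a pointer to the in-memory btree node, holds stale garbage when the key is read back from the journal and must be cleared before anything relies on it.

#include <stddef.h>
#include <stdint.h>

struct toy_btree_ptr_v2 {
	uint64_t	mem_ptr;	/* runtime-only cache of the in-memory node */
	uint64_t	seq;		/* stand-in for the on-disk payload */
};

/* mirrors zero_out_btree_mem_ptr(): never trust mem_ptr as read from media */
static void toy_zero_mem_ptrs(struct toy_btree_ptr_v2 *keys, size_t nr)
{
	size_t i;

	for (i = 0; i < nr; i++)
		keys[i].mem_ptr = 0;
}

int main(void)
{
	struct toy_btree_ptr_v2 keys[2] = {
		{ .mem_ptr = 0xdeadbeef, .seq = 1 },
		{ .mem_ptr = 0xdeadbeef, .seq = 2 },
	};

	toy_zero_mem_ptrs(keys, 2);
	return keys[0].mem_ptr || keys[1].mem_ptr;	/* 0 on success */
}
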