]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/backpointers.c
Update bcachefs sources to 70fa0c1ff4 fixup! bcachefs: Btree key cache improvements
[bcachefs-tools-debian] / libbcachefs / backpointers.c
index 6c74b3a520c1e5d16d0ba17859e4932258252744..dace68e208b670aa05cc3a4400155cac4a385967 100644 (file)
@@ -1,11 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "alloc_background.h"
 #include "backpointers.h"
 #include "btree_cache.h"
 #include "btree_update.h"
 #include "error.h"
 
+#include <linux/mm.h>
+
 #define MAX_EXTENT_COMPRESS_RATIO_SHIFT                10
 
 /*
@@ -29,10 +32,15 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
                                           u64 bucket_offset)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+       struct bpos ret;
+
+       ret = POS(bucket.inode,
+                 (bucket_to_sector(ca, bucket.offset) <<
+                  MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+
+       BUG_ON(bkey_cmp(bucket, bp_pos_to_bucket(c, ret)));
 
-       return POS(bucket.inode,
-                  (bucket_to_sector(ca, bucket.offset) <<
-                   MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+       return ret;
 }
 
 void bch2_extent_ptr_to_bp(struct bch_fs *c,
@@ -406,20 +414,24 @@ err:
 int bch2_get_next_backpointer(struct btree_trans *trans,
                              struct bpos bucket, int gen,
                              u64 *bp_offset,
-                             struct bch_backpointer *dst)
+                             struct bch_backpointer *dst,
+                             unsigned iter_flags)
 {
        struct bch_fs *c = trans->c;
-       struct bpos bp_pos =
-               bucket_pos_to_bp(c, bucket,
-                               max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
-       struct bpos bp_end_pos =
-               bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+       struct bpos bp_pos, bp_end_pos;
        struct btree_iter alloc_iter, bp_iter = { NULL };
        struct bkey_s_c k;
        struct bkey_s_c_alloc_v4 a;
        size_t i;
        int ret;
 
+       if (*bp_offset == U64_MAX)
+               return 0;
+
+       bp_pos = bucket_pos_to_bp(c, bucket,
+                                 max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+       bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+
        bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
                             bucket, BTREE_ITER_CACHED);
        k = bch2_btree_iter_peek_slot(&alloc_iter);
@@ -443,8 +455,8 @@ int bch2_get_next_backpointer(struct btree_trans *trans,
                goto out;
        }
 
-       for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
-                          bp_pos, 0, k, ret) {
+       for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
+                                    bp_pos, 0, k, ret) {
                if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
                        break;
 
@@ -492,7 +504,7 @@ static void backpointer_not_found(struct btree_trans *trans,
        prt_printf(&buf, "\n  ");
        bch2_bkey_val_to_text(&buf, c, k);
        if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
-               bch_err(c, "%s", buf.buf);
+               bch_err_ratelimited(c, "%s", buf.buf);
        else
                bch2_trans_inconsistent(trans, "%s", buf.buf);
 
@@ -523,12 +535,32 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
        if (bp.level == c->btree_roots[bp.btree_id].level + 1)
                k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+       if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
                return k;
 
-       backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
-
        bch2_trans_iter_exit(trans, iter);
+
+       if (bp.level) {
+               struct btree *b;
+
+               /*
+                * If a backpointer for a btree node wasn't found, it may be
+                * because it was overwritten by a new btree node that hasn't
+                * been written out yet - backpointer_get_node() checks for
+                * this:
+                */
+               b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+               if (!IS_ERR_OR_NULL(b))
+                       return bkey_i_to_s_c(&b->key);
+
+               bch2_trans_iter_exit(trans, iter);
+
+               if (IS_ERR(b))
+                       return bkey_s_c_err(PTR_ERR(b));
+               return bkey_s_c_null;
+       }
+
+       backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
        return bkey_s_c_null;
 }
 
@@ -540,7 +572,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree *b;
-       struct bkey_s_c k;
 
        BUG_ON(!bp.level);
 
@@ -551,40 +582,36 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
                                  bp.level - 1,
                                  0);
        b = bch2_btree_iter_peek_node(iter);
-       if (IS_ERR(b)) {
-               bch2_trans_iter_exit(trans, iter);
-               return b;
-       }
+       if (IS_ERR(b))
+               goto err;
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level,
-                             bkey_i_to_s_c(&b->key),
-                             bucket, bp))
+       if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+                                  bkey_i_to_s_c(&b->key),
+                                  bucket, bp))
                return b;
 
-       if (!btree_node_will_make_reachable(b))
-               backpointer_not_found(trans, bucket, bp_offset,
-                                     bp, k, "btree node");
-
+       if (b && btree_node_will_make_reachable(b)) {
+               b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+       } else {
+               backpointer_not_found(trans, bucket, bp_offset, bp,
+                                     bkey_i_to_s_c(&b->key), "btree node");
+               b = NULL;
+       }
+err:
        bch2_trans_iter_exit(trans, iter);
-       return NULL;
+       return b;
 }
 
-static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
+                                       struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter = { NULL };
        struct bch_dev *ca;
-       struct bkey_s_c k, alloc_k;
+       struct bkey_s_c alloc_k;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       k = bch2_btree_iter_peek(bp_iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-       if (!k.k)
-               return 0;
-
        if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
                        "backpointer for mising device:\n%s",
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -619,31 +646,22 @@ fsck_err:
 /* verify that every backpointer has a corresponding alloc key */
 int bch2_check_btree_backpointers(struct bch_fs *c)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
-
-       do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     bch2_check_btree_backpointer(&trans, &iter));
-               if (ret)
-                       break;
-       } while (bch2_btree_iter_advance(&iter));
+       struct bkey_s_c k;
 
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
-       return ret;
+       return bch2_trans_run(c,
+               for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_backpointers, POS_MIN, 0, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+                 bch2_check_btree_backpointer(&trans, &iter, k)));
 }
 
 static int check_bp_exists(struct btree_trans *trans,
                           struct bpos bucket_pos,
                           struct bch_backpointer bp,
-                          struct bkey_s_c orig_k)
+                          struct bkey_s_c orig_k,
+                          struct bpos bucket_start,
+                          struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter, bp_iter = { NULL };
@@ -651,6 +669,10 @@ static int check_bp_exists(struct btree_trans *trans,
        struct bkey_s_c alloc_k, bp_k;
        int ret;
 
+       if (bpos_cmp(bucket_pos, bucket_start) < 0 ||
+           bpos_cmp(bucket_pos, bucket_end) > 0)
+               return 0;
+
        bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
        alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
        ret = bkey_err(alloc_k);
@@ -713,7 +735,9 @@ missing:
 }
 
 static int check_extent_to_backpointers(struct btree_trans *trans,
-                                       struct btree_iter *iter)
+                                       struct btree_iter *iter,
+                                       struct bpos bucket_start,
+                                       struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs;
@@ -740,7 +764,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
                bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
                                      k, p, &bucket_pos, &bp);
 
-               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
                if (ret)
                        return ret;
        }
@@ -749,7 +773,9 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 }
 
 static int check_btree_root_to_backpointers(struct btree_trans *trans,
-                                           enum btree_id btree_id)
+                                           enum btree_id btree_id,
+                                           struct bpos bucket_start,
+                                           struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
@@ -781,7 +807,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
                bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
                                      k, p, &bucket_pos, &bp);
 
-               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
                if (ret)
                        goto err;
        }
@@ -790,62 +816,226 @@ err:
        return ret;
 }
 
-int bch2_check_extents_to_backpointers(struct bch_fs *c)
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+       return (struct bbpos) {
+               .btree  = bp.btree_id,
+               .pos    = bp.pos,
+       };
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+       struct sysinfo i;
+       u64 mem_bytes;
+
+       si_meminfo(&i);
+       mem_bytes = i.totalram * i.mem_unit;
+       return (mem_bytes >> 1) / btree_bytes(c);
+}
+
+int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+                                unsigned btree_leaf_mask,
+                                unsigned btree_interior_mask,
+                                struct bbpos start, struct bbpos *end)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+       enum btree_id btree;
+       int ret = 0;
+
+       for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+               if (!((1U << btree) & btree_leaf_mask) &&
+                   !((1U << btree) & btree_interior_mask))
+                       continue;
+
+               bch2_trans_node_iter_init(trans, &iter, btree,
+                                         btree == start.btree ? start.pos : POS_MIN,
+                                         0, depth, 0);
+               /*
+                * for_each_btree_key_contineu() doesn't check the return value
+                * from bch2_btree_iter_advance(), which is needed when
+                * iterating over interior nodes where we'll see keys at
+                * SPOS_MAX:
+                */
+               do {
+                       k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+                       ret = bkey_err(k);
+                       if (!k.k || ret)
+                               break;
+
+                       --btree_nodes;
+                       if (!btree_nodes) {
+                               *end = BBPOS(btree, k.k->p);
+                               bch2_trans_iter_exit(trans, &iter);
+                               return 0;
+                       }
+               } while (bch2_btree_iter_advance(&iter));
+               bch2_trans_iter_exit(trans, &iter);
+       }
+
+       *end = BBPOS_MAX;
+       return ret;
+}
+
+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
+                                                  struct bpos bucket_start,
+                                                  struct bpos bucket_end)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        enum btree_id btree_id;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
        for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
-               bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
-                                         0,
+               unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
+
+               bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+                                         depth,
                                          BTREE_ITER_ALL_LEVELS|
                                          BTREE_ITER_PREFETCH);
 
                do {
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_LAZY_RW|
-                                             BTREE_INSERT_NOFAIL,
-                                             check_extent_to_backpointers(&trans, &iter));
+                       ret = commit_do(trans, NULL, NULL,
+                                       BTREE_INSERT_LAZY_RW|
+                                       BTREE_INSERT_NOFAIL,
+                                       check_extent_to_backpointers(trans, &iter,
+                                                               bucket_start, bucket_end));
                        if (ret)
                                break;
                } while (!bch2_btree_iter_advance(&iter));
 
-               bch2_trans_iter_exit(&trans, &iter);
+               bch2_trans_iter_exit(trans, &iter);
 
                if (ret)
                        break;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     check_btree_root_to_backpointers(&trans, btree_id));
+               ret = commit_do(trans, NULL, NULL,
+                               BTREE_INSERT_LAZY_RW|
+                               BTREE_INSERT_NOFAIL,
+                               check_btree_root_to_backpointers(trans, btree_id,
+                                                       bucket_start, bucket_end));
                if (ret)
                        break;
        }
+       return ret;
+}
+
+int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
+                                struct bpos start, struct bpos *end)
+{
+       struct btree_iter alloc_iter;
+       struct btree_iter bp_iter;
+       struct bkey_s_c alloc_k, bp_k;
+       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+       bool alloc_end = false, bp_end = false;
+       int ret = 0;
+
+       bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+                                 start, 0, 1, 0);
+       bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+                                 bucket_pos_to_bp(trans->c, start, 0), 0, 1, 0);
+       while (1) {
+               alloc_k = !alloc_end
+                       ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
+                       : bkey_s_c_null;
+               bp_k = !bp_end
+                       ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
+                       : bkey_s_c_null;
+
+               ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
+               if ((!alloc_k.k && !bp_k.k) || ret) {
+                       *end = SPOS_MAX;
+                       break;
+               }
+
+               --btree_nodes;
+               if (!btree_nodes) {
+                       *end = alloc_k.k->p;
+                       break;
+               }
+
+               if (bpos_cmp(alloc_iter.pos, SPOS_MAX) &&
+                   bpos_cmp(bucket_pos_to_bp(trans->c, alloc_iter.pos, 0), bp_iter.pos) < 0) {
+                       if (!bch2_btree_iter_advance(&alloc_iter))
+                               alloc_end = true;
+               } else {
+                       if (!bch2_btree_iter_advance(&bp_iter))
+                               bp_end = true;
+               }
+       }
+       bch2_trans_iter_exit(trans, &bp_iter);
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct bpos start = POS_MIN, end;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       while (1) {
+               ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
+               if (ret)
+                       break;
+
+               if (!bpos_cmp(start, POS_MIN) && bpos_cmp(end, SPOS_MAX))
+                       bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
+                                   __func__, btree_nodes_fit_in_ram(c));
+
+               if (bpos_cmp(start, POS_MIN) || bpos_cmp(end, SPOS_MAX)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "check_extents_to_backpointers(): ");
+                       bch2_bpos_to_text(&buf, start);
+                       prt_str(&buf, "-");
+                       bch2_bpos_to_text(&buf, end);
+
+                       bch_verbose(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               }
+
+               ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
+               if (ret || !bpos_cmp(end, SPOS_MAX))
+                       break;
+
+               start = bpos_successor(end);
+       }
        bch2_trans_exit(&trans);
+
        return ret;
 }
 
 static int check_one_backpointer(struct btree_trans *trans,
                                 struct bpos bucket,
-                                u64 *bp_offset)
+                                u64 *bp_offset,
+                                struct bbpos start,
+                                struct bbpos end)
 {
        struct btree_iter iter;
        struct bch_backpointer bp;
+       struct bbpos pos;
        struct bkey_s_c k;
        struct printbuf buf = PRINTBUF;
        int ret;
 
-       ret = bch2_get_next_backpointer(trans, bucket, -1,
-                                       bp_offset, &bp);
+       ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp, 0);
        if (ret || *bp_offset == U64_MAX)
                return ret;
 
+       pos = bp_to_bbpos(bp);
+       if (bbpos_cmp(pos, start) < 0 ||
+           bbpos_cmp(pos, end) > 0)
+               return 0;
+
        k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
        ret = bkey_err(k);
+       if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+               return 0;
        if (ret)
                return ret;
 
@@ -864,29 +1054,73 @@ fsck_err:
        return ret;
 }
 
-int bch2_check_backpointers_to_extents(struct bch_fs *c)
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+                                                  struct bbpos start,
+                                                  struct bbpos end)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
                u64 bp_offset = 0;
 
-               while (!(ret = __bch2_trans_do(&trans, NULL, NULL,
-                                              BTREE_INSERT_LAZY_RW|
-                                              BTREE_INSERT_NOFAIL,
-                               check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+               while (!(ret = commit_do(trans, NULL, NULL,
+                                        BTREE_INSERT_LAZY_RW|
+                                        BTREE_INSERT_NOFAIL,
+                               check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
                       bp_offset < U64_MAX)
                        bp_offset++;
 
                if (ret)
                        break;
        }
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
+       bch2_trans_iter_exit(trans, &iter);
        return ret < 0 ? ret : 0;
 }
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       while (1) {
+               ret = bch2_get_btree_in_memory_pos(&trans,
+                                                  (1U << BTREE_ID_extents)|
+                                                  (1U << BTREE_ID_reflink),
+                                                  ~0,
+                                                  start, &end);
+               if (ret)
+                       break;
+
+               if (!bbpos_cmp(start, BBPOS_MIN) &&
+                   bbpos_cmp(end, BBPOS_MAX))
+                       bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
+                                   __func__, btree_nodes_fit_in_ram(c));
+
+               if (bbpos_cmp(start, BBPOS_MIN) ||
+                   bbpos_cmp(end, BBPOS_MAX)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "check_backpointers_to_extents(): ");
+                       bch2_bbpos_to_text(&buf, start);
+                       prt_str(&buf, "-");
+                       bch2_bbpos_to_text(&buf, end);
+
+                       bch_verbose(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               }
+
+               ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
+               if (ret || !bbpos_cmp(end, BBPOS_MAX))
+                       break;
+
+               start = bbpos_successor(end);
+       }
+       bch2_trans_exit(&trans);
+
+       return ret;
+}