]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/backpointers.c
Update bcachefs sources to 70fa0c1ff4 fixup! bcachefs: Btree key cache improvements
[bcachefs-tools-debian] / libbcachefs / backpointers.c
index 029b1ec14283e1f6950a856d89d59acf491edcea..dace68e208b670aa05cc3a4400155cac4a385967 100644 (file)
@@ -1,11 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "alloc_background.h"
 #include "backpointers.h"
 #include "btree_cache.h"
 #include "btree_update.h"
 #include "error.h"
 
+#include <linux/mm.h>
+
 #define MAX_EXTENT_COMPRESS_RATIO_SHIFT                10
 
 /*
@@ -29,10 +32,15 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
                                           u64 bucket_offset)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+       struct bpos ret;
+
+       ret = POS(bucket.inode,
+                 (bucket_to_sector(ca, bucket.offset) <<
+                  MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
 
-       return POS(bucket.inode,
-                  (bucket_to_sector(ca, bucket.offset) <<
-                   MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+       BUG_ON(bkey_cmp(bucket, bp_pos_to_bucket(c, ret)));
+
+       return ret;
 }
 
 void bch2_extent_ptr_to_bp(struct bch_fs *c,
@@ -406,20 +414,24 @@ err:
 int bch2_get_next_backpointer(struct btree_trans *trans,
                              struct bpos bucket, int gen,
                              u64 *bp_offset,
-                             struct bch_backpointer *dst)
+                             struct bch_backpointer *dst,
+                             unsigned iter_flags)
 {
        struct bch_fs *c = trans->c;
-       struct bpos bp_pos =
-               bucket_pos_to_bp(c, bucket,
-                               max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
-       struct bpos bp_end_pos =
-               bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+       struct bpos bp_pos, bp_end_pos;
        struct btree_iter alloc_iter, bp_iter = { NULL };
        struct bkey_s_c k;
        struct bkey_s_c_alloc_v4 a;
        size_t i;
        int ret;
 
+       if (*bp_offset == U64_MAX)
+               return 0;
+
+       bp_pos = bucket_pos_to_bp(c, bucket,
+                                 max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+       bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+
        bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
                             bucket, BTREE_ITER_CACHED);
        k = bch2_btree_iter_peek_slot(&alloc_iter);
@@ -523,20 +535,28 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
        if (bp.level == c->btree_roots[bp.btree_id].level + 1)
                k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+       if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
                return k;
 
        bch2_trans_iter_exit(trans, iter);
 
        if (bp.level) {
+               struct btree *b;
+
                /*
                 * If a backpointer for a btree node wasn't found, it may be
                 * because it was overwritten by a new btree node that hasn't
                 * been written out yet - backpointer_get_node() checks for
                 * this:
                 */
-               bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+               b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+               if (!IS_ERR_OR_NULL(b))
+                       return bkey_i_to_s_c(&b->key);
+
                bch2_trans_iter_exit(trans, iter);
+
+               if (IS_ERR(b))
+                       return bkey_s_c_err(PTR_ERR(b));
                return bkey_s_c_null;
        }
 
@@ -565,12 +585,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
        if (IS_ERR(b))
                goto err;
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level,
-                             bkey_i_to_s_c(&b->key),
-                             bucket, bp))
+       if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+                                  bkey_i_to_s_c(&b->key),
+                                  bucket, bp))
                return b;
 
-       if (btree_node_will_make_reachable(b)) {
+       if (b && btree_node_will_make_reachable(b)) {
                b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
        } else {
                backpointer_not_found(trans, bucket, bp_offset, bp,
@@ -639,7 +659,9 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
 static int check_bp_exists(struct btree_trans *trans,
                           struct bpos bucket_pos,
                           struct bch_backpointer bp,
-                          struct bkey_s_c orig_k)
+                          struct bkey_s_c orig_k,
+                          struct bpos bucket_start,
+                          struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter, bp_iter = { NULL };
@@ -647,6 +669,10 @@ static int check_bp_exists(struct btree_trans *trans,
        struct bkey_s_c alloc_k, bp_k;
        int ret;
 
+       if (bpos_cmp(bucket_pos, bucket_start) < 0 ||
+           bpos_cmp(bucket_pos, bucket_end) > 0)
+               return 0;
+
        bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
        alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
        ret = bkey_err(alloc_k);
@@ -709,7 +735,9 @@ missing:
 }
 
 static int check_extent_to_backpointers(struct btree_trans *trans,
-                                       struct btree_iter *iter)
+                                       struct btree_iter *iter,
+                                       struct bpos bucket_start,
+                                       struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs;
@@ -736,7 +764,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
                bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
                                      k, p, &bucket_pos, &bp);
 
-               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
                if (ret)
                        return ret;
        }
@@ -745,7 +773,9 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 }
 
 static int check_btree_root_to_backpointers(struct btree_trans *trans,
-                                           enum btree_id btree_id)
+                                           enum btree_id btree_id,
+                                           struct bpos bucket_start,
+                                           struct bpos bucket_end)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
@@ -777,7 +807,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
                bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
                                      k, p, &bucket_pos, &bp);
 
-               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
                if (ret)
                        goto err;
        }
@@ -786,60 +816,222 @@ err:
        return ret;
 }
 
-int bch2_check_extents_to_backpointers(struct bch_fs *c)
+/* Pack the btree id and key position carried by a backpointer into a bbpos. */
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+       struct bbpos ret = { .btree = bp.btree_id, .pos = bp.pos };
+
+       return ret;
+}
+
+/* How many btree nodes fit in half of the RAM reported by si_meminfo(). */
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+       struct sysinfo i;
+
+       si_meminfo(&i);
+       /* Widen first so totalram * mem_unit is computed in 64 bits. */
+       return (((u64) i.totalram * i.mem_unit) >> 1) / btree_bytes(c);
+}
+
+int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+                                unsigned btree_leaf_mask,
+                                unsigned btree_interior_mask,
+                                struct bbpos start, struct bbpos *end)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+       enum btree_id btree;
+       int ret = 0;
+
+       for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+               if (!((1U << btree) & btree_leaf_mask) &&
+                   !((1U << btree) & btree_interior_mask))
+                       continue;
+               /* Only the starting btree resumes at start.pos; later ones begin at POS_MIN. */
+               bch2_trans_node_iter_init(trans, &iter, btree,
+                                         btree == start.btree ? start.pos : POS_MIN,
+                                         0, depth, 0);
+               /*
+                * for_each_btree_key_continue() doesn't check the return value
+                * from bch2_btree_iter_advance(), which is needed when
+                * iterating over interior nodes where we'll see keys at
+                * SPOS_MAX:
+                */
+               do {
+                       k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+                       ret = bkey_err(k);
+                       if (!k.k || ret)
+                               break;
+                       /* Every key returned here is charged against the btree_nodes budget. */
+                       --btree_nodes;
+                       if (!btree_nodes) {
+                               *end = BBPOS(btree, k.k->p);
+                               bch2_trans_iter_exit(trans, &iter);
+                               return 0;
+                       }
+               } while (bch2_btree_iter_advance(&iter));
+               bch2_trans_iter_exit(trans, &iter);
+       }
+       /* Budget never ran out (or an error ended the walk): report BBPOS_MAX. */
+       *end = BBPOS_MAX;
+       return ret;
+}
+
+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
+                                                  struct bpos bucket_start,
+                                                  struct bpos bucket_end)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        enum btree_id btree_id;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
        for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
-               bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
-                                         0,
+               unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
+
+               bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+                                         depth,
                                          BTREE_ITER_ALL_LEVELS|
                                          BTREE_ITER_PREFETCH);
 
                do {
-                       ret = commit_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_LAZY_RW|
-                                             BTREE_INSERT_NOFAIL,
-                                             check_extent_to_backpointers(&trans, &iter));
+                       ret = commit_do(trans, NULL, NULL,
+                                       BTREE_INSERT_LAZY_RW|
+                                       BTREE_INSERT_NOFAIL,
+                                       check_extent_to_backpointers(trans, &iter,
+                                                               bucket_start, bucket_end));
                        if (ret)
                                break;
                } while (!bch2_btree_iter_advance(&iter));
 
-               bch2_trans_iter_exit(&trans, &iter);
+               bch2_trans_iter_exit(trans, &iter);
 
                if (ret)
                        break;
 
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     check_btree_root_to_backpointers(&trans, btree_id));
+               ret = commit_do(trans, NULL, NULL,
+                               BTREE_INSERT_LAZY_RW|
+                               BTREE_INSERT_NOFAIL,
+                               check_btree_root_to_backpointers(trans, btree_id,
+                                                       bucket_start, bucket_end));
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+/* Set *end to where the alloc/backpointers walk from @start exhausts the RAM budget. */
+int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
+                                struct bpos start, struct bpos *end)
+{
+       struct btree_iter alloc_iter, bp_iter;
+       struct bkey_s_c alloc_k, bp_k;
+       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+       bool alloc_end = false, bp_end = false;
+       int ret = 0;
+
+       bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+                                 start, 0, 1, 0);
+       bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+                                 bucket_pos_to_bp(trans->c, start, 0), 0, 1, 0);
+       while (1) {
+               alloc_k = !alloc_end
+                       ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
+                       : bkey_s_c_null;
+               bp_k = !bp_end
+                       ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
+                       : bkey_s_c_null;
+
+               ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
+               if ((!alloc_k.k && !bp_k.k) || ret) {
+                       *end = SPOS_MAX;
+                       break;
+               }
+               /* alloc_k is null once the alloc walk has ended; then stop at SPOS_MAX. */
+               --btree_nodes;
+               if (!btree_nodes) {
+                       *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
+                       break;
+               }
+
+               if (bpos_cmp(alloc_iter.pos, SPOS_MAX) &&
+                   bpos_cmp(bucket_pos_to_bp(trans->c, alloc_iter.pos, 0), bp_iter.pos) < 0) {
+                       if (!bch2_btree_iter_advance(&alloc_iter))
+                               alloc_end = true;
+               } else {
+                       if (!bch2_btree_iter_advance(&bp_iter))
+                               bp_end = true;
+               }
+       }
+       bch2_trans_iter_exit(trans, &bp_iter);
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct bpos start = POS_MIN, end;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       while (1) {
+               ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
                if (ret)
                        break;
+               /* start is still POS_MIN but end is not SPOS_MAX: more than one pass is needed. */
+               if (!bpos_cmp(start, POS_MIN) && bpos_cmp(end, SPOS_MAX))
+                       bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
+                                   __func__, btree_nodes_fit_in_ram(c));
+               /* Log this pass's bucket range unless it spans POS_MIN..SPOS_MAX. */
+               if (bpos_cmp(start, POS_MIN) || bpos_cmp(end, SPOS_MAX)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "check_extents_to_backpointers(): ");
+                       bch2_bpos_to_text(&buf, start);
+                       prt_str(&buf, "-");
+                       bch2_bpos_to_text(&buf, end);
+
+                       bch_verbose(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               }
+
+               ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
+               if (ret || !bpos_cmp(end, SPOS_MAX))
+                       break;
+               /* Next pass starts right after the position this pass ended on. */
+               start = bpos_successor(end);
        }
        bch2_trans_exit(&trans);
+
        return ret;
 }
 
 static int check_one_backpointer(struct btree_trans *trans,
                                 struct bpos bucket,
-                                u64 *bp_offset)
+                                u64 *bp_offset,
+                                struct bbpos start,
+                                struct bbpos end)
 {
        struct btree_iter iter;
        struct bch_backpointer bp;
+       struct bbpos pos;
        struct bkey_s_c k;
        struct printbuf buf = PRINTBUF;
        int ret;
 
-       ret = bch2_get_next_backpointer(trans, bucket, -1,
-                                       bp_offset, &bp);
+       ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp, 0);
        if (ret || *bp_offset == U64_MAX)
                return ret;
 
+       pos = bp_to_bbpos(bp);
+       if (bbpos_cmp(pos, start) < 0 ||
+           bbpos_cmp(pos, end) > 0)
+               return 0;
+
        k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
        ret = bkey_err(k);
        if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
@@ -862,29 +1054,73 @@ fsck_err:
        return ret;
 }
 
-int bch2_check_backpointers_to_extents(struct bch_fs *c)
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+                                                  struct bbpos start,
+                                                  struct bbpos end)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
                u64 bp_offset = 0;
 
-               while (!(ret = commit_do(&trans, NULL, NULL,
-                                              BTREE_INSERT_LAZY_RW|
-                                              BTREE_INSERT_NOFAIL,
-                               check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+               while (!(ret = commit_do(trans, NULL, NULL,
+                                        BTREE_INSERT_LAZY_RW|
+                                        BTREE_INSERT_NOFAIL,
+                               check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
                       bp_offset < U64_MAX)
                        bp_offset++;
 
                if (ret)
                        break;
        }
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
+       bch2_trans_iter_exit(trans, &iter);
        return ret < 0 ? ret : 0;
 }
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       while (1) {
+               ret = bch2_get_btree_in_memory_pos(&trans,
+                                                  (1U << BTREE_ID_extents)|
+                                                  (1U << BTREE_ID_reflink),
+                                                  ~0,
+                                                  start, &end);
+               if (ret)
+                       break;
+               /* start compares equal to BBPOS_MIN but end is not BBPOS_MAX: multiple passes. */
+               if (!bbpos_cmp(start, BBPOS_MIN) &&
+                   bbpos_cmp(end, BBPOS_MAX))
+                       bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
+                                   __func__, btree_nodes_fit_in_ram(c));
+               /* Log this pass's range unless it spans BBPOS_MIN..BBPOS_MAX. */
+               if (bbpos_cmp(start, BBPOS_MIN) ||
+                   bbpos_cmp(end, BBPOS_MAX)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "check_backpointers_to_extents(): ");
+                       bch2_bbpos_to_text(&buf, start);
+                       prt_str(&buf, "-");
+                       bch2_bbpos_to_text(&buf, end);
+
+                       bch_verbose(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               }
+
+               ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
+               if (ret || !bbpos_cmp(end, BBPOS_MAX))
+                       break;
+               /* Next pass starts right after the position this pass ended on. */
+               start = bbpos_successor(end);
+       }
+       bch2_trans_exit(&trans);
+
+       return ret;
+}