git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 2f4e24d856 bcachefs: Split out dev_buckets_free()
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 15 Jun 2022 18:58:36 +0000 (14:58 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Wed, 15 Jun 2022 18:58:36 +0000 (14:58 -0400)
19 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/backpointers.c [new file with mode: 0644]
libbcachefs/backpointers.h [new file with mode: 0644]
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.c
libbcachefs/btree_types.h
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/fsck.c
libbcachefs/move.c
libbcachefs/move.h
libbcachefs/movinggc.c
libbcachefs/recovery.c
libbcachefs/super.c

index 280ed22c3a3ac2a0f9e1f0a738f8017b46410fea..b5a8d6b1bd7875490ef4b6a4f639d8672d5b29ae 100644 (file)
@@ -1 +1 @@
-c76f7e91e8939751ccc96ca2f8f6bfe6dd368d93
+2f4e24d85692600a698d78938a213f27593bda25
index 288018fb4c48d6ac048c8cf8148b89a04aca4cbc..359cb23f037b2c2890e5621745361816997b0b63 100644 (file)
@@ -2,6 +2,7 @@
 #include "bcachefs.h"
 #include "alloc_background.h"
 #include "alloc_foreground.h"
+#include "backpointers.h"
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_key_cache.h"
@@ -37,8 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
 
 struct bkey_alloc_unpacked {
        u64             journal_seq;
-       u64             bucket;
-       u8              dev;
        u8              gen;
        u8              oldest_gen;
        u8              data_type;
@@ -194,11 +193,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
 
 static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 {
-       struct bkey_alloc_unpacked ret = {
-               .dev    = k.k->p.inode,
-               .bucket = k.k->p.offset,
-               .gen    = 0,
-       };
+       struct bkey_alloc_unpacked ret = { .gen = 0 };
 
        switch (k.k->type) {
        case KEY_TYPE_alloc:
@@ -215,48 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
        return ret;
 }
 
-void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
-{
-       if (k.k->type == KEY_TYPE_alloc_v4) {
-               *out = *bkey_s_c_to_alloc_v4(k).v;
-       } else {
-               struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
-
-               *out = (struct bch_alloc_v4) {
-                       .journal_seq            = u.journal_seq,
-                       .flags                  = u.need_discard,
-                       .gen                    = u.gen,
-                       .oldest_gen             = u.oldest_gen,
-                       .data_type              = u.data_type,
-                       .stripe_redundancy      = u.stripe_redundancy,
-                       .dirty_sectors          = u.dirty_sectors,
-                       .cached_sectors         = u.cached_sectors,
-                       .io_time[READ]          = u.read_time,
-                       .io_time[WRITE]         = u.write_time,
-                       .stripe                 = u.stripe,
-               };
-       }
-}
-
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
-{
-       struct bkey_i_alloc_v4 *ret;
-
-       if (k.k->type == KEY_TYPE_alloc_v4) {
-               ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
-               if (!IS_ERR(ret))
-                       bkey_reassemble(&ret->k_i, k);
-       } else {
-               ret = bch2_trans_kmalloc(trans, sizeof(*ret));
-               if (!IS_ERR(ret)) {
-                       bkey_alloc_v4_init(&ret->k_i);
-                       ret->k.p = k.k->p;
-                       bch2_alloc_to_v4(k, &ret->v);
-               }
-       }
-       return ret;
-}
-
 struct bkey_i_alloc_v4 *
 bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
                              struct bpos pos)
@@ -339,9 +292,15 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
 {
        struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
 
-       if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) {
-               prt_printf(err, "bad val size (%zu != %zu)",
-                      bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4));
+       if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) {
+               prt_printf(err, "bad val size (%lu != %u)",
+                      bkey_val_u64s(k.k), alloc_v4_u64s(a.v));
+               return -EINVAL;
+       }
+
+       if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
+           BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) {
+               prt_printf(err, "invalid backpointers_start");
                return -EINVAL;
        }
 
@@ -401,9 +360,19 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
        return 0;
 }
 
+static inline u64 swab40(u64 x)
+{
+       return (((x & 0x00000000ffULL) << 32)|
+               ((x & 0x000000ff00ULL) << 16)|
+               ((x & 0x0000ff0000ULL) >>  0)|
+               ((x & 0x00ff000000ULL) >> 16)|
+               ((x & 0xff00000000ULL) >> 32));
+}
+
 void bch2_alloc_v4_swab(struct bkey_s k)
 {
        struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
+       struct bch_backpointer *bp, *bps;
 
        a->journal_seq          = swab64(a->journal_seq);
        a->flags                = swab32(a->flags);
@@ -413,25 +382,135 @@ void bch2_alloc_v4_swab(struct bkey_s k)
        a->io_time[1]           = swab64(a->io_time[1]);
        a->stripe               = swab32(a->stripe);
        a->nr_external_backpointers = swab32(a->nr_external_backpointers);
+
+       bps = alloc_v4_backpointers(a);
+       for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
+               bp->bucket_offset       = swab40(bp->bucket_offset);
+               bp->bucket_len          = swab32(bp->bucket_len);
+               bch2_bpos_swab(&bp->pos);
+       }
 }
 
 void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
-       struct bch_alloc_v4 a;
+       struct bch_alloc_v4 _a;
+       const struct bch_alloc_v4 *a = &_a;
+       const struct bch_backpointer *bps;
+       unsigned i;
 
-       bch2_alloc_to_v4(k, &a);
+       if (k.k->type == KEY_TYPE_alloc_v4)
+               a = bkey_s_c_to_alloc_v4(k).v;
+       else
+               bch2_alloc_to_v4(k, &_a);
+
+       prt_newline(out);
+       printbuf_indent_add(out, 2);
+
+       prt_printf(out, "gen %u oldest_gen %u data_type %s",
+              a->gen, a->oldest_gen, bch2_data_types[a->data_type]);
+       prt_newline(out);
+       prt_printf(out, "journal_seq       %llu",       a->journal_seq);
+       prt_newline(out);
+       prt_printf(out, "need_discard      %llu",       BCH_ALLOC_V4_NEED_DISCARD(a));
+       prt_newline(out);
+       prt_printf(out, "need_inc_gen      %llu",       BCH_ALLOC_V4_NEED_INC_GEN(a));
+       prt_newline(out);
+       prt_printf(out, "dirty_sectors     %u", a->dirty_sectors);
+       prt_newline(out);
+       prt_printf(out, "cached_sectors    %u", a->cached_sectors);
+       prt_newline(out);
+       prt_printf(out, "stripe            %u", a->stripe);
+       prt_newline(out);
+       prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
+       prt_newline(out);
+       prt_printf(out, "io_time[READ]     %llu",       a->io_time[READ]);
+       prt_newline(out);
+       prt_printf(out, "io_time[WRITE]    %llu",       a->io_time[WRITE]);
+       prt_newline(out);
+       prt_printf(out, "backpointers:     %llu",       BCH_ALLOC_V4_NR_BACKPOINTERS(a));
+       printbuf_indent_add(out, 2);
+
+       bps = alloc_v4_backpointers_c(a);
+       for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) {
+               prt_newline(out);
+               bch2_backpointer_to_text(out, &bps[i]);
+       }
 
-       prt_printf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu",
-              a.gen, a.oldest_gen, bch2_data_types[a.data_type],
-              a.journal_seq,
-              BCH_ALLOC_V4_NEED_DISCARD(&a),
-              BCH_ALLOC_V4_NEED_INC_GEN(&a));
-       prt_printf(out, " dirty_sectors %u",    a.dirty_sectors);
-       prt_printf(out, " cached_sectors %u",   a.cached_sectors);
-       prt_printf(out, " stripe %u",           a.stripe);
-       prt_printf(out, " stripe_redundancy %u",        a.stripe_redundancy);
-       prt_printf(out, " read_time %llu",              a.io_time[READ]);
-       prt_printf(out, " write_time %llu",             a.io_time[WRITE]);
+       printbuf_indent_sub(out, 4);
+}
+
+void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
+{
+       if (k.k->type == KEY_TYPE_alloc_v4) {
+               int d;
+
+               *out = *bkey_s_c_to_alloc_v4(k).v;
+
+               d = (int) BCH_ALLOC_V4_U64s -
+                       (int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0);
+               if (unlikely(d > 0)) {
+                       memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out),
+                              0,
+                              d * sizeof(u64));
+                       SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+               }
+       } else {
+               struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
+
+               *out = (struct bch_alloc_v4) {
+                       .journal_seq            = u.journal_seq,
+                       .flags                  = u.need_discard,
+                       .gen                    = u.gen,
+                       .oldest_gen             = u.oldest_gen,
+                       .data_type              = u.data_type,
+                       .stripe_redundancy      = u.stripe_redundancy,
+                       .dirty_sectors          = u.dirty_sectors,
+                       .cached_sectors         = u.cached_sectors,
+                       .io_time[READ]          = u.read_time,
+                       .io_time[WRITE]         = u.write_time,
+                       .stripe                 = u.stripe,
+               };
+
+               SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+       }
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+       unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
+               ? bkey_bytes(k.k)
+               : sizeof(struct bkey_i_alloc_v4);
+       struct bkey_i_alloc_v4 *ret;
+
+       /*
+        * Reserve space for one more backpointer here:
+        * Not sketchy at doing it this way, nope...
+        */
+       ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
+       if (IS_ERR(ret))
+               return ret;
+
+       if (k.k->type == KEY_TYPE_alloc_v4) {
+               bkey_reassemble(&ret->k_i, k);
+
+               if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
+                       struct bch_backpointer *src, *dst;
+
+                       src = alloc_v4_backpointers(&ret->v);
+                       SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+                       dst = alloc_v4_backpointers(&ret->v);
+
+                       memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
+                               sizeof(struct bch_backpointer));
+                       memset(src, 0, dst - src);
+                       set_alloc_v4_u64s(ret);
+               }
+       } else {
+               bkey_alloc_v4_init(&ret->k_i);
+               ret->k.p = k.k->p;
+               bch2_alloc_to_v4(k, &ret->v);
+       }
+       return ret;
 }
 
 int bch2_alloc_read(struct bch_fs *c)
@@ -1052,6 +1131,7 @@ static void bch2_do_discards_work(struct work_struct *work)
                if (ret)
                        break;
 
+               this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
                discarded++;
        }
        bch2_trans_iter_exit(&trans, &iter);
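
The swab40() helper added above byte-swaps only the low five bytes of a u64; it exists for bch_backpointer.bucket_offset, which is a 40-bit bitfield. A minimal userspace sketch of its behavior (the test harness is assumed, not part of this commit):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* copy of the helper above, with fixed-width types for userspace */
	static inline uint64_t swab40(uint64_t x)
	{
		return (((x & 0x00000000ffULL) << 32)|
			((x & 0x000000ff00ULL) << 16)|
			((x & 0x0000ff0000ULL) >>  0)|
			((x & 0x00ff000000ULL) >> 16)|
			((x & 0xff00000000ULL) >> 32));
	}

	int main(void)
	{
		uint64_t v = 0x0102030405ULL;		/* five bytes: 01 02 03 04 05 */

		assert(swab40(v) == 0x0504030201ULL);	/* byte order reversed */
		assert(swab40(swab40(v)) == v);		/* swapping twice is a no-op */
		printf("%010llx -> %010llx\n",
		       (unsigned long long) v,
		       (unsigned long long) swab40(v));
		return 0;
	}
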
index ff366e61ace51ea4af61b916c88ca88f24d05a1e..2ac6b5046c67b66e27533d1b93f71bd5d0332a78 100644 (file)
@@ -70,6 +70,22 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
        return pos;
 }
 
+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+{
+       unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+                       BCH_ALLOC_V4_U64s_V0) +
+               BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
+               (sizeof(struct bch_backpointer) / sizeof(u64));
+
+       BUG_ON(ret > U8_MAX - BKEY_U64s);
+       return ret;
+}
+
+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
+{
+       set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
+}
+
 struct bkey_i_alloc_v4 *
 bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
 
@@ -143,6 +159,16 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
 
 void bch2_do_invalidates(struct bch_fs *);
 
+static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
+{
+       return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
+static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
+{
+       return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
 int bch2_fs_freespace_init(struct bch_fs *);
 
 void bch2_recalc_capacity(struct bch_fs *);
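
For reference, alloc_v4_u64s() above sizes the value as a fixed header of BCH_ALLOC_V4_BACKPOINTERS_START u64s (falling back to BCH_ALLOC_V4_U64s_V0 for keys written before the start field existed) plus the inline backpointer array. A back-of-envelope model of that arithmetic — the per-backpointer size of 5 u64s is an assumption for the demo, not taken from the headers:

	#include <stdio.h>

	#define BACKPOINTER_U64S	5	/* assumed sizeof(struct bch_backpointer) / 8 */

	/* model of alloc_v4_u64s(): fixed header + inline backpointer array */
	static unsigned model_alloc_v4_u64s(unsigned backpointers_start, unsigned nr)
	{
		return backpointers_start + nr * BACKPOINTER_U64S;
	}

	int main(void)
	{
		unsigned start = 6;	/* e.g. BCH_ALLOC_V4_BACKPOINTERS_START */
		unsigned nr;

		for (nr = 0; nr <= 3; nr++)
			printf("%u inline backpointers -> %u u64s\n",
			       nr, model_alloc_v4_u64s(start, nr));
		return 0;
	}
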
index d52282fb6a1dd44887f7e3d76b09e6e77a6a4845..7a878a6906abfe32a9f0aa05cff3b533fc5fe4da 100644 (file)
@@ -506,7 +506,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
        int ret;
 again:
        usage = bch2_dev_usage_read(ca);
-       avail = __dev_buckets_available(ca, usage,reserve);
+       avail = dev_buckets_free(ca, usage, reserve);
 
        if (usage.d[BCH_DATA_need_discard].buckets > avail)
                bch2_do_discards(c);
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
new file mode 100644 (file)
index 0000000..f3260bb
--- /dev/null
@@ -0,0 +1,891 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_cache.h"
+#include "btree_update.h"
+#include "error.h"
+
+#define MAX_EXTENT_COMPRESS_RATIO_SHIFT                10
+
+/*
+ * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
+ * btree:
+ */
+static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
+                                          struct bpos bp_pos)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
+       u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
+
+       return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
+}
+
+/*
+ * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
+ */
+static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
+                                          struct bpos bucket,
+                                          u64 bucket_offset)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+
+       return POS(bucket.inode,
+                  (bucket_to_sector(ca, bucket.offset) <<
+                   MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+}
+
+void bch2_extent_ptr_to_bp(struct bch_fs *c,
+                          enum btree_id btree_id, unsigned level,
+                          struct bkey_s_c k, struct extent_ptr_decoded p,
+                          struct bpos *bucket_pos, struct bch_backpointer *bp)
+{
+       enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
+       s64 sectors = level ? btree_sectors(c) : k.k->size;
+       u32 bucket_offset;
+
+       *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
+       *bp = (struct bch_backpointer) {
+               .btree_id       = btree_id,
+               .level          = level,
+               .data_type      = data_type,
+               .bucket_offset  = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
+                       p.crc.offset,
+               .bucket_len     = ptr_disk_sectors(sectors, p),
+               .pos            = k.k->p,
+       };
+}
+
+static bool extent_matches_bp(struct bch_fs *c,
+                             enum btree_id btree_id, unsigned level,
+                             struct bkey_s_c k,
+                             struct bpos bucket,
+                             struct bch_backpointer bp)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               struct bpos bucket2;
+               struct bch_backpointer bp2;
+
+               if (p.ptr.cached)
+                       continue;
+
+               bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
+                                     &bucket2, &bp2);
+               if (!bpos_cmp(bucket, bucket2) &&
+                   !memcmp(&bp, &bp2, sizeof(bp)))
+                       return true;
+       }
+
+       return false;
+}
+
+int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                            int rw, struct printbuf *err)
+{
+       struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+       struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
+
+       if (bkey_val_bytes(bp.k) < sizeof(*bp.v)) {
+               prt_str(err, "incorrect value size");
+               return -EINVAL;
+       }
+
+       if (bpos_cmp(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
+               prt_str(err, "backpointer at wrong pos");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
+{
+       prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
+              bch2_btree_ids[bp->btree_id],
+              bp->level,
+              (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+              (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+              bp->bucket_len);
+       bch2_bpos_to_text(out, bp->pos);
+}
+
+void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+       bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
+}
+
+void bch2_backpointer_swab(struct bkey_s k)
+{
+       struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
+
+       bp.v->bucket_offset     = swab32(bp.v->bucket_offset);
+       bp.v->bucket_len        = swab32(bp.v->bucket_len);
+       bch2_bpos_swab(&bp.v->pos);
+}
+
+#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1)
+
+static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r)
+{
+       return cmp_int(l.bucket_offset, r.bucket_offset);
+}
+
+static int bch2_backpointer_del_by_offset(struct btree_trans *trans,
+                                         struct bpos bucket,
+                                         u64 bp_offset,
+                                         struct bch_backpointer bp)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       if (bp_offset < BACKPOINTER_OFFSET_MAX) {
+               struct bch_backpointer *bps;
+               struct bkey_i_alloc_v4 *a;
+               unsigned i, nr;
+
+               bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+                                    bucket,
+                                    BTREE_ITER_INTENT|
+                                    BTREE_ITER_SLOTS|
+                                    BTREE_ITER_WITH_UPDATES);
+               k = bch2_btree_iter_peek_slot(&iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
+
+               if (k.k->type != KEY_TYPE_alloc_v4) {
+                       ret = -ENOENT;
+                       goto err;
+               }
+
+               a = bch2_alloc_to_v4_mut(trans, k);
+               ret = PTR_ERR_OR_ZERO(a);
+               if (ret)
+                       goto err;
+               bps = alloc_v4_backpointers(&a->v);
+               nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+
+               for (i = 0; i < nr; i++) {
+                       if (bps[i].bucket_offset == bp_offset)
+                               goto found;
+                       if (bps[i].bucket_offset > bp_offset)
+                               break;
+               }
+
+               ret = -ENOENT;
+               goto err;
+found:
+               if (memcmp(&bps[i], &bp, sizeof(bp))) {
+                       ret = -ENOENT;
+                       goto err;
+               }
+               array_remove_item(bps, nr, i);
+               SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+               set_alloc_v4_u64s(a);
+               ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+       } else {
+               bp_offset -= BACKPOINTER_OFFSET_MAX;
+
+               bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers,
+                                    bucket_pos_to_bp(c, bucket, bp_offset),
+                                    BTREE_ITER_INTENT|
+                                    BTREE_ITER_SLOTS|
+                                    BTREE_ITER_WITH_UPDATES);
+               k = bch2_btree_iter_peek_slot(&iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
+
+               if (k.k->type != KEY_TYPE_backpointer ||
+                   memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
+                       ret = -ENOENT;
+                       goto err;
+               }
+
+               ret = bch2_btree_delete_at(trans, &iter, 0);
+       }
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_bucket_backpointer_del(struct btree_trans *trans,
+                               struct bkey_i_alloc_v4 *a,
+                               struct bch_backpointer bp,
+                               struct bkey_s_c orig_k)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
+       unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+       struct btree_iter bp_iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for (i = 0; i < nr; i++) {
+               int cmp = backpointer_cmp(bps[i], bp) ?:
+                       memcmp(&bps[i], &bp, sizeof(bp));
+               if (!cmp)
+                       goto found;
+               if (cmp >= 0)
+                       break;
+       }
+
+       goto btree;
+found:
+       array_remove_item(bps, nr, i);
+       SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+       set_alloc_v4_u64s(a);
+       return 0;
+btree:
+       bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+                            bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_SLOTS|
+                            BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(&bp_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (k.k->type != KEY_TYPE_backpointer ||
+           memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_printf(&buf, "backpointer not found when deleting");
+               prt_newline(&buf);
+               printbuf_indent_add(&buf, 2);
+
+               prt_printf(&buf, "searching for ");
+               bch2_backpointer_to_text(&buf, &bp);
+               prt_newline(&buf);
+
+               prt_printf(&buf, "got ");
+               bch2_bkey_val_to_text(&buf, c, k);
+               prt_newline(&buf);
+
+               prt_str(&buf, "alloc ");
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+               prt_newline(&buf);
+
+               prt_printf(&buf, "for ");
+               bch2_bkey_val_to_text(&buf, c, orig_k);
+
+               if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+                       bch_err(c, "%s", buf.buf);
+               } else {
+                       ret = -EIO;
+                       bch2_trans_inconsistent(trans, "%s", buf.buf);
+               }
+               printbuf_exit(&buf);
+               goto err;
+       }
+
+       ret = bch2_btree_delete_at(trans, &bp_iter, 0);
+err:
+       bch2_trans_iter_exit(trans, &bp_iter);
+       return ret;
+}
+
+int bch2_bucket_backpointer_add(struct btree_trans *trans,
+                               struct bkey_i_alloc_v4 *a,
+                               struct bch_backpointer bp,
+                               struct bkey_s_c orig_k)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca;
+       struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
+       unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+       struct bkey_i_backpointer *bp_k;
+       struct btree_iter bp_iter;
+       struct bkey_s_c k;
+       int ret;
+
+       /* Check for duplicates: */
+       for (i = 0; i < nr; i++) {
+               int cmp = backpointer_cmp(bps[i], bp);
+               if (cmp >= 0)
+                       break;
+       }
+
+       if ((i &&
+            (bps[i - 1].bucket_offset +
+             bps[i - 1].bucket_len > bp.bucket_offset)) ||
+           (i < nr &&
+            (bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_printf(&buf, "overlapping backpointer found when inserting ");
+               bch2_backpointer_to_text(&buf, &bp);
+               prt_newline(&buf);
+               printbuf_indent_add(&buf, 2);
+
+               prt_printf(&buf, "into ");
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+               prt_newline(&buf);
+
+               prt_printf(&buf, "for ");
+               bch2_bkey_val_to_text(&buf, c, orig_k);
+
+               if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+                       /* pre-fsck: log the inconsistency and keep going */
+                       bch_err(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               } else {
+                       bch2_trans_inconsistent(trans, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       return -EIO;
+               }
+       }
+
+       if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) {
+               array_insert_item(bps, nr, i, bp);
+               SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+               set_alloc_v4_u64s(a);
+               return 0;
+       }
+
+       /* Overflow: use backpointer btree */
+       bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k));
+       ret = PTR_ERR_OR_ZERO(bp_k);
+       if (ret)
+               return ret;
+
+       ca = bch_dev_bkey_exists(c, a->k.p.inode);
+
+       bkey_backpointer_init(&bp_k->k_i);
+       bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
+       bp_k->v = bp;
+
+       bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p,
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_SLOTS|
+                            BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(&bp_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (k.k->type) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_printf(&buf, "existing btree backpointer key found when inserting ");
+               bch2_backpointer_to_text(&buf, &bp);
+               prt_newline(&buf);
+               printbuf_indent_add(&buf, 2);
+
+               prt_printf(&buf, "found ");
+               bch2_bkey_val_to_text(&buf, c, k);
+               prt_newline(&buf);
+
+               prt_printf(&buf, "for ");
+               bch2_bkey_val_to_text(&buf, c, orig_k);
+
+               if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+                       /* pre-fsck: log the inconsistency and keep going */
+                       bch_err(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               } else {
+                       bch2_trans_inconsistent(trans, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       ret = -EIO;
+                       goto err;
+               }
+       }
+
+       ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
+err:
+       bch2_trans_iter_exit(trans, &bp_iter);
+       return ret;
+}
+
+/*
+ * Find the next backpointer >= *bp_offset:
+ */
+int bch2_get_next_backpointer(struct btree_trans *trans,
+                             struct bpos bucket, int gen,
+                             u64 *bp_offset,
+                             struct bch_backpointer *dst)
+{
+       struct bch_fs *c = trans->c;
+       struct bpos bp_pos =
+               bucket_pos_to_bp(c, bucket,
+                               max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+       struct bpos bp_end_pos =
+               bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+       struct btree_iter alloc_iter, bp_iter = { NULL };
+       struct bkey_s_c k;
+       struct bkey_s_c_alloc_v4 a;
+       size_t i;
+       int ret;
+
+       bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+                            bucket, BTREE_ITER_CACHED);
+       k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto out;
+
+       if (k.k->type != KEY_TYPE_alloc_v4)
+               goto done;
+
+       a = bkey_s_c_to_alloc_v4(k);
+       if (gen >= 0 && a.v->gen != gen)
+               goto done;
+
+       for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) {
+               if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset)
+                       continue;
+
+               *dst = alloc_v4_backpointers_c(a.v)[i];
+               *bp_offset = dst->bucket_offset;
+               goto out;
+       }
+
+       for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
+                          bp_pos, 0, k, ret) {
+               if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
+                       break;
+
+               if (k.k->type != KEY_TYPE_backpointer)
+                       continue;
+
+               *dst = *bkey_s_c_to_backpointer(k).v;
+               *bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX;
+               goto out;
+       }
+done:
+       *bp_offset = U64_MAX;
+out:
+       bch2_trans_iter_exit(trans, &bp_iter);
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       return ret;
+}
+
+static void backpointer_not_found(struct btree_trans *trans,
+                                 struct bpos bucket,
+                                 u64 bp_offset,
+                                 struct bch_backpointer bp,
+                                 struct bkey_s_c k,
+                                 const char *thing_it_points_to)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+
+       prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
+                  thing_it_points_to);
+       prt_printf(&buf, "bucket: ");
+       bch2_bpos_to_text(&buf, bucket);
+       prt_printf(&buf, "\n  ");
+
+       if (bp_offset >= BACKPOINTER_OFFSET_MAX) {
+               struct bpos bp_pos =
+                       bucket_pos_to_bp(c, bucket,
+                                       bp_offset - BACKPOINTER_OFFSET_MAX);
+               prt_printf(&buf, "backpointer pos: ");
+               bch2_bpos_to_text(&buf, bp_pos);
+               prt_printf(&buf, "\n  ");
+       }
+
+       bch2_backpointer_to_text(&buf, &bp);
+       prt_printf(&buf, "\n  ");
+       bch2_bkey_val_to_text(&buf, c, k);
+       if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
+               bch_err(c, "%s", buf.buf);
+       else
+               bch2_trans_inconsistent(trans, "%s", buf.buf);
+
+       printbuf_exit(&buf);
+}
+
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
+                                        struct btree_iter *iter,
+                                        struct bpos bucket,
+                                        u64 bp_offset,
+                                        struct bch_backpointer bp)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c k;
+
+       bch2_trans_node_iter_init(trans, iter,
+                                 bp.btree_id,
+                                 bp.pos,
+                                 0,
+                                 min(bp.level, c->btree_roots[bp.btree_id].level),
+                                 0);
+       k = bch2_btree_iter_peek_slot(iter);
+       if (bkey_err(k)) {
+               bch2_trans_iter_exit(trans, iter);
+               return k;
+       }
+
+       if (bp.level == c->btree_roots[bp.btree_id].level + 1)
+               k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
+
+       if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+               return k;
+
+       backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
+
+       bch2_trans_iter_exit(trans, iter);
+       return bkey_s_c_null;
+}
+
+struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
+                                       struct btree_iter *iter,
+                                       struct bpos bucket,
+                                       u64 bp_offset,
+                                       struct bch_backpointer bp)
+{
+       struct bch_fs *c = trans->c;
+       struct btree *b;
+
+       BUG_ON(!bp.level);
+
+       bch2_trans_node_iter_init(trans, iter,
+                                 bp.btree_id,
+                                 bp.pos,
+                                 0,
+                                 bp.level - 1,
+                                 0);
+       b = bch2_btree_iter_peek_node(iter);
+       if (IS_ERR(b)) {
+               bch2_trans_iter_exit(trans, iter);
+               return b;
+       }
+
+       if (extent_matches_bp(c, bp.btree_id, bp.level,
+                             bkey_i_to_s_c(&b->key),
+                             bucket, bp))
+               return b;
+
+       if (!btree_node_will_make_reachable(b))
+               backpointer_not_found(trans, bucket, bp_offset, bp,
+                                     bkey_i_to_s_c(&b->key), "btree node");
+
+       bch2_trans_iter_exit(trans, iter);
+       return NULL;
+}
+
+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter alloc_iter = { NULL };
+       struct bch_dev *ca;
+       struct bkey_s_c k, alloc_k;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       k = bch2_btree_iter_peek(bp_iter);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+       if (!k.k)
+               return 0;
+
+       if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
+                       "backpointer for mising device:\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+               ret = bch2_btree_delete_at(trans, bp_iter, 0);
+               goto out;
+       }
+
+       ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+       bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+                            bp_pos_to_bucket(c, k.k->p), 0);
+
+       alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(alloc_k);
+       if (ret)
+               goto out;
+
+       if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
+                       "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
+                       alloc_iter.pos.inode, alloc_iter.pos.offset,
+                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+               ret = bch2_btree_delete_at(trans, bp_iter, 0);
+               goto out;
+       }
+out:
+fsck_err:
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/* verify that every backpointer has a corresponding alloc key */
+int bch2_check_btree_backpointers(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
+
+       do {
+               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_LAZY_RW|
+                                     BTREE_INSERT_NOFAIL,
+                                     bch2_check_btree_backpointer(&trans, &iter));
+               if (ret)
+                       break;
+       } while (bch2_btree_iter_advance(&iter));
+
+       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+static int check_bp_exists(struct btree_trans *trans,
+                          struct bpos bucket_pos,
+                          struct bch_backpointer bp,
+                          struct bkey_s_c orig_k)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter alloc_iter, bp_iter = { NULL };
+       struct printbuf buf = PRINTBUF;
+       struct bkey_s_c alloc_k, bp_k;
+       int ret;
+
+       bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
+       alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(alloc_k);
+       if (ret)
+               goto err;
+
+       if (alloc_k.k->type == KEY_TYPE_alloc_v4) {
+               struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k);
+               const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v);
+               unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v);
+
+               for (i = 0; i < nr; i++) {
+                       int cmp = backpointer_cmp(bps[i], bp) ?:
+                               memcmp(&bps[i], &bp, sizeof(bp));
+                       if (!cmp)
+                               goto out;
+                       if (cmp >= 0)
+                               break;
+               }
+       } else {
+               goto missing;
+       }
+
+       bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+                            bucket_pos_to_bp(c, bucket_pos, bp.bucket_offset),
+                            0);
+       bp_k = bch2_btree_iter_peek_slot(&bp_iter);
+       ret = bkey_err(bp_k);
+       if (ret)
+               goto err;
+
+       if (bp_k.k->type != KEY_TYPE_backpointer ||
+           memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp)))
+               goto missing;
+out:
+err:
+fsck_err:
+       bch2_trans_iter_exit(trans, &bp_iter);
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       printbuf_exit(&buf);
+       return ret;
+missing:
+       prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
+              bch2_btree_ids[bp.btree_id], bp.level);
+       bch2_bkey_val_to_text(&buf, c, orig_k);
+       prt_printf(&buf, "\nin alloc key ");
+       bch2_bkey_val_to_text(&buf, c, alloc_k);
+
+       if (c->sb.version < bcachefs_metadata_version_backpointers ||
+           fsck_err(c, "%s", buf.buf)) {
+               struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);
+
+               ret   = PTR_ERR_OR_ZERO(a) ?:
+                       bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?:
+                       bch2_trans_update(trans, &alloc_iter, &a->k_i, 0);
+       }
+
+       goto out;
+}
+
+static int check_extent_to_backpointers(struct btree_trans *trans,
+                                       struct btree_iter *iter)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_ptrs_c ptrs;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bkey_s_c k;
+       int ret;
+
+       k = bch2_btree_iter_peek_all_levels(iter);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+       if (!k.k)
+               return 0;
+
+       ptrs = bch2_bkey_ptrs_c(k);
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               struct bpos bucket_pos;
+               struct bch_backpointer bp;
+
+               if (p.ptr.cached)
+                       continue;
+
+               bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
+                                     k, p, &bucket_pos, &bp);
+
+               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int check_btree_root_to_backpointers(struct btree_trans *trans,
+                                           enum btree_id btree_id)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct btree *b;
+       struct bkey_s_c k;
+       struct bkey_ptrs_c ptrs;
+       struct extent_ptr_decoded p;
+       const union bch_extent_entry *entry;
+       int ret;
+
+       bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+                                 c->btree_roots[btree_id].level, 0);
+       b = bch2_btree_iter_peek_node(&iter);
+       ret = PTR_ERR_OR_ZERO(b);
+       if (ret)
+               goto err;
+
+       BUG_ON(b != btree_node_root(c, b));
+
+       k = bkey_i_to_s_c(&b->key);
+       ptrs = bch2_bkey_ptrs_c(k);
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               struct bpos bucket_pos;
+               struct bch_backpointer bp;
+
+               if (p.ptr.cached)
+                       continue;
+
+               bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
+                                     k, p, &bucket_pos, &bp);
+
+               ret = check_bp_exists(trans, bucket_pos, bp, k);
+               if (ret)
+                       goto err;
+       }
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       enum btree_id btree_id;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
+               bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
+                                         0,
+                                         BTREE_ITER_ALL_LEVELS|
+                                         BTREE_ITER_PREFETCH);
+
+               do {
+                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                                             BTREE_INSERT_LAZY_RW|
+                                             BTREE_INSERT_NOFAIL,
+                                             check_extent_to_backpointers(&trans, &iter));
+                       if (ret)
+                               break;
+               } while (!bch2_btree_iter_advance(&iter));
+
+               bch2_trans_iter_exit(&trans, &iter);
+
+               if (ret)
+                       break;
+
+               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_LAZY_RW|
+                                     BTREE_INSERT_NOFAIL,
+                                     check_btree_root_to_backpointers(&trans, btree_id));
+               if (ret)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+static int check_one_backpointer(struct btree_trans *trans,
+                                struct bpos bucket,
+                                u64 *bp_offset)
+{
+       struct btree_iter iter;
+       struct bch_backpointer bp;
+       struct bkey_s_c k;
+       struct printbuf buf = PRINTBUF;
+       int ret;
+
+       ret = bch2_get_next_backpointer(trans, bucket, -1,
+                                       bp_offset, &bp);
+       if (ret || *bp_offset == U64_MAX)
+               return ret;
+
+       k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (fsck_err_on(!k.k, trans->c,
+                       "%s backpointer points to missing extent\n%s",
+                       *bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
+                       (bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
+               ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
+               if (ret == -ENOENT)
+                       bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
+       }
+
+       bch2_trans_iter_exit(trans, &iter);
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               u64 bp_offset = 0;
+
+               while (!(ret = __bch2_trans_do(&trans, NULL, NULL,
+                                              BTREE_INSERT_LAZY_RW|
+                                              BTREE_INSERT_NOFAIL,
+                               check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+                      bp_offset < U64_MAX)
+                       bp_offset++;
+
+               if (ret)
+                       break;
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_exit(&trans);
+       return ret < 0 ? ret : 0;
+}
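
The two position conversions at the top of this new file are inverses: bucket_pos_to_bp() scales the bucket's first sector up by MAX_EXTENT_COMPRESS_RATIO_SHIFT so each bucket owns a dense range of backpointer offsets, and bp_pos_to_bucket() shifts that back down. A round-trip sketch, with the bucket size as an assumption and plain division standing in for sector_to_bucket():

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_EXTENT_COMPRESS_RATIO_SHIFT	10

	int main(void)
	{
		uint64_t bucket_size = 256;	/* sectors per bucket -- assumed */
		uint64_t bucket = 1234, bucket_offset = 17;

		/* bucket_pos_to_bp(): scale the bucket's first sector up so
		 * each bucket owns a dense range of backpointer offsets */
		uint64_t bp_offset = ((bucket * bucket_size) <<
				      MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset;

		/* bp_pos_to_bucket(): drop the in-bucket offset, then convert
		 * sectors back to a bucket number */
		uint64_t bucket2 = (bp_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) /
			bucket_size;

		assert(bucket2 == bucket);
		printf("bucket %llu -> bp offset %llu -> bucket %llu\n",
		       (unsigned long long) bucket,
		       (unsigned long long) bp_offset,
		       (unsigned long long) bucket2);
		return 0;
	}
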
diff --git a/libbcachefs/backpointers.h b/libbcachefs/backpointers.h
new file mode 100644 (file)
index 0000000..fe42af2
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+
+#include "super.h"
+
+int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
+                            int, struct printbuf *);
+void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
+void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+void bch2_backpointer_swab(struct bkey_s);
+
+#define bch2_bkey_ops_backpointer (struct bkey_ops) {  \
+       .key_invalid    = bch2_backpointer_invalid,     \
+       .val_to_text    = bch2_backpointer_k_to_text,   \
+       .swab           = bch2_backpointer_swab,        \
+}
+
+void bch2_extent_ptr_to_bp(struct bch_fs *, enum btree_id, unsigned,
+                          struct bkey_s_c, struct extent_ptr_decoded,
+                          struct bpos *, struct bch_backpointer *);
+
+int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
+                               struct bch_backpointer, struct bkey_s_c);
+int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *,
+                               struct bch_backpointer, struct bkey_s_c);
+int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
+                             u64 *, struct bch_backpointer *);
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
+                                        struct bpos, u64, struct bch_backpointer);
+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
+                                       struct bpos, u64, struct bch_backpointer);
+
+int bch2_check_btree_backpointers(struct bch_fs *);
+int bch2_check_extents_to_backpointers(struct bch_fs *);
+int bch2_check_backpointers_to_extents(struct bch_fs *);
+
+#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
index 2eced20667fa585d7299bd7341b1467034b8f3d7..1f0484aa65016f4da59e918f9d648d87f10fce59 100644 (file)
@@ -509,6 +509,7 @@ enum {
        BCH_FS_TOPOLOGY_REPAIR_DONE,
        BCH_FS_INITIAL_GC_DONE,         /* kill when we enumerate fsck passes */
        BCH_FS_CHECK_LRUS_DONE,
+       BCH_FS_CHECK_BACKPOINTERS_DONE,
        BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
        BCH_FS_FSCK_DONE,
        BCH_FS_INITIAL_GC_UNFIXED,      /* kill when we enumerate fsck errors */
index dbe9a37fcd41b594ae40db24b9bd1b399c3fffa3..147fde1417b05fc031802973f54b6b3440eb9a27 100644 (file)
@@ -365,7 +365,8 @@ static inline void bkey_init(struct bkey *k)
        x(alloc_v3,             24)                     \
        x(set,                  25)                     \
        x(lru,                  26)                     \
-       x(alloc_v4,             27)
+       x(alloc_v4,             27)                     \
+       x(backpointer,          28)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -886,6 +887,12 @@ struct bch_alloc {
        x(stripe,               32)             \
        x(stripe_redundancy,    8)
 
+enum {
+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
+       BCH_ALLOC_FIELDS_V1()
+#undef x
+};
+
 struct bch_alloc_v2 {
        struct bch_val          v;
        __u8                    nr_fields;
@@ -914,6 +921,9 @@ struct bch_alloc_v3 {
        __u8                    data[];
 } __attribute__((packed, aligned(8)));
 
+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags,  0,  1)
+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags,  1,  2)
+
 struct bch_alloc_v4 {
        struct bch_val          v;
        __u64                   journal_seq;
@@ -927,22 +937,27 @@ struct bch_alloc_v4 {
        __u64                   io_time[2];
        __u32                   stripe;
        __u32                   nr_external_backpointers;
-       struct bpos             backpointers[0];
 } __attribute__((packed, aligned(8)));
 
-LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags,  0,  1)
-LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags,  1,  2)
+#define BCH_ALLOC_V4_U64s_V0   6
+#define BCH_ALLOC_V4_U64s      (sizeof(struct bch_alloc_v4) / sizeof(u64))
 
 BITMASK(BCH_ALLOC_V4_NEED_DISCARD,     struct bch_alloc_v4, flags,  0,  1)
 BITMASK(BCH_ALLOC_V4_NEED_INC_GEN,     struct bch_alloc_v4, flags,  1,  2)
 BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags,  2,  8)
 BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS,  struct bch_alloc_v4, flags,  8,  14)
 
-enum {
-#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
-       BCH_ALLOC_FIELDS_V1()
-#undef x
-};
+#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX       40
+
+struct bch_backpointer {
+       struct bch_val          v;
+       __u8                    btree_id;
+       __u8                    level;
+       __u8                    data_type;
+       __u64                   bucket_offset:40;
+       __u32                   bucket_len;
+       struct bpos             pos;
+} __attribute__((packed, aligned(8)));
 
 /* Quotas: */
 
@@ -1326,7 +1341,8 @@ struct bch_sb_field_disk_groups {
        x(io_read,              0)                      \
        x(io_write,             1)                      \
        x(io_move,              2)                      \
-       x(bucket_invalidate,    3)
+       x(bucket_invalidate,    3)                      \
+       x(bucket_discard,       4)
 
 enum bch_persistent_counters {
 #define x(t, n, ...) BCH_COUNTER_##t,
@@ -1407,7 +1423,8 @@ struct bch_sb_field_journal_seq_blacklist {
        x(inode_v2,                     18)             \
        x(freespace,                    19)             \
        x(alloc_v4,                     20)             \
-       x(new_data_types,               21)
+       x(new_data_types,               21)             \
+       x(backpointers,                 22)
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
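
The BITMASK() lines above pack the alloc_v4 metadata into the flags word: need_discard at bit 0, need_inc_gen at bit 1, backpointers_start in bits 2-7, nr_backpointers in bits 8-13. A toy version of the get/set accessors — a demo, not the tree's actual macros:

	#include <stdint.h>
	#include <stdio.h>

	/* extract bits [lo, hi) of flags */
	static uint64_t get_bits(uint64_t flags, unsigned lo, unsigned hi)
	{
		return (flags >> lo) & (((uint64_t) 1 << (hi - lo)) - 1);
	}

	/* store v into bits [lo, hi) of flags */
	static uint64_t set_bits(uint64_t flags, unsigned lo, unsigned hi, uint64_t v)
	{
		uint64_t mask = (((uint64_t) 1 << (hi - lo)) - 1) << lo;

		return (flags & ~mask) | ((v << lo) & mask);
	}

	int main(void)
	{
		uint64_t flags = 0;

		flags = set_bits(flags, 2, 8, 6);	/* BACKPOINTERS_START = 6 */
		flags = set_bits(flags, 8, 14, 3);	/* NR_BACKPOINTERS = 3 */

		printf("start=%llu nr=%llu\n",
		       (unsigned long long) get_bits(flags, 2, 8),
		       (unsigned long long) get_bits(flags, 8, 14));
		return 0;
	}
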
index 229d51578086f4c16feb9b430d97f9df4f48c085..fd352a672d624a73dd7e1a6e4f2f1c889d49db0f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "backpointers.h"
 #include "bkey_methods.h"
 #include "btree_types.h"
 #include "alloc_background.h"
@@ -191,6 +192,9 @@ static unsigned bch2_key_types_allowed[] = {
        [BKEY_TYPE_need_discard] =
                (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_set),
+       [BKEY_TYPE_backpointers] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_backpointer),
        [BKEY_TYPE_btree] =
                (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_btree_ptr)|
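
bch2_key_types_allowed[] gives each btree a bitmask of permitted key types; the new backpointers btree accepts only deleted and backpointer keys. A toy version of that membership check, with enum values invented for the demo:

	#include <stdio.h>

	enum { KEY_TYPE_deleted, KEY_TYPE_backpointer, KEY_TYPE_set };

	int main(void)
	{
		unsigned backpointers_allowed =
			(1U << KEY_TYPE_deleted) |
			(1U << KEY_TYPE_backpointer);

		printf("backpointer ok: %d\n",
		       !!(backpointers_allowed & (1U << KEY_TYPE_backpointer)));
		printf("set ok:         %d\n",
		       !!(backpointers_allowed & (1U << KEY_TYPE_set)));
		return 0;
	}
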
index d8f92cc96c4f8b75a79eb4188007d42b5186560e..5382f2b85e194aeab13e361f45061c4cc95e40da 100644 (file)
@@ -376,7 +376,7 @@ struct btree_trans_commit_hook {
        struct btree_trans_commit_hook  *next;
 };
 
-#define BTREE_TRANS_MEM_MAX    (1U << 14)
+#define BTREE_TRANS_MEM_MAX    (1U << 16)
 
 struct btree_trans {
        struct bch_fs           *c;
@@ -638,6 +638,11 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
        return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
 }
 
+static inline bool btree_type_has_ptrs(enum btree_id id)
+{
+       return (1 << id) & BTREE_ID_HAS_PTRS;
+}
+
 static inline bool btree_node_type_needs_gc(enum btree_node_type type)
 {
        return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
index e2944fc4cfe2760975cf370482e05f5f3f5ee722..1ea7e2baf32323a2cc6b4726869c3bedd841737b 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "backpointers.h"
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -655,16 +656,6 @@ err:
        return ret;
 }
 
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
-       EBUG_ON(sectors < 0);
-
-       return crc_is_compressed(p.crc)
-               ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
-                                  p.crc.uncompressed_size)
-               : sectors;
-}
-
 static int check_bucket_ref(struct bch_fs *c,
                            struct bkey_s_c k,
                            const struct bch_extent_ptr *ptr,
@@ -1368,21 +1359,43 @@ need_mark:
 /* trans_mark: */
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
-                       struct bkey_s_c k, struct extent_ptr_decoded p,
-                       s64 sectors, enum bch_data_type data_type)
+                                  enum btree_id btree_id, unsigned level,
+                                  struct bkey_s_c k, struct extent_ptr_decoded p,
+                                  unsigned flags)
 {
+       bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
        struct btree_iter iter;
        struct bkey_i_alloc_v4 *a;
+       struct bpos bucket_pos;
+       struct bch_backpointer bp;
+       s64 sectors;
        int ret;
 
-       a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
+       bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+       sectors = bp.bucket_len;
+       if (!insert)
+               sectors = -sectors;
+
+       a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
        if (IS_ERR(a))
                return PTR_ERR(a);
 
-       ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+       ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
                             a->v.gen, &a->v.data_type,
-                            &a->v.dirty_sectors, &a->v.cached_sectors) ?:
-               bch2_trans_update(trans, &iter, &a->k_i, 0);
+                            &a->v.dirty_sectors, &a->v.cached_sectors);
+       if (ret)
+               goto err;
+
+       if (!p.ptr.cached) {
+               ret = insert
+                       ? bch2_bucket_backpointer_add(trans, a, bp, k)
+                       : bch2_bucket_backpointer_del(trans, a, bp, k);
+               if (ret)
+                       goto err;
+       }
+
+       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
@@ -1476,8 +1489,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
                if (flags & BTREE_TRIGGER_OVERWRITE)
                        disk_sectors = -disk_sectors;
 
-               ret = bch2_trans_mark_pointer(trans, k, p,
-                                       disk_sectors, data_type);
+               ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
                if (ret < 0)
                        return ret;
 
index 3469327d6c9d73724df8b6a8c85abc3a363a1e42..670b95b824429d07fb4d2c6281740b711c96ba69 100644 (file)
@@ -75,6 +75,15 @@ static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
        return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
 }
 
+static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
+                                               const struct bch_extent_ptr *ptr,
+                                               u32 *bucket_offset)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+       return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
+}
+
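
The new helper returns the bucket position and, through the out-parameter, the sector's offset within that bucket. Assuming bucket_size is the bucket size in sectors (the real sector_to_bucket_and_offset() takes the device and reads its member info), the arithmetic is a plain divide and modulo; a userspace sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: split a device sector number into (bucket, offset in bucket). */
    static uint64_t sector_to_bucket_and_offset(uint64_t sector,
                                                uint64_t bucket_size,
                                                uint32_t *bucket_offset)
    {
        *bucket_offset = sector % bucket_size;
        return sector / bucket_size;
    }

    int main(void)
    {
        uint32_t offset;
        uint64_t bucket = sector_to_bucket_and_offset(1000000, 1024, &offset);

        printf("bucket %llu offset %u\n", (unsigned long long) bucket, offset);
        /* bucket 976 offset 576 */
        return 0;
    }
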
 static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
                                           const struct bch_extent_ptr *ptr)
 {
@@ -90,6 +99,16 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k,
        return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
 }
 
+static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
+{
+       EBUG_ON(sectors < 0);
+
+       return crc_is_compressed(p.crc)
+               ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
+                                  p.crc.uncompressed_size)
+               : sectors;
+}
+
 static inline int gen_cmp(u8 a, u8 b)
 {
        return (s8) (a - b);
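
ptr_disk_sectors() moves here from buckets.c unchanged: for a compressed extent it prorates the live sector count by the crc's compressed/uncompressed ratio, rounding up. A worked example (the numbers are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP_ULL(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* 128 live sectors of an extent compressed from 64 to 40 sectors: */
        uint64_t sectors = 128, compressed = 40, uncompressed = 64;

        /* 128 * 40 / 64 = 80 sectors actually occupied on disk */
        printf("%llu\n",
               (unsigned long long) DIV_ROUND_UP_ULL(sectors * compressed,
                                                     uncompressed));
        return 0;
    }
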
@@ -144,12 +163,25 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser
        return reserved;
 }
 
+static inline u64 dev_buckets_free(struct bch_dev *ca,
+                                  struct bch_dev_usage usage,
+                                  enum alloc_reserve reserve)
+{
+       return max_t(s64, 0,
+                    usage.d[BCH_DATA_free].buckets -
+                    ca->nr_open_buckets -
+                    bch2_dev_buckets_reserved(ca, reserve));
+}
+
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
                                          struct bch_dev_usage usage,
                                          enum alloc_reserve reserve)
 {
        return max_t(s64, 0,
                     usage.d[BCH_DATA_free].buckets -
+                    usage.d[BCH_DATA_cached].buckets -
+                    usage.d[BCH_DATA_need_gc_gens].buckets -
+                    usage.d[BCH_DATA_need_discard].buckets -
                     ca->nr_open_buckets -
                     bch2_dev_buckets_reserved(ca, reserve));
 }
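
This is the split the commit subject refers to: dev_buckets_free() counts only BCH_DATA_free buckets, less open buckets and the reserve, while __dev_buckets_available() now additionally excludes cached, need_gc_gens and need_discard buckets. A userspace sketch mirroring the diff's arithmetic, with bch2_dev_buckets_reserved() stubbed to a constant:

    #include <stdint.h>
    #include <stdio.h>

    enum { FREE, CACHED, NEED_GC_GENS, NEED_DISCARD, NR };

    struct dev_usage { int64_t buckets[NR]; };

    static int64_t max0(int64_t v) { return v > 0 ? v : 0; }

    /* Stub: the real reserve depends on enum alloc_reserve. */
    static int64_t buckets_reserved(void) { return 16; }

    static int64_t dev_buckets_free(struct dev_usage u, int64_t nr_open)
    {
        return max0(u.buckets[FREE] - nr_open - buckets_reserved());
    }

    static int64_t dev_buckets_available(struct dev_usage u, int64_t nr_open)
    {
        return max0(u.buckets[FREE]
                    - u.buckets[CACHED]
                    - u.buckets[NEED_GC_GENS]
                    - u.buckets[NEED_DISCARD]
                    - nr_open
                    - buckets_reserved());
    }

    int main(void)
    {
        struct dev_usage u = { .buckets = { 1000, 200, 10, 30 } };

        printf("free %lld available %lld\n",
               (long long) dev_buckets_free(u, 8),       /* 976 */
               (long long) dev_buckets_available(u, 8)); /* 736 */
        return 0;
    }
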
index 0a9dd5af352435a7bb54d4726ed9ea57cb5d38a0..1dbba7d906dd883a12db432766a02fb527e56207 100644 (file)
@@ -95,7 +95,7 @@ struct copygc_heap_entry {
        u8                      replicas;
        u32                     fragmentation;
        u32                     sectors;
-       u64                     offset;
+       u64                     bucket;
 };
 
 typedef HEAP(struct copygc_heap_entry) copygc_heap;
index f1abec95a740cb03e0b9facd37512677206d4e23..81bfd6ea273eb76988e83ba5af3b32e910f88190 100644 (file)
@@ -1015,47 +1015,6 @@ static int check_subvols(struct bch_fs *c)
        return ret;
 }
 
-/*
- * Checking for overlapping extents needs to be reimplemented
- */
-#if 0
-static int fix_overlapping_extent(struct btree_trans *trans,
-                                      struct bkey_s_c k, struct bpos cut_at)
-{
-       struct btree_iter iter;
-       struct bkey_i *u;
-       int ret;
-
-       u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
-       ret = PTR_ERR_OR_ZERO(u);
-       if (ret)
-               return ret;
-
-       bkey_reassemble(u, k);
-       bch2_cut_front(cut_at, u);
-
-
-       /*
-        * We don't want to go through the extent_handle_overwrites path:
-        *
-        * XXX: this is going to screw up disk accounting, extent triggers
-        * assume things about extent overwrites - we should be running the
-        * triggers manually here
-        */
-       bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
-                            BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
-
-       BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
-       ret   = bch2_btree_iter_traverse(&iter) ?:
-               bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
-               bch2_trans_commit(trans, NULL, NULL,
-                                 BTREE_INSERT_NOFAIL|
-                                 BTREE_INSERT_LAZY_RW);
-       bch2_trans_iter_exit(trans, &iter);
-       return ret;
-}
-#endif
-
 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
                                                struct btree_iter *iter,
                                                struct bpos pos)
@@ -1150,9 +1109,72 @@ fsck_err:
        return ret ?: ret2;
 }
 
+struct extent_end {
+       u32             snapshot;
+       u64             offset;
+};
+
+typedef DARRAY(struct extent_end) extent_ends;
+
+static int extent_ends_at(extent_ends *extent_ends,
+                         struct bkey_s_c k)
+{
+       struct extent_end *i, n = (struct extent_end) {
+               .snapshot       = k.k->p.snapshot,
+               .offset         = k.k->p.offset,
+       };
+
+       darray_for_each(*extent_ends, i) {
+               if (i->snapshot == k.k->p.snapshot) {
+                       *i = n;
+                       return 0;
+               }
+
+               if (i->snapshot >= k.k->p.snapshot)
+                       break;
+       }
+
+       return darray_insert_item(extent_ends, i - extent_ends->data, n);
+}
+
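
extent_ends_at() keeps the list sorted by snapshot id: if an entry for the key's snapshot already exists it is overwritten in place, otherwise the new end is inserted before the first entry with a larger snapshot. The same logic over a plain array (the real code uses a DARRAY; capacity checking is omitted here):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct extent_end { uint32_t snapshot; uint64_t offset; };

    /* Sketch of extent_ends_at(); ends[] must have room for one more entry. */
    static void record_extent_end(struct extent_end *ends, unsigned *nr,
                                  uint32_t snapshot, uint64_t offset)
    {
        unsigned i;

        for (i = 0; i < *nr; i++) {
            if (ends[i].snapshot == snapshot) { /* replace in place */
                ends[i].offset = offset;
                return;
            }
            if (ends[i].snapshot > snapshot)    /* insertion point */
                break;
        }

        memmove(&ends[i + 1], &ends[i], (*nr - i) * sizeof(*ends));
        ends[i] = (struct extent_end) { snapshot, offset };
        (*nr)++;
    }

    int main(void)
    {
        struct extent_end ends[8];
        unsigned nr = 0;

        record_extent_end(ends, &nr, 3, 100);
        record_extent_end(ends, &nr, 1, 50);
        record_extent_end(ends, &nr, 3, 200);   /* replaces snapshot 3 */

        for (unsigned i = 0; i < nr; i++)
            printf("snapshot %u ends at %llu\n", ends[i].snapshot,
                   (unsigned long long) ends[i].offset);
        /* snapshot 1 ends at 50; snapshot 3 ends at 200 */
        return 0;
    }
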
+static int check_extent_start(struct btree_trans *trans,
+                             struct snapshots_seen *s,
+                             extent_ends *extent_ends,
+                             struct bkey_s_c k,
+                             struct btree_iter *iter)
+{
+       struct bch_fs *c = trans->c;
+       struct extent_end *i;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       darray_for_each(*extent_ends, i) {
+               if (fsck_err_on(i->offset > bkey_start_offset(k.k) &&
+                               key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot), c,
+                               "overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
+                               i->snapshot,
+                               i->offset,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+                       bkey_reassemble(update, k);
+                       ret = bch2_trans_update_extent(trans, iter, update, 0);
+                       if (ret)
+                               goto err;
+               }
+       }
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
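
The overlap check records, per snapshot, the end offset of the last extent seen; a new extent overlaps a previous one when that recorded end is past the new extent's start offset and the two snapshots see each other. A self-contained sketch of the core test, with key_visible_in_snapshot() reduced to snapshot equality for illustration:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct extent_end {
        uint32_t snapshot;
        uint64_t offset;    /* end offset of the last extent seen */
    };

    static bool extents_overlap(struct extent_end prev,
                                uint32_t snapshot, uint64_t start_offset)
    {
        return prev.snapshot == snapshot && prev.offset > start_offset;
    }

    int main(void)
    {
        struct extent_end prev = { .snapshot = 1, .offset = 100 };

        /* An extent starting at 96 in the same snapshot overlaps: */
        printf("%d\n", extents_overlap(prev, 1, 96));   /* 1 */
        printf("%d\n", extents_overlap(prev, 1, 100));  /* 0 */
        return 0;
    }
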
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        struct inode_walker *inode,
-                       struct snapshots_seen *s)
+                       struct snapshots_seen *s,
+                       extent_ends *extent_ends)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c k;
@@ -1182,6 +1204,8 @@ peek:
                goto out;
 
        if (inode->cur_inum != k.k->p.inode) {
+               extent_ends->nr = 0;
+
                ret = check_i_sectors(trans, inode);
                if (ret)
                        goto err;
@@ -1195,20 +1219,7 @@ peek:
                 */
                goto peek;
        }
-#if 0
-       if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
-               char buf1[200];
-               char buf2[200];
-
-               bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
-               bch2_bkey_val_to_text(&PBUF(buf2), c, k);
 
-               if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
-                       ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
-                       goto out;
-               }
-       }
-#endif
        ret = __walk_inode(trans, inode, k.k->p);
        if (ret < 0)
                goto err;
@@ -1259,13 +1270,17 @@ peek:
                }
        }
 
+       ret = check_extent_start(trans, s, extent_ends, k, iter);
+       if (ret)
+               goto err;
+
        if (bkey_extent_is_allocation(k.k))
                for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
                        i->count += k.k->size;
-#if 0
-       bch2_bkey_buf_reassemble(&prev, c, k);
-#endif
 
+       ret = extent_ends_at(extent_ends, k);
+       if (ret)
+               goto err;
 out:
 err:
 fsck_err:
@@ -1287,13 +1302,9 @@ static int check_extents(struct bch_fs *c)
        struct snapshots_seen s;
        struct btree_trans trans;
        struct btree_iter iter;
+       extent_ends extent_ends = { 0 };
        int ret = 0;
 
-#if 0
-       struct bkey_buf prev;
-       bch2_bkey_buf_init(&prev);
-       prev.k->k = KEY(0, 0, 0);
-#endif
        snapshots_seen_init(&s);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
@@ -1309,14 +1320,12 @@ static int check_extents(struct bch_fs *c)
                ret = __bch2_trans_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
-                       check_extent(&trans, &iter, &w, &s));
+                       check_extent(&trans, &iter, &w, &s, &extent_ends));
                if (ret)
                        break;
        } while (bch2_btree_iter_advance(&iter));
        bch2_trans_iter_exit(&trans, &iter);
-#if 0
-       bch2_bkey_buf_exit(&prev, c);
-#endif
+       darray_exit(&extent_ends);
        inode_walker_exit(&w);
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
index a8f6d5a3e1baf7ac3931413eb28ea92aa1cb64fa..36d20dc8aaf30b7dc99f56517fb0bb106e4c499e 100644 (file)
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
+#include "backpointers.h"
 #include "bkey_buf.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -9,6 +11,7 @@
 #include "buckets.h"
 #include "disk_groups.h"
 #include "ec.h"
+#include "error.h"
 #include "inode.h"
 #include "io.h"
 #include "journal_reclaim.h"
@@ -632,6 +635,70 @@ err:
        return ret;
 }
 
+static int move_ratelimit(struct btree_trans *trans,
+                         struct moving_context *ctxt,
+                         struct bch_ratelimit *rate)
+{
+       u64 delay;
+
+       do {
+               delay = rate ? bch2_ratelimit_delay(rate) : 0;
+
+               if (delay) {
+                       bch2_trans_unlock(trans);
+                       set_current_state(TASK_INTERRUPTIBLE);
+               }
+
+               if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       return 1;
+               }
+
+               if (delay)
+                       schedule_timeout(delay);
+
+               if (unlikely(freezing(current))) {
+                       move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
+                       try_to_freeze();
+               }
+       } while (delay);
+
+       move_ctxt_wait_event(ctxt, trans,
+               atomic_read(&ctxt->write_sectors) <
+               SECTORS_IN_FLIGHT_PER_DEVICE);
+
+       move_ctxt_wait_event(ctxt, trans,
+               atomic_read(&ctxt->read_sectors) <
+               SECTORS_IN_FLIGHT_PER_DEVICE);
+
+       return 0;
+}
+
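
move_ratelimit() factors the old inline loop out of __bch2_move_data(): sleep while the rate limiter asks for a delay, checking for kthread stop, then wait until reads and writes in flight drop below the per-device limit. The same shape in a userspace sketch (transaction unlock and freezer handling omitted; the limit value is arbitrary):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Userspace stand-ins for the ratelimit, kthread and wait-event calls: */
    static uint64_t ratelimit_delay_us(void) { return 0; }
    static bool stop_requested(void) { return false; }
    static void wait_until_in_flight_below(unsigned sectors) { (void) sectors; }

    /* Sketch of move_ratelimit(): nonzero means the mover should stop. */
    static int move_ratelimit(void)
    {
        uint64_t delay;

        do {
            delay = ratelimit_delay_us();

            if (stop_requested())
                return 1;

            if (delay)
                usleep(delay);  /* schedule_timeout() in the kernel */
        } while (delay);

        /* Backpressure: bound the writes and reads in flight. */
        wait_until_in_flight_below(1 << 17);    /* writes */
        wait_until_in_flight_below(1 << 17);    /* reads */
        return 0;
    }

    int main(void)
    {
        if (!move_ratelimit())
            printf("clear to move more data\n");
        return 0;
    }
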
+static int move_get_io_opts(struct btree_trans *trans,
+                           struct bch_io_opts *io_opts,
+                           struct bkey_s_c k, u64 *cur_inum)
+{
+       struct bch_inode_unpacked inode;
+       int ret;
+
+       if (*cur_inum == k.k->p.inode)
+               return 0;
+
+       *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+
+       ret = lookup_inode(trans,
+                          SPOS(0, k.k->p.inode, k.k->p.snapshot),
+                          &inode);
+       if (ret == -EINTR)
+               return ret;
+
+       if (!ret)
+               bch2_io_opts_apply(io_opts, bch2_inode_opts_get(&inode));
+
+       *cur_inum = k.k->p.inode;
+       return 0;
+}
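
move_get_io_opts() is likewise factored out: it memoizes the options lookup on the inode number, so options are only re-derived when the iterator crosses into a new inode. The caching pattern in miniature, with the inode lookup stubbed:

    #include <stdint.h>
    #include <stdio.h>

    struct io_opts { unsigned background_target; };

    /* Stub for the real inode lookup + bch2_inode_opts_get(). */
    static struct io_opts opts_for_inode(uint64_t inum)
    {
        return (struct io_opts) { .background_target = (unsigned) (inum & 3) };
    }

    static void get_io_opts(struct io_opts *opts, uint64_t inum,
                            uint64_t *cur_inum)
    {
        if (*cur_inum == inum)  /* cache hit: same inode as the last key */
            return;

        *opts = opts_for_inode(inum);
        *cur_inum = inum;
    }

    int main(void)
    {
        struct io_opts opts = { 0 };
        uint64_t cur_inum = UINT64_MAX; /* sentinel: nothing cached yet */

        get_io_opts(&opts, 42, &cur_inum);  /* miss: looks up inode 42 */
        get_io_opts(&opts, 42, &cur_inum);  /* hit: no lookup */
        printf("target %u\n", opts.background_target);
        return 0;
    }
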
+
 static int __bch2_move_data(struct bch_fs *c,
                struct moving_context *ctxt,
                struct bch_ratelimit *rate,
@@ -642,7 +709,6 @@ static int __bch2_move_data(struct bch_fs *c,
                struct bch_move_stats *stats,
                enum btree_id btree_id)
 {
-       bool kthread = (current->flags & PF_KTHREAD) != 0;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct bkey_buf sk;
        struct btree_trans trans;
@@ -650,7 +716,7 @@ static int __bch2_move_data(struct bch_fs *c,
        struct bkey_s_c k;
        struct data_opts data_opts;
        enum data_cmd data_cmd;
-       u64 delay, cur_inum = U64_MAX;
+       u64 cur_inum = U64_MAX;
        int ret = 0, ret2;
 
        bch2_bkey_buf_init(&sk);
@@ -667,37 +733,7 @@ static int __bch2_move_data(struct bch_fs *c,
        if (rate)
                bch2_ratelimit_reset(rate);
 
-       while (1) {
-               do {
-                       delay = rate ? bch2_ratelimit_delay(rate) : 0;
-
-                       if (delay) {
-                               bch2_trans_unlock(&trans);
-                               set_current_state(TASK_INTERRUPTIBLE);
-                       }
-
-                       if (kthread && (ret = kthread_should_stop())) {
-                               __set_current_state(TASK_RUNNING);
-                               goto out;
-                       }
-
-                       if (delay)
-                               schedule_timeout(delay);
-
-                       if (unlikely(freezing(current))) {
-                               move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
-                               try_to_freeze();
-                       }
-               } while (delay);
-
-               move_ctxt_wait_event(ctxt, &trans,
-                       atomic_read(&ctxt->write_sectors) <
-                       SECTORS_IN_FLIGHT_PER_DEVICE);
-
-               move_ctxt_wait_event(ctxt, &trans,
-                       atomic_read(&ctxt->read_sectors) <
-                       SECTORS_IN_FLIGHT_PER_DEVICE);
-
+       while (!move_ratelimit(&trans, ctxt, rate)) {
                bch2_trans_begin(&trans);
 
                k = bch2_btree_iter_peek(&iter);
@@ -718,23 +754,9 @@ static int __bch2_move_data(struct bch_fs *c,
                if (!bkey_extent_is_direct_data(k.k))
                        goto next_nondata;
 
-               if (btree_id == BTREE_ID_extents &&
-                   cur_inum != k.k->p.inode) {
-                       struct bch_inode_unpacked inode;
-
-                       io_opts = bch2_opts_to_inode_opts(c->opts);
-
-                       ret = lookup_inode(&trans,
-                                       SPOS(0, k.k->p.inode, k.k->p.snapshot),
-                                       &inode);
-                       if (ret == -EINTR)
-                               continue;
-
-                       if (!ret)
-                               bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
-
-                       cur_inum = k.k->p.inode;
-               }
+               ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
+               if (ret)
+                       continue;
 
                switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
                case DATA_SKIP:
@@ -779,7 +801,6 @@ next:
 next_nondata:
                bch2_btree_iter_advance(&iter);
        }
-out:
 
        bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
@@ -848,7 +869,6 @@ int bch2_move_data(struct bch_fs *c,
                        break;
        }
 
-
        move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
        closure_sync(&ctxt.cl);
 
@@ -862,6 +882,167 @@ int bch2_move_data(struct bch_fs *c,
        return ret;
 }
 
+static int verify_bucket_evacuated(struct btree_trans *trans, struct bpos bucket, int gen)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+                            bucket, BTREE_ITER_CACHED);
+       k = bch2_btree_iter_peek_slot(&iter);
+       ret = bkey_err(k);
+
+       if (!ret && k.k->type == KEY_TYPE_alloc_v4) {
+               struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+
+               if (a.v->gen == gen &&
+                   a.v->dirty_sectors) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "failed to evacuate bucket ");
+                       bch2_bkey_val_to_text(&buf, c, k);
+
+                       bch_err_ratelimited(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+               }
+       }
+
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_evacuate_bucket(struct bch_fs *c,
+                        struct bpos bucket, int gen,
+                        struct bch_ratelimit *rate,
+                        struct write_point_specifier wp,
+                        enum data_cmd data_cmd,
+                        struct data_opts *data_opts,
+                        struct bch_move_stats *stats)
+{
+       struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+       struct moving_context ctxt = { .stats = stats };
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_buf sk;
+       struct bch_backpointer bp;
+       u64 bp_offset = 0, cur_inum = U64_MAX;
+       int ret = 0;
+
+       bch2_bkey_buf_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+       progress_list_add(c, stats);
+       closure_init_stack(&ctxt.cl);
+       INIT_LIST_HEAD(&ctxt.reads);
+       init_waitqueue_head(&ctxt.wait);
+
+       stats->data_type = BCH_DATA_user;
+
+       while (!(ret = move_ratelimit(&trans, &ctxt, rate))) {
+               bch2_trans_begin(&trans);
+
+               ret = bch2_get_next_backpointer(&trans, bucket, gen,
+                                               &bp_offset, &bp);
+               if (ret == -EINTR)
+                       continue;
+               if (ret)
+                       goto err;
+               if (bp_offset == U64_MAX)
+                       break;
+
+               if (!bp.level) {
+                       struct bkey_s_c k;
+
+                       k = bch2_backpointer_get_key(&trans, &iter,
+                                               bucket, bp_offset, bp);
+                       ret = bkey_err(k);
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret)
+                               goto err;
+                       if (!k.k)
+                               continue;
+
+                       bch2_bkey_buf_reassemble(&sk, c, k);
+                       k = bkey_i_to_s_c(sk.k);
+                       bch2_trans_iter_exit(&trans, &iter);
+
+                       ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
+                       if (ret)
+                               continue;
+
+                       data_opts->target       = io_opts.background_target;
+                       data_opts->rewrite_dev  = bucket.inode;
+
+                       ret = bch2_move_extent(&trans, &ctxt, wp, io_opts, bp.btree_id, k,
+                                               data_cmd, *data_opts);
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret == -ENOMEM) {
+                               /* memory allocation failure, wait for some IO to finish */
+                               bch2_move_ctxt_wait_for_io(&ctxt, &trans);
+                               continue;
+                       }
+                       if (ret)
+                               goto err;
+
+                       if (rate)
+                               bch2_ratelimit_increment(rate, k.k->size);
+                       atomic64_add(k.k->size, &stats->sectors_seen);
+               } else {
+                       struct btree *b;
+
+                       b = bch2_backpointer_get_node(&trans, &iter,
+                                               bucket, bp_offset, bp);
+                       ret = PTR_ERR_OR_ZERO(b);
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret)
+                               goto err;
+                       if (!b)
+                               continue;
+
+                       ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
+                       bch2_trans_iter_exit(&trans, &iter);
+
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret)
+                               goto err;
+
+                       if (rate)
+                               bch2_ratelimit_increment(rate, c->opts.btree_node_size >> 9);
+                       atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_seen);
+                       atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_moved);
+               }
+
+               bp_offset++;
+       }
+
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
+               bch2_trans_unlock(&trans);
+               move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
+               closure_sync(&ctxt.cl);
+               lockrestart_do(&trans, verify_bucket_evacuated(&trans, bucket, gen));
+       }
+err:
+       bch2_trans_exit(&trans);
+       bch2_bkey_buf_exit(&sk, c);
+
+       move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
+       closure_sync(&ctxt.cl);
+       progress_list_del(c, stats);
+
+       EBUG_ON(atomic_read(&ctxt.write_sectors));
+
+       trace_move_data(c,
+                       atomic64_read(&stats->sectors_moved),
+                       atomic64_read(&stats->keys_moved));
+
+       return ret;
+}
+
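
The new entry point walks a single bucket's backpointers in offset order: level-0 backpointers name extents to move, higher levels name btree nodes to rewrite. A sketch of that dispatch, with a canned array standing in for bch2_get_next_backpointer():

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct bp { unsigned level; };

    static const struct bp bucket_bps[] = { { 0 }, { 0 }, { 1 } };

    static bool get_next_backpointer(uint64_t *bp_offset, struct bp *bp)
    {
        if (*bp_offset >= sizeof(bucket_bps) / sizeof(bucket_bps[0]))
            return false;   /* bp_offset == U64_MAX in the real code */
        *bp = bucket_bps[*bp_offset];
        return true;
    }

    int main(void)
    {
        uint64_t bp_offset = 0;
        struct bp bp;

        while (get_next_backpointer(&bp_offset, &bp)) {
            if (!bp.level)
                printf("backpointer %llu: move extent\n",
                       (unsigned long long) bp_offset);
            else
                printf("backpointer %llu: rewrite btree node\n",
                       (unsigned long long) bp_offset);
            bp_offset++;
        }
        return 0;
    }
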
 typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
                                         struct btree *, struct bch_io_opts *,
                                         struct data_opts *);
index 2a789a1158ca22e4e7efb1cd4298b4115da3216d..c69b6b5abe9e4e6d03595cbbf1d7e30531f3fc1f 100644 (file)
@@ -62,6 +62,12 @@ int bch2_move_data(struct bch_fs *,
                   move_pred_fn, void *,
                   struct bch_move_stats *);
 
+int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
+                        struct bch_ratelimit *,
+                        struct write_point_specifier,
+                        enum data_cmd,
+                        struct data_opts *,
+                        struct bch_move_stats *);
 int bch2_data_job(struct bch_fs *,
                  struct bch_move_stats *,
                  struct bch_ioctl_data);
index 99980c3d5d557c4147174f38190c536a4365f4e4..efb09e1c36520bea55f187024dac7dcced292c33 100644 (file)
 #include <linux/sort.h>
 #include <linux/wait.h>
 
-static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
-{
-       const struct copygc_heap_entry *l = _l;
-       const struct copygc_heap_entry *r = _r;
-
-       return  cmp_int(l->dev,    r->dev) ?:
-               cmp_int(l->offset, r->offset);
-}
-
-static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
-                                struct bkey_s_c k,
-                                struct bch_io_opts *io_opts,
-                                struct data_opts *data_opts)
-{
-       copygc_heap *h = &c->copygc_heap;
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p = { 0 };
-
-       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-               struct copygc_heap_entry search = {
-                       .dev    = p.ptr.dev,
-                       .offset = p.ptr.offset,
-               };
-               ssize_t i;
-
-               if (p.ptr.cached)
-                       continue;
-
-               i = eytzinger0_find_le(h->data, h->used,
-                                      sizeof(h->data[0]),
-                                      bucket_offset_cmp, &search);
-#if 0
-               /* eytzinger search verify code: */
-               ssize_t j = -1, k;
-
-               for (k = 0; k < h->used; k++)
-                       if (h->data[k].offset <= ptr->offset &&
-                           (j < 0 || h->data[k].offset > h->data[j].offset))
-                               j = k;
-
-               BUG_ON(i != j);
-#endif
-               if (i >= 0 &&
-                   p.ptr.dev == h->data[i].dev &&
-                   p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
-                   p.ptr.gen == h->data[i].gen) {
-                       /*
-                        * We need to use the journal reserve here, because
-                        *  - journal reclaim depends on btree key cache
-                        *    flushing to make forward progress,
-                        *  - which has to make forward progress when the
-                        *    journal is pre-reservation full,
-                        *  - and depends on allocation - meaning allocator and
-                        *    copygc
-                        */
-
-                       data_opts->target               = io_opts->background_target;
-                       data_opts->nr_replicas          = 1;
-                       data_opts->btree_insert_flags   = BTREE_INSERT_USE_RESERVE|
-                               JOURNAL_WATERMARK_copygc;
-                       data_opts->rewrite_dev          = p.ptr.dev;
-
-                       if (p.has_ec)
-                               data_opts->nr_replicas += p.ec.redundancy;
-
-                       return DATA_REWRITE;
-               }
-       }
-
-       return DATA_SKIP;
-}
-
 static inline int fragmentation_cmp(copygc_heap *heap,
                                   struct copygc_heap_entry l,
                                   struct copygc_heap_entry r)
@@ -111,7 +37,7 @@ static inline int fragmentation_cmp(copygc_heap *heap,
        return cmp_int(l.fragmentation, r.fragmentation);
 }
 
-static int walk_buckets_to_copygc(struct bch_fs *c)
+static int find_buckets_to_copygc(struct bch_fs *c)
 {
        copygc_heap *h = &c->copygc_heap;
        struct btree_trans trans;
@@ -122,6 +48,14 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, 0, 0);
 
+       /*
+        * Find buckets with lowest sector counts, skipping completely
+        * empty buckets, by building a maxheap sorted by sector count,
+        * and repeatedly replacing the maximum element until all
+        * buckets have been visited.
+        */
+       h->used = 0;
+
        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
@@ -129,7 +63,8 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
 
                bch2_alloc_to_v4(k, &a);
 
-               if (a.data_type != BCH_DATA_user ||
+               if ((a.data_type != BCH_DATA_btree &&
+                    a.data_type != BCH_DATA_user) ||
                    a.dirty_sectors >= ca->mi.bucket_size ||
                    bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
                        continue;
@@ -141,7 +76,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
                        .fragmentation  = div_u64((u64) a.dirty_sectors * (1ULL << 31),
                                                  ca->mi.bucket_size),
                        .sectors        = a.dirty_sectors,
-                       .offset         = bucket_to_sector(ca, iter.pos.offset),
+                       .bucket         = iter.pos.offset,
                };
                heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
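
The relocated comment above describes the selection strategy; in miniature, a bounded maxheap keeps the N least-fragmented candidates by replacing the current maximum (the real code uses the generic heap_add_or_replace() macro keyed on fragmentation):

    #include <stdio.h>

    #define HEAP_SIZE 4

    static unsigned heap[HEAP_SIZE], used;

    static void sift_down(unsigned i)
    {
        for (;;) {
            unsigned l = 2 * i + 1, r = l + 1, big = i, tmp;

            if (l < used && heap[l] > heap[big]) big = l;
            if (r < used && heap[r] > heap[big]) big = r;
            if (big == i) return;
            tmp = heap[i]; heap[i] = heap[big]; heap[big] = tmp;
            i = big;
        }
    }

    /* Keep the HEAP_SIZE smallest values seen so far. */
    static void add_or_replace(unsigned v)
    {
        if (used < HEAP_SIZE) {
            unsigned i = used++;
            heap[i] = v;
            while (i && heap[(i - 1) / 2] < heap[i]) {  /* sift up */
                unsigned p = (i - 1) / 2, tmp = heap[i];
                heap[i] = heap[p]; heap[p] = tmp;
                i = p;
            }
        } else if (v < heap[0]) {
            heap[0] = v;    /* replace the current maximum */
            sift_down(0);
        }
    }

    int main(void)
    {
        unsigned frag[] = { 90, 10, 70, 30, 50, 20, 80 }, i;

        for (i = 0; i < sizeof(frag) / sizeof(frag[0]); i++)
            add_or_replace(frag[i]);

        for (i = 0; i < used; i++)
            printf("%u ", heap[i]); /* the 4 smallest, heap-ordered */
        printf("\n");
        return 0;
    }
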
 
@@ -152,77 +87,22 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
        return ret;
 }
 
-static int bucket_inorder_cmp(const void *_l, const void *_r)
-{
-       const struct copygc_heap_entry *l = _l;
-       const struct copygc_heap_entry *r = _r;
-
-       return cmp_int(l->dev, r->dev) ?: cmp_int(l->offset, r->offset);
-}
-
-static int check_copygc_was_done(struct bch_fs *c,
-                                u64 *sectors_not_moved,
-                                u64 *buckets_not_moved)
-{
-       copygc_heap *h = &c->copygc_heap;
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bch_alloc_v4 a;
-       struct copygc_heap_entry *i;
-       int ret = 0;
-
-       sort(h->data, h->used, sizeof(h->data[0]), bucket_inorder_cmp, NULL);
-
-       bch2_trans_init(&trans, c, 0, 0);
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, 0);
-
-       for (i = h->data; i < h->data + h->used; i++) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, i->dev);
-
-               bch2_btree_iter_set_pos(&iter, POS(i->dev, sector_to_bucket(ca, i->offset)));
-
-               ret = lockrestart_do(&trans,
-                               bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-               if (ret)
-                       break;
-
-               bch2_alloc_to_v4(k, &a);
-
-               if (a.gen == i->gen && a.dirty_sectors) {
-                       *sectors_not_moved += a.dirty_sectors;
-                       *buckets_not_moved += 1;
-               }
-       }
-       bch2_trans_iter_exit(&trans, &iter);
-
-       bch2_trans_exit(&trans);
-       return ret;
-}
-
 static int bch2_copygc(struct bch_fs *c)
 {
        copygc_heap *h = &c->copygc_heap;
-       struct copygc_heap_entry e, *i;
+       struct copygc_heap_entry e;
        struct bch_move_stats move_stats;
-       u64 sectors_to_move = 0, sectors_to_write = 0, sectors_not_moved = 0;
-       u64 sectors_reserved = 0;
-       u64 buckets_to_move, buckets_not_moved = 0;
        struct bch_dev *ca;
        unsigned dev_idx;
        size_t heap_size = 0;
+       struct data_opts data_opts = {
+               .nr_replicas            = 1,
+               .btree_insert_flags     = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
+       };
        int ret;
 
        bch_move_stats_init(&move_stats, "copygc");
 
-       /*
-        * Find buckets with lowest sector counts, skipping completely
-        * empty buckets, by building a maxheap sorted by sector count,
-        * and repeatedly replacing the maximum element until all
-        * buckets have been visited.
-        */
-       h->used = 0;
-
        for_each_rw_member(ca, c, dev_idx)
                heap_size += ca->mi.nbuckets >> 7;
 
@@ -234,21 +114,7 @@ static int bch2_copygc(struct bch_fs *c)
                }
        }
 
-       for_each_rw_member(ca, c, dev_idx) {
-               struct bch_dev_usage usage = bch2_dev_usage_read(ca);
-
-               u64 avail = max_t(s64, 0,
-                                 usage.d[BCH_DATA_free].buckets +
-                                 usage.d[BCH_DATA_need_discard].buckets -
-                                 ca->nr_open_buckets -
-                                 bch2_dev_buckets_reserved(ca, RESERVE_movinggc));
-
-               avail = min(avail, ca->mi.nbuckets >> 6);
-
-               sectors_reserved += avail * ca->mi.bucket_size;
-       }
-
-       ret = walk_buckets_to_copygc(c);
+       ret = find_buckets_to_copygc(c);
        if (ret) {
                bch2_fs_fatal_error(c, "error walking buckets to copygc!");
                return ret;
@@ -259,68 +125,24 @@ static int bch2_copygc(struct bch_fs *c)
                return 0;
        }
 
-       /*
-        * Our btree node allocations also come out of RESERVE_movingc:
-        */
-       sectors_reserved = (sectors_reserved * 3) / 4;
-       if (!sectors_reserved) {
-               bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!");
-               return -1;
-       }
+       heap_resort(h, fragmentation_cmp, NULL);
 
-       for (i = h->data; i < h->data + h->used; i++) {
-               sectors_to_move += i->sectors;
-               sectors_to_write += i->sectors * i->replicas;
-       }
-
-       while (sectors_to_write > sectors_reserved) {
+       while (h->used) {
                BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
-               sectors_to_write -= e.sectors * e.replicas;
-       }
-
-       buckets_to_move = h->used;
-
-       if (!buckets_to_move) {
-               bch_err_ratelimited(c, "copygc cannot run - sectors_reserved %llu!",
-                                   sectors_reserved);
-               return 0;
-       }
-
-       eytzinger0_sort(h->data, h->used,
-                       sizeof(h->data[0]),
-                       bucket_offset_cmp, NULL);
-
-       ret = bch2_move_data(c,
-                            0,                 POS_MIN,
-                            BTREE_ID_NR,       POS_MAX,
-                            NULL,
-                            writepoint_ptr(&c->copygc_write_point),
-                            copygc_pred, NULL,
-                            &move_stats);
-       if (ret < 0)
-               bch_err(c, "error %i from bch2_move_data() in copygc", ret);
-       if (ret)
-               return ret;
-
-       ret = check_copygc_was_done(c, &sectors_not_moved, &buckets_not_moved);
-       if (ret) {
-               bch_err(c, "error %i from check_copygc_was_done()", ret);
-               return ret;
+               /* not correct w.r.t. device removal */
+
+               ret = bch2_evacuate_bucket(c, POS(e.dev, e.bucket), e.gen, NULL,
+                                          writepoint_ptr(&c->copygc_write_point),
+                                          DATA_REWRITE, &data_opts,
+                                          &move_stats);
+               if (ret < 0)
+                       bch_err(c, "error %i from bch2_evacuate_bucket() in copygc", ret);
+               if (ret)
+                       return ret;
        }
 
-       if (sectors_not_moved)
-               bch_warn_ratelimited(c,
-                       "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)",
-                        sectors_not_moved, sectors_to_move,
-                        buckets_not_moved, buckets_to_move,
-                        atomic64_read(&move_stats.sectors_moved),
-                        atomic64_read(&move_stats.keys_raced),
-                        atomic64_read(&move_stats.sectors_raced));
-
-       trace_copygc(c,
-                    atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
-                    buckets_to_move, buckets_not_moved);
-       return 0;
+       trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
+       return ret;
 }
 
 /*
index 480abf13afcbf5cc6a3cd8d511310fec167bc82c..63e8c1c3d940db02f3a55720cd0977732e2e55b8 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "backpointers.h"
 #include "bkey_buf.h"
 #include "alloc_background.h"
 #include "btree_gc.h"
@@ -1075,8 +1076,8 @@ int bch2_fs_recovery(struct bch_fs *c)
        }
 
        if (!c->opts.nochanges) {
-               if (c->sb.version < bcachefs_metadata_version_new_data_types) {
-                       bch_info(c, "version prior to new_data_types, upgrade and fsck required");
+               if (c->sb.version < bcachefs_metadata_version_backpointers) {
+                       bch_info(c, "version prior to backpointers, upgrade and fsck required");
                        c->opts.version_upgrade = true;
                        c->opts.fsck            = true;
                        c->opts.fix_errors      = FSCK_OPT_YES;
@@ -1254,6 +1255,28 @@ use_clean:
                bch_verbose(c, "done checking lrus");
                set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
 
+               bch_info(c, "checking backpointers to alloc keys");
+               err = "error checking backpointers to alloc keys";
+               ret = bch2_check_btree_backpointers(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking backpointers to alloc keys");
+
+               bch_info(c, "checking backpointers to extents");
+               err = "error checking backpointers to extents";
+               ret = bch2_check_backpointers_to_extents(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking backpointers to extents");
+
+               bch_info(c, "checking extents to backpointers");
+               err = "error checking extents to backpointers";
+               ret = bch2_check_extents_to_backpointers(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking extents to backpointers");
+               set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
+
                bch_info(c, "checking alloc to lru refs");
                err = "error checking alloc to lru refs";
                ret = bch2_check_alloc_to_lru_refs(c);
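
Each of the three new backpointer checks follows the same recovery-pass boilerplate: announce, run, bail out on error, report, then mark the pass done. That repeated shape, as a hedged sketch with the pass bodies stubbed out (names illustrative):

    #include <stdio.h>

    struct fs { unsigned long flags; };

    static int check_btree_backpointers(struct fs *c)      { (void) c; return 0; }
    static int check_backpointers_to_extents(struct fs *c) { (void) c; return 0; }
    static int check_extents_to_backpointers(struct fs *c) { (void) c; return 0; }

    /* Sketch of the pass pattern used in bch2_fs_recovery() above. */
    static int run_pass(struct fs *c, const char *name, int (*fn)(struct fs *))
    {
        int ret;

        printf("checking %s\n", name);
        ret = fn(c);
        if (ret) {
            fprintf(stderr, "error checking %s\n", name);
            return ret;
        }
        printf("done checking %s\n", name);
        return 0;
    }

    int main(void)
    {
        struct fs c = { 0 };
        int ret;

        ret = run_pass(&c, "backpointers to alloc keys", check_btree_backpointers);
        if (!ret)
            ret = run_pass(&c, "backpointers to extents", check_backpointers_to_extents);
        if (!ret)
            ret = run_pass(&c, "extents to backpointers", check_extents_to_backpointers);
        return ret;
    }
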
@@ -1265,6 +1288,7 @@ use_clean:
                set_bit(BCH_FS_MAY_GO_RW, &c->flags);
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
+               set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
                set_bit(BCH_FS_FSCK_DONE, &c->flags);
 
@@ -1417,6 +1441,9 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
+       if (c->sb.version < bcachefs_metadata_version_backpointers)
+               c->opts.version_upgrade = true;
+
        if (c->opts.version_upgrade) {
                c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
                c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
index 71fc231d380c190b41b2ea6135c594cc517a7f6a..2908974034ca0babfc12c049277af9b2cc58ef3c 100644 (file)
@@ -1433,6 +1433,8 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
                                        BTREE_TRIGGER_NORUN, NULL) ?:
                bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
                                        BTREE_TRIGGER_NORUN, NULL) ?:
+               bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
+                                       BTREE_TRIGGER_NORUN, NULL) ?:
                bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
                                        BTREE_TRIGGER_NORUN, NULL);
        if (ret)