git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 4dd9a5a488 bcachefs: Fix for leaking of reflinked extents
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 20 Oct 2021 17:43:31 +0000 (13:43 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Wed, 20 Oct 2021 17:43:31 +0000 (13:43 -0400)
.bcachefs_revision
libbcachefs/bcachefs_format.h
libbcachefs/buckets.c
libbcachefs/fsck.c
libbcachefs/recovery.c
libbcachefs/reflink.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index cd1097b57c3b9d23501341540afd33af477a8b0b..43da7f3f20dfe3ff6307a7c1c4dcff3f4ab34648 100644 (file)
@@ -1 +1 @@
-d9d1235f3c568a47b3547c0b0adad0d7948f18aa
+4dd9a5a488857137ce6eecadddd9304440fb03e9
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 0b8eabe5eaa47a2d3922e2a2d12aa5deef6df18d..e268125b057e195d53c7343bea5f5d22d47317d2 100644 (file)
@@ -913,18 +913,24 @@ struct bch_stripe {
 struct bch_reflink_p {
        struct bch_val          v;
        __le64                  idx;
-
-       __le32                  reservation_generation;
-       __u8                    nr_replicas;
-       __u8                    pad[3];
-};
+       /*
+        * A reflink pointer might point to an indirect extent which is then
+        * later split (by copygc or rebalance). If we only pointed to part of
+        * the original indirect extent, and then one of the fragments is
+        * outside the range we point to, we'd leak a refcount: so when creating
+        * reflink pointers, we need to store pad values to remember the full
+        * range we were taking a reference on.
+        */
+       __le32                  front_pad;
+       __le32                  back_pad;
+} __attribute__((packed, aligned(8)));
 
 struct bch_reflink_v {
        struct bch_val          v;
        __le64                  refcount;
        union bch_extent_entry  start[0];
        __u64                   _data[0];
-};
+} __attribute__((packed, aligned(8)));
 
 struct bch_indirect_inline_data {
        struct bch_val          v;
@@ -1259,7 +1265,8 @@ enum bcachefs_metadata_version {
        bcachefs_metadata_version_inode_backpointers    = 13,
        bcachefs_metadata_version_btree_ptr_sectors_written = 14,
        bcachefs_metadata_version_snapshot_2            = 15,
-       bcachefs_metadata_version_max                   = 16,
+       bcachefs_metadata_version_reflink_p_fix         = 16,
+       bcachefs_metadata_version_max                   = 17,
 };
 
 #define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index d5ec4d727d0e56ad2e35cdc8832b781aa1358dc9..97151ec80c5220573cf48598bcadcc20dae97da7 100644 (file)
@@ -1176,8 +1176,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
        struct reflink_gc *ref;
        size_t l, r, m;
-       u64 idx = le64_to_cpu(p.v->idx);
-       unsigned sectors = p.k->size;
+       u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
+                           le32_to_cpu(p.v->back_pad) +
+                           p.k->size;
        s64 ret = 0;
 
        BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
@@ -1753,12 +1755,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                bch2_fs_inconsistent(c,
                        "%llu:%llu len %u points to nonexistent indirect extent %llu",
                        p.k->p.inode, p.k->p.offset, p.k->size, idx);
-               bch2_inconsistent_error(c);
                ret = -EIO;
                goto err;
        }
 
-       BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
+       if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+               bch2_fs_inconsistent(c,
+                       "%llu:%llu len %u idx %llu indirect extent refcount underflow",
+                       p.k->p.inode, p.k->p.offset, p.k->size, idx);
+               ret = -EIO;
+               goto err;
+       }
+
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+               u64 pad;
+
+               pad = max_t(s64, le32_to_cpu(v->front_pad),
+                           le64_to_cpu(v->idx) - bkey_start_offset(k.k));
+               BUG_ON(pad > U32_MAX);
+               v->front_pad = cpu_to_le32(pad);
+
+               pad = max_t(s64, le32_to_cpu(v->back_pad),
+                           k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
+               BUG_ON(pad > U32_MAX);
+               v->back_pad = cpu_to_le32(pad);
+       }
+
        le64_add_cpu(refcount, add);
 
        if (!*refcount) {
@@ -1781,10 +1804,20 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
                                     struct bkey_s_c k, unsigned flags)
 {
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       u64 idx = le64_to_cpu(p.v->idx);
-       unsigned sectors = p.k->size;
+       u64 idx, sectors;
        s64 ret = 0;
 
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+
+               v->front_pad = v->back_pad = 0;
+       }
+
+       idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       sectors = (u64) le32_to_cpu(p.v->front_pad) +
+                       le32_to_cpu(p.v->back_pad) +
+                       p.k->size;
+
        while (sectors) {
                ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
                if (ret < 0)
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index a36bc840a62c56e2828b2b3152647f299a9d8b60..c99e1514fd4f7e394e6e76d977b6f631b3150183 100644 (file)
@@ -2154,6 +2154,72 @@ static int check_nlinks(struct bch_fs *c)
        return ret;
 }
 
+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
+{
+       struct bkey_s_c k;
+       struct bkey_s_c_reflink_p p;
+       struct bkey_i_reflink_p *u;
+       int ret;
+
+       k = bch2_btree_iter_peek(iter);
+       if (!k.k)
+               return 0;
+
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_reflink_p)
+               return 0;
+
+       p = bkey_s_c_to_reflink_p(k);
+
+       if (!p.v->front_pad && !p.v->back_pad)
+               return 0;
+
+       u = bch2_trans_kmalloc(trans, sizeof(*u));
+       ret = PTR_ERR_OR_ZERO(u);
+       if (ret)
+               return ret;
+
+       bkey_reassemble(&u->k_i, k);
+       u->v.front_pad  = 0;
+       u->v.back_pad   = 0;
+
+       return bch2_trans_update(trans, iter, &u->k_i, 0);
+}
+
+static int fix_reflink_p(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
+               return 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
+                          BTREE_ITER_INTENT|
+                          BTREE_ITER_PREFETCH|
+                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               if (k.k->type == KEY_TYPE_reflink_p) {
+                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                                             BTREE_INSERT_NOFAIL|
+                                             BTREE_INSERT_LAZY_RW,
+                                             fix_reflink_p_key(&trans, &iter));
+                       if (ret)
+                               break;
+               }
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
 /*
  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
  * Doesn't fix them yet, mainly because they haven't yet been observed:
@@ -2168,7 +2234,8 @@ int bch2_fsck_full(struct bch_fs *c)
                check_xattrs(c) ?:
                check_root(c) ?:
                check_directory_structure(c) ?:
-               check_nlinks(c);
+               check_nlinks(c) ?:
+               fix_reflink_p(c);
 }
 
 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6afb37a2e1b04a07bb92111857dfe62f700d3185..8c53b1e977d1bf2d272e05e52be8dc812a8fcf4b 100644 (file)
@@ -1086,12 +1086,10 @@ int bch2_fs_recovery(struct bch_fs *c)
                c->opts.version_upgrade = true;
                c->opts.fsck            = true;
                c->opts.fix_errors      = FSCK_OPT_YES;
-       } else if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
-               bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
-               c->opts.version_upgrade = true;
-       } else if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
-               bch_info(c, "filesystem version is prior to snapshots - upgrading");
+       } else if (c->sb.version < bcachefs_metadata_version_reflink_p_fix) {
+               bch_info(c, "filesystem version is prior to reflink_p fix - upgrading");
                c->opts.version_upgrade = true;
+               c->opts.fsck            = true;
        }
 
        ret = bch2_blacklist_table_initialize(c);
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 9bcf4216a286bd63abe96d0f6d6f08b22ab43aa3..2827d0ef10195dd36a45ac87dca7522ebd5dab9b 100644 (file)
@@ -32,6 +32,10 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
        if (bkey_val_bytes(p.k) != sizeof(*p.v))
                return "incorrect value size";
 
+       if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
+           le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
+               return "idx < front_pad";
+
        return NULL;
 }