git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 5b8c4a1366 bcachefs: bcachefs_metadata_version_deleted_inodes
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 3 Aug 2023 17:26:22 +0000 (13:26 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Thu, 3 Aug 2023 17:26:22 +0000 (13:26 -0400)
.bcachefs_revision
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/opts.c
libbcachefs/recovery.c
libbcachefs/recovery_types.h [new file with mode: 0644]
libbcachefs/tests.c

index 7fb9f9a3516d4563bd23574cffae854d7ed753ab..b2d874146ec6b06a9bcd8ff0ae205403bdfa0b73 100644 (file)
@@ -1 +1 @@
-33a60d9b05f523be93973b25e0df1ab2d65fa4fc
+5b8c4a1366df20bc043404cb882230ce86296590
index 82b0706a89365ee4ebdf2f1a77069c3cab93d2c5..e1f1e8e871a81259104a0e16c43fdba739995aa2 100644 (file)
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
+#include "recovery_types.h"
 #include "seqmutex.h"
 #include "util.h"
 
@@ -452,6 +453,7 @@ enum gc_phase {
        GC_PHASE_BTREE_backpointers,
        GC_PHASE_BTREE_bucket_gens,
        GC_PHASE_BTREE_snapshot_trees,
+       GC_PHASE_BTREE_deleted_inodes,
 
        GC_PHASE_PENDING_DELETE,
 };
@@ -655,48 +657,6 @@ enum bch_write_ref {
        BCH_WRITE_REF_NR,
 };
 
-#define PASS_SILENT            BIT(0)
-#define PASS_FSCK              BIT(1)
-#define PASS_UNCLEAN           BIT(2)
-#define PASS_ALWAYS            BIT(3)
-
-#define BCH_RECOVERY_PASSES()                                                                  \
-       x(alloc_read,                   PASS_ALWAYS)                                            \
-       x(stripes_read,                 PASS_ALWAYS)                                            \
-       x(initialize_subvolumes,        0)                                                      \
-       x(snapshots_read,               PASS_ALWAYS)                                            \
-       x(check_topology,               0)                                                      \
-       x(check_allocations,            PASS_FSCK)                                              \
-       x(set_may_go_rw,                PASS_ALWAYS|PASS_SILENT)                                \
-       x(journal_replay,               PASS_ALWAYS)                                            \
-       x(check_alloc_info,             PASS_FSCK)                                              \
-       x(check_lrus,                   PASS_FSCK)                                              \
-       x(check_btree_backpointers,     PASS_FSCK)                                              \
-       x(check_backpointers_to_extents,PASS_FSCK)                                              \
-       x(check_extents_to_backpointers,PASS_FSCK)                                              \
-       x(check_alloc_to_lru_refs,      PASS_FSCK)                                              \
-       x(fs_freespace_init,            PASS_ALWAYS|PASS_SILENT)                                \
-       x(bucket_gens_init,             0)                                                      \
-       x(check_snapshot_trees,         PASS_FSCK)                                              \
-       x(check_snapshots,              PASS_FSCK)                                              \
-       x(check_subvols,                PASS_FSCK)                                              \
-       x(delete_dead_snapshots,        PASS_FSCK|PASS_UNCLEAN)                                 \
-       x(fs_upgrade_for_subvolumes,    0)                                                      \
-       x(check_inodes,                 PASS_FSCK|PASS_UNCLEAN)                                 \
-       x(check_extents,                PASS_FSCK)                                              \
-       x(check_dirents,                PASS_FSCK)                                              \
-       x(check_xattrs,                 PASS_FSCK)                                              \
-       x(check_root,                   PASS_FSCK)                                              \
-       x(check_directory_structure,    PASS_FSCK)                                              \
-       x(check_nlinks,                 PASS_FSCK)                                              \
-       x(fix_reflink_p,                0)                                                      \
-
-enum bch_recovery_pass {
-#define x(n, when)     BCH_RECOVERY_PASS_##n,
-       BCH_RECOVERY_PASSES()
-#undef x
-};
-
 struct bch_fs {
        struct closure          cl;
 
index b771d80f636186f27bb524efc0fb0b06088f45f3..5ec218ee356947140c4e91f32a38c65c6609db54 100644 (file)
@@ -1629,7 +1629,9 @@ struct bch_sb_field_journal_seq_blacklist {
        x(major_minor,                  BCH_VERSION(1,  0),             \
          0)                                                            \
        x(snapshot_skiplists,           BCH_VERSION(1,  1),             \
-         BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
+         BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))                   \
+       x(deleted_inodes,               BCH_VERSION(1,  2),             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_inodes))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -2251,7 +2253,9 @@ enum btree_id_flags {
        x(bucket_gens,          14,     0,                                      \
          BIT_ULL(KEY_TYPE_bucket_gens))                                        \
        x(snapshot_trees,       15,     0,                                      \
-         BIT_ULL(KEY_TYPE_snapshot_tree))
+         BIT_ULL(KEY_TYPE_snapshot_tree))                                      \
+       x(deleted_inodes,       16,     BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_set))
 
 enum btree_id {
 #define x(name, nr, ...) BTREE_ID_##name = nr,
index 6b691b2b52afeb70703dd5387ee670e2c2931580..d433f4d5662da9fd8991823ca2c37d1c0aae3082 100644 (file)
@@ -35,7 +35,7 @@
 
 #include <trace/events/writeback.h>
 
-static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
+static int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool);
 
 struct folio_vec {
        struct folio    *fv_folio;
@@ -3410,11 +3410,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
                }
 
                if (!(mode & FALLOC_FL_ZERO_RANGE)) {
-                       ret = drop_locks_do(&trans,
-                               (bch2_clamp_data_hole(&inode->v,
-                                                     &hole_start,
-                                                     &hole_end,
-                                                     opts.data_replicas), 0));
+                       if (bch2_clamp_data_hole(&inode->v,
+                                                &hole_start,
+                                                &hole_end,
+                                                opts.data_replicas, true))
+                               ret = drop_locks_do(&trans,
+                                       (bch2_clamp_data_hole(&inode->v,
+                                                             &hole_start,
+                                                             &hole_end,
+                                                             opts.data_replicas, false), 0));
                        bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
 
                        if (ret)
@@ -3714,7 +3718,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos,
 static loff_t bch2_seek_pagecache_data(struct inode *vinode,
                                       loff_t start_offset,
                                       loff_t end_offset,
-                                      unsigned min_replicas)
+                                      unsigned min_replicas,
+                                      bool nonblock)
 {
        struct folio_batch fbatch;
        pgoff_t start_index     = start_offset >> PAGE_SHIFT;
@@ -3731,7 +3736,13 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];
 
-                       folio_lock(folio);
+                       if (!nonblock) {
+                               folio_lock(folio);
+                       } else if (!folio_trylock(folio)) {
+                               folio_batch_release(&fbatch);
+                               return -EAGAIN;
+                       }
+
                        offset = folio_data_offset(folio,
                                        max(folio_pos(folio), start_offset),
                                        min_replicas);
@@ -3796,7 +3807,7 @@ err:
 
        if (next_data > offset)
                next_data = bch2_seek_pagecache_data(&inode->v,
-                                                    offset, next_data, 0);
+                                       offset, next_data, 0, false);
 
        if (next_data >= isize)
                return -ENXIO;
@@ -3804,18 +3815,24 @@ err:
        return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
 }
 
-static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
-                             unsigned min_replicas)
+static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
+                             unsigned min_replicas, bool nonblock)
 {
        struct folio *folio;
        struct bch_folio *s;
        unsigned i, sectors;
        bool ret = true;
 
-       folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT);
+       folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
+                                   !nonblock ? FGP_LOCK : 0, 0);
        if (IS_ERR_OR_NULL(folio))
                return true;
 
+       if (nonblock && !folio_trylock(folio)) {
+               folio_put(folio);
+               return -EAGAIN;
+       }
+
        s = bch2_folio(folio);
        if (!s)
                goto unlock;
@@ -3833,37 +3850,51 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
        ret = false;
 unlock:
        folio_unlock(folio);
+       folio_put(folio);
        return ret;
 }
 
+/*
+ * Walk the page cache from @start_offset towards @end_offset, asking
+ * folio_hole_offset() about each position until it reports a hole.
+ *
+ * Returns the offset the walk stopped at, clamped to @end_offset.  When
+ * @nonblock is set and a folio could not be locked without blocking,
+ * folio_hole_offset() returns a negative error (-EAGAIN); that error is
+ * passed back to the caller instead of being mistaken for "hole found".
+ */
 static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
                                       loff_t start_offset,
                                       loff_t end_offset,
-                                      unsigned min_replicas)
+                                      unsigned min_replicas,
+                                      bool nonblock)
 {
        struct address_space *mapping = vinode->i_mapping;
        loff_t offset = start_offset;
+       int ret = 0;
 
-       while (offset < end_offset &&
-              !folio_hole_offset(mapping, &offset, min_replicas))
-               ;
+       /* ret == 0: keep scanning; ret > 0: hole found; ret < 0: error */
+       while (!ret && offset < end_offset)
+               ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
+
+       if (ret < 0)
+               return ret;
 
        return min(offset, end_offset);
 }
 
-static void bch2_clamp_data_hole(struct inode *inode,
-                                u64 *hole_start,
-                                u64 *hole_end,
-                                unsigned min_replicas)
+/*
+ * Shrink the sector range [*hole_start, *hole_end) so that it does not cover
+ * data present in the page cache: *hole_start is moved forward to the first
+ * pagecache hole, then *hole_end is pulled back to the next pagecache data.
+ *
+ * @hole_start and @hole_end are in 512-byte sectors; they are converted to
+ * byte offsets (<< 9) for the pagecache helpers and back (>> 9) afterwards.
+ *
+ * Returns 0 on success, or the negative error returned by a helper (-EAGAIN
+ * when @nonblock is set and a folio lock would have blocked), in which case
+ * the bound being computed is left unmodified.
+ */
+static int bch2_clamp_data_hole(struct inode *inode,
+                               u64 *hole_start,
+                               u64 *hole_end,
+                               unsigned min_replicas,
+                               bool nonblock)
 {
-       *hole_start = bch2_seek_pagecache_hole(inode,
-               *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+       loff_t ret;
+
+       /* Check for an error before shifting, so the error code is preserved: */
+       ret = bch2_seek_pagecache_hole(inode,
+               *hole_start << 9, *hole_end << 9, min_replicas, nonblock);
+       if (ret < 0)
+               return ret;
+
+       *hole_start = ret >> 9;
 
        if (*hole_start == *hole_end)
-               return;
+               return 0;
 
-       *hole_end = bch2_seek_pagecache_data(inode,
-               *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+       ret = bch2_seek_pagecache_data(inode,
+               *hole_start << 9, *hole_end << 9, min_replicas, nonblock);
+       if (ret < 0)
+               return ret;
+
+       *hole_end = ret >> 9;
+       return 0;
 }
 
 static loff_t bch2_seek_hole(struct file *file, u64 offset)
@@ -3895,12 +3926,12 @@ retry:
                           BTREE_ITER_SLOTS, k, ret) {
                if (k.k->p.inode != inode->v.i_ino) {
                        next_hole = bch2_seek_pagecache_hole(&inode->v,
-                                       offset, MAX_LFS_FILESIZE, 0);
+                                       offset, MAX_LFS_FILESIZE, 0, false);
                        break;
                } else if (!bkey_extent_is_data(k.k)) {
                        next_hole = bch2_seek_pagecache_hole(&inode->v,
                                        max(offset, bkey_start_offset(k.k) << 9),
-                                       k.k->p.offset << 9, 0);
+                                       k.k->p.offset << 9, 0, false);
 
                        if (next_hole < k.k->p.offset << 9)
                                break;
index 8c186acc285011f18cb917f9db0a31e3712edbb1..0852dbe988ad1fb5bc6cdcd8f2da821c287017a1 100644 (file)
@@ -1123,73 +1123,100 @@ static int extent_ends_at(struct bch_fs *c,
+/*
+ * Report, and optionally repair, a pair of overlapping extents.
+ *
+ * @pos1 is the end position of the first extent, @pos2 is the key of the
+ * second extent; the caller must guarantee that @pos1 lies past the start of
+ * @pos2 (enforced by the BUG_ON below).
+ *
+ * Both extents are looked up again in @btree.  If either cannot be found where
+ * expected, -BCH_ERR_internal_fsck_err is returned.  Otherwise an "overlapping
+ * extents" fsck error is raised; if the fix is accepted, one extent is
+ * overwritten by the other via bch2_trans_update_extent_overwrite() (the first
+ * extent when pos1.snapshot >= pos2.p.snapshot, else the second), the
+ * transaction is committed and *@fixed is set.
+ *
+ * After a repair: if both extents are in the same snapshot, @extent_end->offset
+ * is moved to the start of @pos2 and 0 is returned; otherwise
+ * -BCH_ERR_transaction_restart_nested is returned.
+ */
 static int overlapping_extents_found(struct btree_trans *trans,
                                     enum btree_id btree,
                                     struct bpos pos1, struct bkey pos2,
-                                    bool *fixed)
+                                    bool *fixed,
+                                    struct extent_end *extent_end)
 {
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       u32 snapshot = min(pos1.snapshot, pos2.p.snapshot);
+       struct btree_iter iter1, iter2 = { NULL };
+       struct bkey_s_c k1, k2;
        int ret;
 
        BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
 
-       bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0);
-       k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
-       ret = bkey_err(k);
+       /* Look up the first extent: the key found must sit exactly at pos1 */
+       bch2_trans_iter_init(trans, &iter1, btree, pos1,
+                            BTREE_ITER_ALL_SNAPSHOTS|
+                            BTREE_ITER_NOT_EXTENTS);
+       k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
+       ret = bkey_err(k1);
        if (ret)
                goto err;
 
        prt_str(&buf, "\n  ");
-       bch2_bkey_val_to_text(&buf, c, k);
+       bch2_bkey_val_to_text(&buf, c, k1);
 
-       if (!bpos_eq(pos1, k.k->p)) {
-               bch_err(c, "%s: error finding first overlapping extent when repairing%s",
+       if (!bpos_eq(pos1, k1.k->p)) {
+               prt_str(&buf, "\n  wanted\n  ");
+               bch2_bpos_to_text(&buf, pos1);
+               prt_str(&buf, "\n  ");
+               bch2_bkey_to_text(&buf, &pos2);
+
+               bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
                        __func__, buf.buf);
                ret = -BCH_ERR_internal_fsck_err;
                goto err;
        }
 
+       /* Walk a second iterator forward from the first extent to find pos2 */
+       bch2_trans_copy_iter(&iter2, &iter1);
+
        while (1) {
-               bch2_btree_iter_advance(&iter);
+               bch2_btree_iter_advance(&iter2);
 
-               k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
-               ret = bkey_err(k);
+               k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
+               ret = bkey_err(k2);
                if (ret)
                        goto err;
 
-               if (bkey_ge(k.k->p, pos2.p))
+               if (bpos_ge(k2.k->p, pos2.p))
                        break;
-
        }
 
        prt_str(&buf, "\n  ");
-       bch2_bkey_val_to_text(&buf, c, k);
+       bch2_bkey_val_to_text(&buf, c, k2);
 
-       if (bkey_gt(k.k->p, pos2.p) ||
-           pos2.size != k.k->size) {
+       /* The key we stopped at must match pos2 in both position and size */
+       if (bpos_gt(k2.k->p, pos2.p) ||
+           pos2.size != k2.k->size) {
                bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
                        __func__, buf.buf);
                ret = -BCH_ERR_internal_fsck_err;
                goto err;
        }
 
+       prt_printf(&buf, "\n  overwriting %s extent",
+                  pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
+
        if (fsck_err(c, "overlapping extents%s", buf.buf)) {
-               struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p;
-               struct btree_iter update_iter;
+               struct btree_iter *old_iter = &iter1;
+               struct disk_reservation res = { 0 };
 
-               struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter,
-                                               btree, update_pos,
-                                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-               bch2_trans_iter_exit(trans, &update_iter);
-               if ((ret = PTR_ERR_OR_ZERO(update)))
+               /*
+                * After the optional swap, k1/old_iter name the extent being
+                * overwritten; this matches the "first"/"second" text above.
+                */
+               if (pos1.snapshot < pos2.p.snapshot) {
+                       old_iter = &iter2;
+                       swap(k1, k2);
+               }
+
+               trans->extra_journal_res += bch2_bkey_sectors_compressed(k2);
+
+               ret =   bch2_trans_update_extent_overwrite(trans, old_iter,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
+                               k1, k2) ?:
+                       bch2_trans_commit(trans, &res, NULL,
+                               BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL);
+               bch2_disk_reservation_put(c, &res);
+
+               if (ret)
                        goto err;
 
                *fixed = true;
+
+               if (pos1.snapshot == pos2.p.snapshot)
+                       extent_end->offset = bkey_start_offset(&pos2);
+               else
+                       ret = -BCH_ERR_transaction_restart_nested;
        }
 fsck_err:
 err:
-       bch2_trans_iter_exit(trans, &iter);
+       /* iter2 is zero-initialized, so this is reached safely before the copy */
+       bch2_trans_iter_exit(trans, &iter2);
+       bch2_trans_iter_exit(trans, &iter1);
        printbuf_exit(&buf);
        return ret;
 }
@@ -1199,11 +1226,11 @@ static int check_overlapping_extents(struct btree_trans *trans,
                              struct extent_ends *extent_ends,
                              struct bkey_s_c k,
                              u32 equiv,
-                             struct btree_iter *iter)
+                             struct btree_iter *iter,
+                             bool *fixed)
 {
        struct bch_fs *c = trans->c;
        struct extent_end *i;
-       bool fixed = false;
        int ret = 0;
 
        /* transaction restart, running again */
@@ -1226,7 +1253,7 @@ static int check_overlapping_extents(struct btree_trans *trans,
                                                SPOS(iter->pos.inode,
                                                     i->offset,
                                                     i->snapshot),
-                                               *k.k, &fixed);
+                                               *k.k, fixed, i);
                if (ret)
                        goto err;
        }
@@ -1237,7 +1264,7 @@ static int check_overlapping_extents(struct btree_trans *trans,
 
        extent_ends->last_pos = k.k->p;
 err:
-       return ret ?: fixed;
+       return ret;
 }
 
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
@@ -1292,13 +1319,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto delete;
 
                ret = check_overlapping_extents(trans, s, extent_ends, k,
-                                               equiv.snapshot, iter);
-               if (ret < 0)
-                       goto err;
-
+                                               equiv.snapshot, iter,
+                                               &inode->recalculate_sums);
                if (ret)
-                       inode->recalculate_sums = true;
-               ret = 0;
+                       goto err;
        }
 
        /*
@@ -1373,7 +1397,7 @@ int bch2_check_extents(struct bch_fs *c)
 
        snapshots_seen_init(&s);
        extent_ends_init(&extent_ends);
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
 
        ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
                        POS(BCACHEFS_ROOT_INO, 0),
index 755cf7d177cd03444ba0c5a37194ea22af43ab89..fea21e1e5721e302504a751443835e3b91941590 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "btree_write_buffer.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -519,6 +520,23 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
        __bch2_inode_unpacked_to_text(out, &inode);
 }
 
+/*
+ * Report whether @k is an inode key (any of the three inode key types) whose
+ * bi_flags has BCH_INODE_UNLINKED set.  Every other key type yields false.
+ */
+static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
+{
+       bool unlinked = false;
+
+       /* bi_flags is stored little-endian: 32 bits for v1/v2, 64 bits for v3 */
+       switch (k.k->type) {
+       case KEY_TYPE_inode:
+               unlinked = bkey_s_c_to_inode(k).v->bi_flags &
+                       cpu_to_le32(BCH_INODE_UNLINKED);
+               break;
+       case KEY_TYPE_inode_v2:
+               unlinked = bkey_s_c_to_inode_v2(k).v->bi_flags &
+                       cpu_to_le32(BCH_INODE_UNLINKED);
+               break;
+       case KEY_TYPE_inode_v3:
+               unlinked = bkey_s_c_to_inode_v3(k).v->bi_flags &
+                       cpu_to_le64(BCH_INODE_UNLINKED);
+               break;
+       default:
+               break;
+       }
+
+       return unlinked;
+}
+
 int bch2_trans_mark_inode(struct btree_trans *trans,
                          enum btree_id btree_id, unsigned level,
                          struct bkey_s_c old,
@@ -526,6 +544,8 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
                          unsigned flags)
 {
        int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
+       bool old_deleted = bkey_is_deleted_inode(old);
+       bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new));
 
        if (nr) {
                int ret = bch2_replicas_deltas_realloc(trans, 0);
@@ -537,6 +557,12 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
                d->nr_inodes += nr;
        }
 
+       if (old_deleted != new_deleted) {
+               int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted);
+               if (ret)
+                       return ret;
+       }
+
        return 0;
 }
 
@@ -986,3 +1012,90 @@ err:
 
        return ret ?: -BCH_ERR_transaction_restart_nested;
 }
+
+/*
+ * Decide what to do with the deleted_inodes btree entry at @pos.
+ *
+ * Returns:
+ *   1   the inode exists and has BCH_INODE_UNLINKED set: caller should delete it
+ *   0   nothing to delete (internal snapshot node, the user declined the fix on
+ *       a clean filesystem, or a stale deleted_inodes entry was queued for
+ *       removal with bch2_btree_bit_mod())
+ *  <0   error
+ *
+ * The inode iterator is released on every path that initialized it.
+ */
+static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos)
+{
+       struct bch_fs *c = trans->c;
+       /* Zeroed so the shared exit path below is safe before the lookup */
+       struct btree_iter iter = { NULL };
+       struct bkey_s_c k;
+       struct bch_inode_unpacked inode;
+       int ret;
+
+       if (bch2_snapshot_is_internal_node(c, pos.snapshot))
+               return 0;
+
+       /*
+        * Deleted inodes are only an fsck error when the superblock says the
+        * filesystem is clean; after an unclean shutdown they are expected and
+        * must be processed.
+        */
+       if (c->sb.clean &&
+           !fsck_err(c,
+                     "filesystem marked as clean but have deleted inode %llu:%u",
+                     pos.offset, pos.snapshot))
+               return 0;
+
+       k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
+       if (fsck_err_on(!bkey_is_inode(k.k), c,
+                       "nonexistent inode %llu:%u in deleted_inodes btree",
+                       pos.offset, pos.snapshot))
+               goto delete;
+
+       ret = bch2_inode_unpack(k, &inode);
+       if (ret)
+               goto err;
+
+       if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c,
+                       "non-deleted inode %llu:%u in deleted_inodes btree",
+                       pos.offset, pos.snapshot))
+               goto delete;
+
+       ret = 1;
+err:
+fsck_err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+delete:
+       /* Stale entry: clear the bit in the deleted_inodes btree */
+       ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+       goto err;
+}
+
+/*
+ * Recovery pass: walk the deleted_inodes btree and remove every inode that
+ * may_delete_deleted_inode() says should be deleted.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int bch2_delete_dead_inodes(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       ret = bch2_btree_write_buffer_flush_sync(&trans);
+       if (ret)
+               goto err;
+
+       /*
+        * Weird transaction restart handling here because on successful delete,
+        * bch2_inode_rm_snapshot() will return a nested transaction restart,
+        * but we can't retry because the btree write buffer won't have been
+        * flushed and we'd spin:
+        */
+       for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+                          BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               /*
+                * may_delete_deleted_inode() may queue removal of a stale
+                * deleted_inodes entry and return 0; commit in the same
+                * restart loop so that update is not dropped.  A return of 1
+                * ("delete this inode") short-circuits the commit via ?:.
+                */
+               ret = lockrestart_do(&trans,
+                       may_delete_deleted_inode(&trans, k.k->p) ?:
+                       bch2_trans_commit(&trans, NULL, NULL,
+                                         BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL));
+               if (ret < 0)
+                       break;
+
+               if (ret) {
+                       if (!test_bit(BCH_FS_RW, &c->flags)) {
+                               bch2_trans_unlock(&trans);
+                               bch2_fs_lazy_rw(c);
+                       }
+
+                       ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot);
+                       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                               break;
+               }
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+err:
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
index 1b9dc27e82bd476be4531dda156b49541b86a54d..22b24405649f0200cc3785bb0e4b431dc02ea72a 100644 (file)
@@ -199,5 +199,6 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
                         struct bch_inode_unpacked *);
 
 int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
+int bch2_delete_dead_inodes(struct bch_fs *);
 
 #endif /* _BCACHEFS_INODE_H */
index 4d0daeba6f597212228cd3c2beba12c27eaa9ff6..960bb247f3a0ab8b84c64e8f439738ffdf76b3b6 100644 (file)
@@ -445,6 +445,13 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
        if (!options)
                return 0;
 
+       /*
+        * sys_fsconfig() is now occasionally providing us with option lists
+        * starting with a comma - weird.
+        */
+       if (*options == ',')
+               options++;
+
        copied_opts = kstrdup(options, GFP_KERNEL);
        if (!copied_opts)
                return -1;
index dcd4f9f410ae5f72ad9fcd396116a5681a99c0e3..55a233c2c7cc7b7f8122b66c3cd32fc057658c8d 100644 (file)
@@ -1458,6 +1458,29 @@ use_clean:
        if (ret)
                goto err;
 
+       /* If we fixed errors, verify that fs is actually clean now: */
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+           test_bit(BCH_FS_ERRORS_FIXED, &c->flags) &&
+           !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) &&
+           !test_bit(BCH_FS_ERROR, &c->flags)) {
+               bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
+               clear_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+
+               c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
+
+               ret = bch2_run_recovery_passes(c);
+               if (ret)
+                       goto err;
+
+               if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) ||
+                   test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
+                       bch_err(c, "Second fsck run was not clean");
+                       set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
+               }
+
+               set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+       }
+
        if (enabled_qtypes(c)) {
                bch_verbose(c, "reading quotas");
                ret = bch2_fs_quota_read(c);
diff --git a/libbcachefs/recovery_types.h b/libbcachefs/recovery_types.h
new file mode 100644 (file)
index 0000000..abf1f83
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_RECOVERY_TYPES_H
+#define _BCACHEFS_RECOVERY_TYPES_H
+
+/*
+ * Bit flags used as the second ("when") argument of each x() entry in
+ * BCH_RECOVERY_PASSES() below; they may be OR'd together, and 0 means no flag.
+ * NOTE(review): judging by the names, these select when a pass runs (always /
+ * during fsck / after unclean shutdown) and whether it is logged - confirm
+ * against the code that consumes the table.
+ */
+#define PASS_SILENT            BIT(0)
+#define PASS_FSCK              BIT(1)
+#define PASS_UNCLEAN           BIT(2)
+#define PASS_ALWAYS            BIT(3)
+
+/*
+ * X-macro table of recovery passes, one x(name, when) entry per pass.
+ * enum bch_recovery_pass is generated from this list, so the position of an
+ * entry determines its enum value: inserting or reordering entries renumbers
+ * every pass after it.
+ */
+#define BCH_RECOVERY_PASSES()                                                                  \
+       x(alloc_read,                   PASS_ALWAYS)                                            \
+       x(stripes_read,                 PASS_ALWAYS)                                            \
+       x(initialize_subvolumes,        0)                                                      \
+       x(snapshots_read,               PASS_ALWAYS)                                            \
+       x(check_topology,               0)                                                      \
+       x(check_allocations,            PASS_FSCK)                                              \
+       x(set_may_go_rw,                PASS_ALWAYS|PASS_SILENT)                                \
+       x(journal_replay,               PASS_ALWAYS)                                            \
+       x(check_alloc_info,             PASS_FSCK)                                              \
+       x(check_lrus,                   PASS_FSCK)                                              \
+       x(check_btree_backpointers,     PASS_FSCK)                                              \
+       x(check_backpointers_to_extents,PASS_FSCK)                                              \
+       x(check_extents_to_backpointers,PASS_FSCK)                                              \
+       x(check_alloc_to_lru_refs,      PASS_FSCK)                                              \
+       x(fs_freespace_init,            PASS_ALWAYS|PASS_SILENT)                                \
+       x(bucket_gens_init,             0)                                                      \
+       x(check_snapshot_trees,         PASS_FSCK)                                              \
+       x(check_snapshots,              PASS_FSCK)                                              \
+       x(check_subvols,                PASS_FSCK)                                              \
+       x(delete_dead_snapshots,        PASS_FSCK|PASS_UNCLEAN)                                 \
+       x(fs_upgrade_for_subvolumes,    0)                                                      \
+       x(check_inodes,                 PASS_FSCK)                                              \
+       x(check_extents,                PASS_FSCK)                                              \
+       x(check_dirents,                PASS_FSCK)                                              \
+       x(check_xattrs,                 PASS_FSCK)                                              \
+       x(check_root,                   PASS_FSCK)                                              \
+       x(check_directory_structure,    PASS_FSCK)                                              \
+       x(check_nlinks,                 PASS_FSCK)                                              \
+       x(delete_dead_inodes,           PASS_FSCK|PASS_UNCLEAN)                                 \
+       x(fix_reflink_p,                0)                                                      \
+
+/* One BCH_RECOVERY_PASS_<name> enumerator per table entry, in table order. */
+enum bch_recovery_pass {
+#define x(n, when)     BCH_RECOVERY_PASS_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+};
+
+#endif /* _BCACHEFS_RECOVERY_TYPES_H */
index cef23d2ccc5ffec62b9e711679644228ff7cdbc0..1d4b0a583586348d42d6f39c1b2d364676a5e2a6 100644 (file)
@@ -503,6 +503,36 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
                __test_extent_overwrite(c, 32, 64, 32, 128);
 }
 
+/*
+ * Test helper: insert a cookie key of size @len ending at offset @start + @len
+ * for inode @inum in snapshot @snapid into the extents btree, using
+ * bch2_btree_insert_nonextent().  Logs and returns the error on failure,
+ * returns 0 on success.
+ */
+static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
+{
+       struct bkey_i_cookie cookie;
+       struct bkey *key = &cookie.k_i.k;
+       int ret;
+
+       bkey_cookie_init(&cookie.k_i);
+
+       /* Keys are positioned at their end offset, hence start + len */
+       key->p.inode    = inum;
+       key->p.offset   = start + len;
+       key->p.snapshot = snapid;
+       key->size       = len;
+
+       ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &cookie.k_i,
+                                           BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
+       if (ret)
+               bch_err_fn(c, ret);
+
+       return ret;
+}
+
+/*
+ * Test: insert a fixed set of deliberately overlapping keys for inode @inum
+ * into the extents btree.  Stops at, and returns, the first insert error;
+ * returns 0 if every insert succeeded.
+ */
+static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
+{
+       static const struct {
+               u64     start;
+               u32     len;
+               u32     snapid;
+       } extents[] = {
+               /* overwrite entire */
+               {  0, 16, U32_MAX - 2 },
+               {  2,  8, U32_MAX - 2 },
+               {  4,  4, U32_MAX },
+               /* overwrite front/back */
+               { 32,  8, U32_MAX - 2 },
+               { 36,  8, U32_MAX },
+               { 60,  8, U32_MAX - 2 },
+               { 64,  8, U32_MAX },
+       };
+       unsigned i;
+       int ret = 0;
+
+       for (i = 0; !ret && i < sizeof(extents) / sizeof(extents[0]); i++)
+               ret = insert_test_overlapping_extent(c, inum,
+                                                    extents[i].start,
+                                                    extents[i].len,
+                                                    extents[i].snapid);
+
+       return ret;
+}
+
 /* snapshot unit tests */
 
 /* Test skipping over keys in unrelated snapshots: */
@@ -901,6 +931,7 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
        perf_test(test_extent_overwrite_back);
        perf_test(test_extent_overwrite_middle);
        perf_test(test_extent_overwrite_all);
+       perf_test(test_extent_create_overlapping);
 
        perf_test(test_snapshots);