git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 17a344f265 bcachefs: Improve fsck for subvols/snapshots
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 15 Jul 2022 21:24:21 +0000 (17:24 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Fri, 15 Jul 2022 21:24:21 +0000 (17:24 -0400)
16 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/backpointers.c
libbcachefs/bcachefs.h
libbcachefs/btree_gc.c
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/data_update.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/lru.c
libbcachefs/recovery.c
libbcachefs/subvolume.c
libbcachefs/subvolume.h
libbcachefs/tests.c

index 60db55730fe48909985e5b575cb70834f27f6ab6..55f6e57db60d222efba6a75b51b2666ef3810229 100644 (file)
@@ -1 +1 @@
-a03225a783e75d704987ad77fef57891d7a2e115
+17a344f26599e37e7023a6daff813e4d1a96cdd2
index 4b38fbd9d913758242f082b3654100cf3bed0294..fc77747c78c49b3507f793dd07fe7f1aa2d79a15 100644 (file)
@@ -866,7 +866,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
        bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
                             BTREE_ITER_PREFETCH);
        while (1) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        bch2_check_alloc_key(&trans, &iter,
@@ -887,7 +887,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN,
                             BTREE_ITER_PREFETCH);
        while (1) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        bch2_check_discard_freespace_key(&trans, &iter));
@@ -904,7 +904,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN,
                             BTREE_ITER_PREFETCH);
        while (1) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        bch2_check_discard_freespace_key(&trans, &iter));
@@ -1009,7 +1009,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 
        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        bch2_check_alloc_to_lru_ref(&trans, &iter));
@@ -1139,7 +1139,7 @@ static void bch2_do_discards_work(struct work_struct *work)
                        continue;
                }
 
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_USE_RESERVE|
                                      BTREE_INSERT_NOFAIL,
                                bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done));
@@ -1277,7 +1277,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
                        should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
 
                while (nr_to_invalidate-- >= 0) {
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                       ret = commit_do(&trans, NULL, NULL,
                                              BTREE_INSERT_USE_RESERVE|
                                              BTREE_INSERT_NOFAIL,
                                        invalidate_one_bucket(&trans, ca, &bucket,
@@ -1333,7 +1333,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
                if (iter.pos.offset >= ca->mi.nbuckets)
                        break;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW,
                                 bucket_freespace_init(&trans, &iter));
                if (ret)
index 6c74b3a520c1e5d16d0ba17859e4932258252744..08d6795860f34c7d8c6ecadf55458ee98af4f897 100644 (file)
@@ -627,7 +627,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
 
        do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
                                      bch2_check_btree_backpointer(&trans, &iter));
@@ -805,7 +805,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
                                          BTREE_ITER_PREFETCH);
 
                do {
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                       ret = commit_do(&trans, NULL, NULL,
                                              BTREE_INSERT_LAZY_RW|
                                              BTREE_INSERT_NOFAIL,
                                              check_extent_to_backpointers(&trans, &iter));
@@ -818,7 +818,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
                if (ret)
                        break;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
                                      check_btree_root_to_backpointers(&trans, btree_id));
@@ -876,7 +876,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
                           BTREE_ITER_PREFETCH, k, ret) {
                u64 bp_offset = 0;
 
-               while (!(ret = __bch2_trans_do(&trans, NULL, NULL,
+               while (!(ret = commit_do(&trans, NULL, NULL,
                                               BTREE_INSERT_LAZY_RW|
                                               BTREE_INSERT_NOFAIL,
                                check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
index 8b4d0eb5cdc2b3e1e6060c477e4cbe2a81cb7b14..31e387b103ee32bc0d3c6418d9d34fe9b486d8c1 100644 (file)
@@ -515,6 +515,8 @@ enum {
        BCH_FS_INITIAL_GC_UNFIXED,      /* kill when we enumerate fsck errors */
        BCH_FS_NEED_ANOTHER_GC,
 
+       BCH_FS_HAVE_DELETED_SNAPSHOTS,
+
        /* errors: */
        BCH_FS_ERROR,
        BCH_FS_TOPOLOGY_ERROR,
index 9556b05312859672407b67973bbf2b486da62f95..e260689ba830cc97f2c169754366baef967a4f43 100644 (file)
@@ -799,7 +799,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
                        atomic64_set(&c->key_version, k->k->version.lo);
        }
 
-       ret = __bch2_trans_do(trans, NULL, NULL, 0,
+       ret = commit_do(trans, NULL, NULL, 0,
                        bch2_mark_key(trans, old, *k, flags));
 fsck_err:
 err:
@@ -1438,7 +1438,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
                        if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
                                break;
 
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                       ret = commit_do(&trans, NULL, NULL,
                                              BTREE_INSERT_LAZY_RW,
                                        bch2_alloc_write_key(&trans, &iter,
                                                             metadata_only));
@@ -1592,7 +1592,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
                        else
                                *bkey_refcount(new) = cpu_to_le64(r->refcount);
 
-                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       ret = commit_do(&trans, NULL, NULL, 0,
                                __bch2_btree_insert(&trans, BTREE_ID_reflink, new));
                        kfree(new);
 
@@ -1705,7 +1705,7 @@ inconsistent:
                        for (i = 0; i < new->v.nr_blocks; i++)
                                stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
 
-                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       ret = commit_do(&trans, NULL, NULL, 0,
                                __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
                        kfree(new);
                }
@@ -2012,7 +2012,7 @@ int bch2_gc_gens(struct bch_fs *c)
 
        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL,
                                bch2_alloc_write_oldest_gen(&trans, &iter));
                if (ret) {
index 28f9585770068fd0ac5a1e701cf8c1cf6d037564..e9127dbf7e241ab62491af2e800dd15c8aa56d4b 100644 (file)
@@ -118,7 +118,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
        _ret;                                                           \
 })
 
-#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do)  \
+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)        \
        lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
                                        (_journal_seq), (_flags)))
 
@@ -128,8 +128,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
        int _ret;                                                       \
                                                                        \
        bch2_trans_init(&trans, (_c), 0, 0);                            \
-       _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \
-                              _do);                                    \
+       _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
        bch2_trans_exit(&trans);                                        \
                                                                        \
        _ret;                                                           \
index 965fdfbfa3eee032971ccdc4229847db78344423..c3ef2387ddad23db4809b08ab063950e25c0338c 100644 (file)
@@ -610,7 +610,7 @@ static void btree_update_nodes_written(struct btree_update *as)
         * which may require allocations as well.
         */
        bch2_trans_init(&trans, c, 0, 512);
-       ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq,
+       ret = commit_do(&trans, &as->disk_res, &journal_seq,
                              BTREE_INSERT_NOFAIL|
                              BTREE_INSERT_NOCHECK_RW|
                              BTREE_INSERT_JOURNAL_RECLAIM|
index 1ea7e2baf32323a2cc6b4726869c3bedd841737b..fe2cd730aee01c0eaff4c8c1a730aeef34e519c3 100644 (file)
@@ -1861,7 +1861,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    enum bch_data_type type,
                                    unsigned sectors)
 {
-       return __bch2_trans_do(trans, NULL, NULL, 0,
+       return commit_do(trans, NULL, NULL, 0,
                        __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
 }
 
index cc9ae6dad0d5ad7e4ad068893cb9827d68a7077e..c181dba6028d2393b5b149777ffdfa671012e73d 100644 (file)
@@ -23,13 +23,13 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_iter iter, update_iter;
        struct bkey_s_c k;
-       struct snapshots_seen s;
+       snapshot_id_list s;
        int ret;
 
        if (!btree_type_has_snapshots(id))
                return 0;
 
-       snapshots_seen_init(&s);
+       darray_init(&s);
 
        if (!bkey_cmp(old_pos, new_pos))
                return 0;
@@ -41,7 +41,6 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
                             BTREE_ITER_NOT_EXTENTS|
                             BTREE_ITER_ALL_SNAPSHOTS);
        while (1) {
-next:
                k = bch2_btree_iter_prev(&iter);
                ret = bkey_err(k);
                if (ret)
@@ -52,11 +51,9 @@ next:
 
                if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
                        struct bkey_i *update;
-                       u32 *i;
 
-                       darray_for_each(s.ids, i)
-                               if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i))
-                                       goto next;
+                       if (snapshot_list_has_ancestor(c, &s, k.k->p.snapshot))
+                               continue;
 
                        update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
 
@@ -79,13 +76,13 @@ next:
                        if (ret)
                                break;
 
-                       ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
+                       ret = snapshot_list_add(c, &s, k.k->p.snapshot);
                        if (ret)
                                break;
                }
        }
        bch2_trans_iter_exit(trans, &iter);
-       darray_exit(&s.ids);
+       darray_exit(&s);
 
        return ret;
 }
index bb94ba58a796770038de95a6f47fada634b3135c..2354c98945170c511557f7a21b36e43ad260964a 100644 (file)
@@ -443,7 +443,7 @@ static int __bch2_link(struct bch_fs *c,
        mutex_lock(&inode->ei_update_lock);
        bch2_trans_init(&trans, c, 4, 1024);
 
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_link_trans(&trans,
                                        inode_inum(dir),   &dir_u,
                                        inode_inum(inode), &inode_u,
@@ -492,7 +492,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
        bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
        bch2_trans_init(&trans, c, 4, 1024);
 
-       ret = __bch2_trans_do(&trans, NULL, NULL,
+       ret = commit_do(&trans, NULL, NULL,
                              BTREE_INSERT_NOFAIL,
                        bch2_unlink_trans(&trans,
                                          inode_inum(dir), &dir_u,
@@ -614,7 +614,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns,
                        goto err;
        }
 
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_rename_trans(&trans,
                                          inode_inum(src_dir), &src_dir_u,
                                          inode_inum(dst_dir), &dst_dir_u,
index f1abec95a740cb03e0b9facd37512677206d4e23..c558895ef2918bafe0f7b48094477a6305765688 100644 (file)
@@ -220,7 +220,7 @@ static int write_inode(struct btree_trans *trans,
                       struct bch_inode_unpacked *inode,
                       u32 snapshot)
 {
-       int ret = __bch2_trans_do(trans, NULL, NULL,
+       int ret = commit_do(trans, NULL, NULL,
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_LAZY_RW,
                                  __write_inode(trans, inode, snapshot));
@@ -231,6 +231,7 @@ static int write_inode(struct btree_trans *trans,
 
 static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
 {
+       struct bch_fs *c = trans->c;
        struct btree_iter iter = { NULL };
        struct bkey_i_inode_generation delete;
        struct bch_inode_unpacked inode_u;
@@ -263,7 +264,7 @@ retry:
                goto err;
 
        if (!bkey_is_inode(k.k)) {
-               bch2_fs_inconsistent(trans->c,
+               bch2_fs_inconsistent(c,
                                     "inode %llu:%u not found when deleting",
                                     inum, snapshot);
                ret = -EIO;
@@ -273,11 +274,8 @@ retry:
        bch2_inode_unpack(k, &inode_u);
 
        /* Subvolume root? */
-       if (inode_u.bi_subvol) {
-               ret = bch2_subvolume_delete(trans, inode_u.bi_subvol);
-               if (ret)
-                       goto err;
-       }
+       if (inode_u.bi_subvol)
+               bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
 
        bkey_inode_generation_init(&delete.k_i);
        delete.k.p = iter.pos;
@@ -434,7 +432,7 @@ static int reattach_inode(struct btree_trans *trans,
                          struct bch_inode_unpacked *inode,
                          u32 inode_snapshot)
 {
-       int ret = __bch2_trans_do(trans, NULL, NULL,
+       int ret = commit_do(trans, NULL, NULL,
                                  BTREE_INSERT_LAZY_RW|
                                  BTREE_INSERT_NOFAIL,
                        __reattach_inode(trans, inode, inode_snapshot));
@@ -471,19 +469,60 @@ out:
        return ret;
 }
 
-static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
+struct snapshots_seen_entry {
+       u32                             id;
+       u32                             equiv;
+};
+
+struct snapshots_seen {
+       struct bpos                     pos;
+       DARRAY(struct snapshots_seen_entry) ids;
+};
+
+static inline void snapshots_seen_exit(struct snapshots_seen *s)
+{
+       darray_exit(&s->ids);
+}
+
+static inline void snapshots_seen_init(struct snapshots_seen *s)
 {
-       pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
+       memset(s, 0, sizeof(*s));
+}
+
+static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
+                                enum btree_id btree_id, struct bpos pos)
+{
+       struct snapshots_seen_entry *i, n = {
+               .id     = pos.snapshot,
+               .equiv  = bch2_snapshot_equiv(c, pos.snapshot),
+       };
+       int ret;
 
        if (bkey_cmp(s->pos, pos))
                s->ids.nr = 0;
+
+       pos.snapshot = n.equiv;
        s->pos = pos;
 
-       /* Might get called multiple times due to lock restarts */
-       if (s->ids.nr && s->ids.data[s->ids.nr - 1] == pos.snapshot)
-               return 0;
+       darray_for_each(s->ids, i)
+               if (i->equiv == n.equiv) {
+                       if (i->id != n.id) {
+                               bch_err(c, "snapshot deletion did not run correctly:\n"
+                                       "  duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
+                                       bch2_btree_ids[btree_id],
+                                       pos.inode, pos.offset,
+                                       i->id, n.id, n.equiv);
+                               return -EINVAL;
+                       }
 
-       return snapshots_seen_add(c, s, pos.snapshot);
+                       return 0;
+               }
+
+       ret = darray_push(&s->ids, n);
+       if (ret)
+               bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+                       s->ids.size);
+       return ret;
 }
 
 /**
@@ -496,15 +535,15 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
                                    u32 id, u32 ancestor)
 {
        ssize_t i;
+       u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0;
 
        BUG_ON(id > ancestor);
-
-       id              = snapshot_t(c, id)->equiv;
-       ancestor        = snapshot_t(c, ancestor)->equiv;
+       BUG_ON(!bch2_snapshot_is_equiv(c, id));
+       BUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
 
        /* @ancestor should be the snapshot most recently added to @seen */
-       BUG_ON(!seen->ids.nr || seen->ids.data[seen->ids.nr - 1] != ancestor);
-       BUG_ON(seen->pos.snapshot != ancestor);
+       BUG_ON(ancestor != seen->pos.snapshot);
+       BUG_ON(ancestor != top);
 
        if (id == ancestor)
                return true;
@@ -513,10 +552,10 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
                return false;
 
        for (i = seen->ids.nr - 2;
-            i >= 0 && seen->ids.data[i] >= id;
+            i >= 0 && seen->ids.data[i].equiv >= id;
             --i)
-               if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]) &&
-                   bch2_snapshot_is_ancestor(c, seen->ids.data[i], ancestor))
+               if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) &&
+                   bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor))
                        return false;
 
        return true;
@@ -541,8 +580,9 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
                : bch2_snapshot_is_ancestor(c, src, dst);
 }
 
-#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)      \
-       for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && (_i)->snapshot <= (_snapshot); _i++)\
+#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)                              \
+       for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&        \
+            (_i)->snapshot <= (_snapshot); _i++)                                       \
                if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
 
 struct inode_walker_entry {
@@ -577,7 +617,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
 
        return darray_push(&w->inodes, ((struct inode_walker_entry) {
                .inode          = u,
-               .snapshot       = snapshot_t(c, inode.k->p.snapshot)->equiv,
+               .snapshot       = bch2_snapshot_equiv(c, inode.k->p.snapshot),
        }));
 }
 
@@ -587,10 +627,10 @@ static int __walk_inode(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct bkey_s_c k;
-       unsigned i, ancestor_pos;
+       unsigned i;
        int ret;
 
-       pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
+       pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot);
 
        if (pos.inode == w->cur_inum) {
                w->first_this_inode = false;
@@ -623,17 +663,20 @@ found:
        BUG_ON(pos.snapshot > w->inodes.data[i].snapshot);
 
        if (pos.snapshot != w->inodes.data[i].snapshot) {
-               ancestor_pos = i;
+               struct inode_walker_entry e = w->inodes.data[i];
+
+               e.snapshot = pos.snapshot;
+               e.count = 0;
+
+               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
+                        pos.inode, pos.snapshot, w->inodes.data[i].snapshot);
 
                while (i && w->inodes.data[i - 1].snapshot > pos.snapshot)
                        --i;
 
-               ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]);
+               ret = darray_insert_item(&w->inodes, i, e);
                if (ret)
                        return ret;
-
-               w->inodes.data[i].snapshot = pos.snapshot;
-               w->inodes.data[i].count = 0;
        }
 
        return i;
@@ -653,17 +696,19 @@ static int __get_visible_inodes(struct btree_trans *trans,
 
        for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
                           BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
                if (k.k->p.offset != inum)
                        break;
 
-               if (!bkey_is_inode(k.k))
+               if (!ref_visible(c, s, s->pos.snapshot, equiv))
                        continue;
 
-               if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) {
+               if (bkey_is_inode(k.k))
                        add_inode(c, w, k);
-                       if (k.k->p.snapshot >= s->pos.snapshot)
-                               break;
-               }
+
+               if (equiv >= s->pos.snapshot)
+                       break;
        }
        bch2_trans_iter_exit(trans, &iter);
 
@@ -678,7 +723,7 @@ static int check_key_has_snapshot(struct btree_trans *trans,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c,
+       if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
                        "key in missing snapshot: %s",
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                ret = bch2_btree_delete_at(trans, iter,
@@ -786,6 +831,7 @@ fsck_err:
 static int check_inode(struct btree_trans *trans,
                       struct btree_iter *iter,
                       struct bch_inode_unpacked *prev,
+                      struct snapshots_seen *s,
                       bool full)
 {
        struct bch_fs *c = trans->c;
@@ -808,6 +854,10 @@ static int check_inode(struct btree_trans *trans,
        if (ret)
                return 0;
 
+       ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
+       if (ret)
+               goto err;
+
        /*
         * if snapshot id isn't a leaf node, skip it - deletion in
         * particular is not atomic, so on the internal snapshot nodes
@@ -930,8 +980,10 @@ static int check_inodes(struct bch_fs *c, bool full)
        struct btree_trans trans;
        struct btree_iter iter;
        struct bch_inode_unpacked prev = { 0 };
+       struct snapshots_seen s;
        int ret;
 
+       snapshots_seen_init(&s);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN,
@@ -940,81 +992,22 @@ static int check_inodes(struct bch_fs *c, bool full)
                             BTREE_ITER_ALL_SNAPSHOTS);
 
        do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
-                       check_inode(&trans, &iter, &prev, full));
+                       check_inode(&trans, &iter, &prev, &s, full));
                if (ret)
                        break;
        } while (bch2_btree_iter_advance(&iter));
        bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
+       snapshots_seen_exit(&s);
        if (ret)
                bch_err(c, "error %i from check_inodes()", ret);
        return ret;
 }
 
-static int check_subvol(struct btree_trans *trans,
-                       struct btree_iter *iter)
-{
-       struct bkey_s_c k;
-       struct bkey_s_c_subvolume subvol;
-       int ret;
-
-       k = bch2_btree_iter_peek(iter);
-       if (!k.k)
-               return 0;
-
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
-       if (k.k->type != KEY_TYPE_subvolume)
-               return 0;
-
-       subvol = bkey_s_c_to_subvolume(k);
-
-       if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
-               ret = bch2_subvolume_delete(trans, iter->pos.offset);
-               if (ret && ret != -EINTR)
-                       bch_err(trans->c, "error deleting subvolume %llu: %i",
-                               iter->pos.offset, ret);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-noinline_for_stack
-static int check_subvols(struct bch_fs *c)
-{
-       struct btree_trans trans;
-       struct btree_iter iter;
-       int ret;
-
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
-                            POS_MIN,
-                            BTREE_ITER_INTENT|
-                            BTREE_ITER_PREFETCH);
-
-       do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     check_subvol(&trans, &iter));
-               if (ret)
-                       break;
-       } while (bch2_btree_iter_advance(&iter));
-       bch2_trans_iter_exit(&trans, &iter);
-
-       bch2_trans_exit(&trans);
-       return ret;
-}
-
 /*
  * Checking for overlapping extents needs to be reimplemented
  */
@@ -1158,6 +1151,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
        struct bkey_s_c k;
        struct inode_walker_entry *i;
        struct printbuf buf = PRINTBUF;
+       struct bpos equiv;
        int ret = 0;
 peek:
        k = bch2_btree_iter_peek(iter);
@@ -1174,7 +1168,10 @@ peek:
                goto out;
        }
 
-       ret = snapshots_seen_update(c, s, k.k->p);
+       equiv = k.k->p;
+       equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+       ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
        if (ret)
                goto err;
 
@@ -1209,7 +1206,7 @@ peek:
                }
        }
 #endif
-       ret = __walk_inode(trans, inode, k.k->p);
+       ret = __walk_inode(trans, inode, equiv);
        if (ret < 0)
                goto err;
 
@@ -1241,8 +1238,8 @@ peek:
                goto out;
        }
 
-       if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) {
-               for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) {
+       if (!bch2_snapshot_internal_node(c, equiv.snapshot)) {
+               for_each_visible_inode(c, s, inode, equiv.snapshot, i) {
                        if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
                                        k.k->type != KEY_TYPE_reservation &&
                                        k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
@@ -1251,7 +1248,7 @@ peek:
                                bch2_fs_lazy_rw(c);
                                ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
                                                SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
-                                                    k.k->p.snapshot),
+                                                    equiv.snapshot),
                                                POS(k.k->p.inode, U64_MAX),
                                                0, NULL) ?: -EINTR;
                                goto out;
@@ -1260,7 +1257,7 @@ peek:
        }
 
        if (bkey_extent_is_allocation(k.k))
-               for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
+               for_each_visible_inode(c, s, inode, equiv.snapshot, i)
                        i->count += k.k->size;
 #if 0
        bch2_bkey_buf_reassemble(&prev, c, k);
@@ -1306,7 +1303,7 @@ static int check_extents(struct bch_fs *c)
                             BTREE_ITER_ALL_SNAPSHOTS);
 
        do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
                        check_extent(&trans, &iter, &w, &s));
@@ -1495,6 +1492,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        struct bkey_s_c_dirent d;
        struct inode_walker_entry *i;
        struct printbuf buf = PRINTBUF;
+       struct bpos equiv;
        int ret = 0;
 peek:
        k = bch2_btree_iter_peek(iter);
@@ -1511,7 +1509,10 @@ peek:
                goto out;
        }
 
-       ret = snapshots_seen_update(c, s, k.k->p);
+       equiv = k.k->p;
+       equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+       ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
        if (ret)
                goto err;
 
@@ -1529,7 +1530,7 @@ peek:
                goto peek;
        }
 
-       ret = __walk_inode(trans, dir, k.k->p);
+       ret = __walk_inode(trans, dir, equiv);
        if (ret < 0)
                goto err;
 
@@ -1629,7 +1630,8 @@ peek:
                        goto err;
 
                if (fsck_err_on(!target->inodes.nr, c,
-                               "dirent points to missing inode:\n%s",
+                               "dirent points to missing inode: (equiv %u)\n%s",
+                               equiv.snapshot,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k),
                                 buf.buf))) {
@@ -1647,7 +1649,7 @@ peek:
        }
 
        if (d.v->d_type == DT_DIR)
-               for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+               for_each_visible_inode(c, s, dir, equiv.snapshot, i)
                        i->count++;
 
 out:
@@ -1687,7 +1689,7 @@ static int check_dirents(struct bch_fs *c)
                             BTREE_ITER_ALL_SNAPSHOTS);
 
        do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
                        check_dirent(&trans, &iter, &hash_info,
@@ -1774,7 +1776,7 @@ static int check_xattrs(struct bch_fs *c)
                             BTREE_ITER_ALL_SNAPSHOTS);
 
        do {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_LAZY_RW|
                                      BTREE_INSERT_NOFAIL,
                                      check_xattr(&trans, &iter, &hash_info,
@@ -1814,7 +1816,7 @@ static int check_root_trans(struct btree_trans *trans)
                root_subvol.v.flags     = 0;
                root_subvol.v.snapshot  = cpu_to_le32(snapshot);
                root_subvol.v.inode     = cpu_to_le64(inum);
-               ret = __bch2_trans_do(trans, NULL, NULL,
+               ret = commit_do(trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
@@ -1903,7 +1905,7 @@ static int check_path(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        int ret = 0;
 
-       snapshot = snapshot_t(c, snapshot)->equiv;
+       snapshot = bch2_snapshot_equiv(c, snapshot);
        p->nr = 0;
 
        while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
@@ -1977,7 +1979,7 @@ static int check_path(struct btree_trans *trans,
                        if (!fsck_err(c, "directory structure loop"))
                                return 0;
 
-                       ret = __bch2_trans_do(trans, NULL, NULL,
+                       ret = commit_do(trans, NULL, NULL,
                                              BTREE_INSERT_NOFAIL|
                                              BTREE_INSERT_LAZY_RW,
                                        remove_backpointer(trans, inode));
@@ -2188,7 +2190,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
                           BTREE_ITER_INTENT|
                           BTREE_ITER_PREFETCH|
                           BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               ret = snapshots_seen_update(c, &s, k.k->p);
+               ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
                if (ret)
                        break;
 
@@ -2366,7 +2368,7 @@ static int fix_reflink_p(struct bch_fs *c)
                           BTREE_ITER_PREFETCH|
                           BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                if (k.k->type == KEY_TYPE_reflink_p) {
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                       ret = commit_do(&trans, NULL, NULL,
                                              BTREE_INSERT_NOFAIL|
                                              BTREE_INSERT_LAZY_RW,
                                              fix_reflink_p_key(&trans, &iter));
@@ -2386,9 +2388,10 @@ static int fix_reflink_p(struct bch_fs *c)
  */
 int bch2_fsck_full(struct bch_fs *c)
 {
-       return  bch2_fs_snapshots_check(c) ?:
+       return  bch2_fs_check_snapshots(c) ?:
+               bch2_fs_check_subvols(c) ?:
+               bch2_delete_dead_snapshots(c) ?:
                check_inodes(c, true) ?:
-               check_subvols(c) ?:
                check_extents(c) ?:
                check_dirents(c) ?:
                check_xattrs(c) ?:
@@ -2400,5 +2403,8 @@ int bch2_fsck_full(struct bch_fs *c)
 
 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
 {
-       return check_inodes(c, false);
+       return  bch2_fs_check_snapshots(c) ?:
+               bch2_fs_check_subvols(c) ?:
+               bch2_delete_dead_snapshots(c) ?:
+               check_inodes(c, false);
 }
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 5a09b55006ff3ec1932dddb54dfa469faaa3a6d6..94ecb3a397602d90487887d0706a244556788ece 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -204,7 +204,7 @@ int bch2_check_lrus(struct bch_fs *c)
 
        for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
-               ret = __bch2_trans_do(&trans, NULL, NULL,
+               ret = commit_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
                        bch2_check_lru_key(&trans, &iter));
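diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index eea025a83b435c537ba622b2c80772fb715ae0ac..64b1e79f3182bb01f3757043140fad00a10433ee 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c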
@@ -1428,6 +1428,12 @@ out:
                bch2_journal_entries_free(c);
        }
        kfree(clean);
+
+       if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) {
+               bch2_fs_read_write_early(c);
+               bch2_delete_dead_snapshots_async(c);
+       }
+
        if (ret)
                bch_err(c, "Error in recovery: %s (%i)", err, ret);
        else
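diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 60b60de83f3e5cf3188e7309a5c2cde519541e5f..463b5afd3fc74cbca1dd329d06d02df17b640e03 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c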
@@ -9,15 +9,12 @@
 
 /* Snapshot tree: */
 
-static void bch2_delete_dead_snapshots_work(struct work_struct *);
-static void bch2_delete_dead_snapshots(struct bch_fs *);
-
 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
                           struct bkey_s_c k)
 {
        struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
 
-       prt_printf(out, "is_subvol %llu deleted %llu parent %u children %u %u subvol %u",
+       prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u",
               BCH_SNAPSHOT_SUBVOL(s.v),
               BCH_SNAPSHOT_DELETED(s.v),
               le32_to_cpu(s.v->parent),
@@ -143,87 +140,96 @@ static int snapshot_live(struct btree_trans *trans, u32 id)
        return !BCH_SNAPSHOT_DELETED(&v);
 }
 
-static int bch2_snapshots_set_equiv(struct btree_trans *trans)
+static int bch2_snapshot_set_equiv(struct btree_trans *trans,
+                                  struct bkey_s_c_snapshot snap)
 {
        struct bch_fs *c = trans->c;
+       unsigned i, nr_live = 0, live_idx = 0;
+       u32 id = snap.k->p.offset, child[2] = {
+               [0] = le32_to_cpu(snap.v->children[0]),
+               [1] = le32_to_cpu(snap.v->children[1])
+       };
+
+       for (i = 0; i < 2; i++) {
+               int ret = snapshot_live(trans, child[i]);
+               if (ret < 0)
+                       return ret;
+
+               if (ret)
+                       live_idx = i;
+               nr_live += ret;
+       }
+
+       snapshot_t(c, id)->equiv = nr_live == 1
+               ? snapshot_t(c, child[live_idx])->equiv
+               : id;
+       return 0;
+}
+
+static int bch2_snapshots_set_equiv(struct btree_trans *trans)
+{
        struct btree_iter iter;
        struct bkey_s_c k;
-       struct bkey_s_c_snapshot snap;
-       unsigned i;
        int ret;
 
        for_each_btree_key(trans, iter, BTREE_ID_snapshots,
                           POS_MIN, 0, k, ret) {
-               u32 id = k.k->p.offset, child[2];
-               unsigned nr_live = 0, live_idx = 0;
-
                if (k.k->type != KEY_TYPE_snapshot)
                        continue;
 
-               snap = bkey_s_c_to_snapshot(k);
-               child[0] = le32_to_cpu(snap.v->children[0]);
-               child[1] = le32_to_cpu(snap.v->children[1]);
-
-               for (i = 0; i < 2; i++) {
-                       ret = snapshot_live(trans, child[i]);
-                       if (ret < 0)
-                               goto err;
-
-                       if (ret)
-                               live_idx = i;
-                       nr_live += ret;
-               }
-
-               snapshot_t(c, id)->equiv = nr_live == 1
-                       ? snapshot_t(c, child[live_idx])->equiv
-                       : id;
+               ret = bch2_snapshot_set_equiv(trans, bkey_s_c_to_snapshot(k));
+               if (ret)
+                       break;
        }
-err:
        bch2_trans_iter_exit(trans, &iter);
 
        if (ret)
-               bch_err(c, "error walking snapshots: %i", ret);
+               bch_err(trans->c, "error in bch2_snapshots_set_equiv: %i", ret);
 
        return ret;
 }
 
 /* fsck: */
-static int bch2_snapshot_check(struct btree_trans *trans,
-                              struct bkey_s_c_snapshot s)
+static int check_snapshot(struct btree_trans *trans,
+                         struct btree_iter *iter)
 {
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c_snapshot s;
        struct bch_subvolume subvol;
        struct bch_snapshot v;
+       struct bkey_s_c k;
+       struct printbuf buf = PRINTBUF;
+       bool should_have_subvol;
        u32 i, id;
-       int ret;
+       int ret = 0;
 
-       id = le32_to_cpu(s.v->subvol);
-       ret = lockrestart_do(trans, bch2_subvolume_get(trans, id, 0, false, &subvol));
-       if (ret == -ENOENT)
-               bch_err(trans->c, "snapshot node %llu has nonexistent subvolume %u",
-                       s.k->p.offset, id);
+       k = bch2_btree_iter_peek(iter);
+       if (!k.k)
+               return 0;
+
+       ret = bkey_err(k);
        if (ret)
                return ret;
 
-       if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
-               bch_err(trans->c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
-                       s.k->p.offset);
-               return -EINVAL;
-       }
+       if (k.k->type != KEY_TYPE_snapshot)
+               return 0;
 
+       s = bkey_s_c_to_snapshot(k);
        id = le32_to_cpu(s.v->parent);
        if (id) {
                ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
                if (ret == -ENOENT)
-                       bch_err(trans->c, "snapshot node %llu has nonexistent parent %u",
-                               s.k->p.offset, id);
+                       bch_err(c, "snapshot with nonexistent parent:\n  %s",
+                               (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
                if (ret)
-                       return ret;
+                       goto err;
 
                if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
                    le32_to_cpu(v.children[1]) != s.k->p.offset) {
-                       bch_err(trans->c, "snapshot parent %u missing pointer to child %llu",
+                       bch_err(c, "snapshot parent %u missing pointer to child %llu",
                                id, s.k->p.offset);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err;
                }
        }
 
@@ -232,68 +238,155 @@ static int bch2_snapshot_check(struct btree_trans *trans,
 
                ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
                if (ret == -ENOENT)
-                       bch_err(trans->c, "snapshot node %llu has nonexistent child %u",
+                       bch_err(c, "snapshot node %llu has nonexistent child %u",
                                s.k->p.offset, id);
                if (ret)
-                       return ret;
+                       goto err;
 
                if (le32_to_cpu(v.parent) != s.k->p.offset) {
-                       bch_err(trans->c, "snapshot child %u has wrong parent (got %u should be %llu)",
+                       bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
                                id, le32_to_cpu(v.parent), s.k->p.offset);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err;
                }
        }
 
-       return 0;
+       should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
+               !BCH_SNAPSHOT_DELETED(s.v);
+
+       if (should_have_subvol) {
+               id = le32_to_cpu(s.v->subvol);
+               ret = lockrestart_do(trans, bch2_subvolume_get(trans, id, 0, false, &subvol));
+               if (ret == -ENOENT)
+                       bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
+                               (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
+               if (ret)
+                       goto err;
+
+               if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
+                       bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
+                               s.k->p.offset);
+                       ret = -EINVAL;
+                       goto err;
+               }
+       } else {
+               if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n  %s",
+                               (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+                       struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
+
+                       ret = PTR_ERR_OR_ZERO(u);
+                       if (ret)
+                               goto err;
+
+                       bkey_reassemble(&u->k_i, s.s_c);
+                       u->v.subvol = 0;
+                       ret = bch2_trans_update(trans, iter, &u->k_i, 0);
+                       if (ret)
+                               goto err;
+               }
+       }
+
+       if (BCH_SNAPSHOT_DELETED(s.v))
+               set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
 }
 
-int bch2_fs_snapshots_check(struct bch_fs *c)
+int bch2_fs_check_snapshots(struct bch_fs *c)
 {
        struct btree_trans trans;
        struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bch_snapshot s;
-       unsigned id;
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
-                          POS_MIN, 0, k, ret) {
-               if (k.k->type != KEY_TYPE_snapshot)
-                       continue;
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_snapshots,
+                            POS_MIN, BTREE_ITER_PREFETCH);
 
-               ret = bch2_snapshot_check(&trans, bkey_s_c_to_snapshot(k));
+       do {
+               ret = commit_do(&trans, NULL, NULL,
+                               BTREE_INSERT_LAZY_RW|
+                               BTREE_INSERT_NOFAIL,
+                               check_snapshot(&trans, &iter));
                if (ret)
                        break;
-       }
+       } while (bch2_btree_iter_advance(&iter));
        bch2_trans_iter_exit(&trans, &iter);
 
-       if (ret) {
+       if (ret)
                bch_err(c, "error %i checking snapshots", ret);
-               goto err;
+
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+static int check_subvol(struct btree_trans *trans,
+                       struct btree_iter *iter)
+{
+       struct bkey_s_c k;
+       struct bkey_s_c_subvolume subvol;
+       struct bch_snapshot snapshot;
+       unsigned snapid;
+       int ret;
+
+       k = bch2_btree_iter_peek(iter);
+       if (!k.k)
+               return 0;
+
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_subvolume)
+               return 0;
+
+       subvol = bkey_s_c_to_subvolume(k);
+       snapid = le32_to_cpu(subvol.v->snapshot);
+       ret = snapshot_lookup(trans, snapid, &snapshot);
+
+       if (ret == -ENOENT)
+               bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
+                       k.k->p.offset, snapid);
+       if (ret)
+               return ret;
+
+       if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
+               ret = bch2_subvolume_delete(trans, iter->pos.offset);
+               if (ret && ret != -EINTR)
+                       bch_err(trans->c, "error deleting subvolume %llu: %i",
+                               iter->pos.offset, ret);
+               if (ret)
+                       return ret;
        }
 
-       for_each_btree_key(&trans, iter, BTREE_ID_subvolumes,
-                          POS_MIN, 0, k, ret) {
-               if (k.k->type != KEY_TYPE_subvolume)
-                       continue;
-again_2:
-               id = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
-               ret = snapshot_lookup(&trans, id, &s);
-
-               if (ret == -EINTR) {
-                       k = bch2_btree_iter_peek(&iter);
-                       goto again_2;
-               } else if (ret == -ENOENT)
-                       bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
-                               k.k->p.offset, id);
-               else if (ret)
+       return 0;
+}
+
+int bch2_fs_check_subvols(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
+                            POS_MIN, BTREE_ITER_PREFETCH);
+
+       do {
+               ret = commit_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_LAZY_RW|
+                                     BTREE_INSERT_NOFAIL,
+                                     check_subvol(&trans, &iter));
+               if (ret)
                        break;
-       }
+       } while (bch2_btree_iter_advance(&iter));
        bch2_trans_iter_exit(&trans, &iter);
-err:
+
        bch2_trans_exit(&trans);
+
        return ret;
 }
 
@@ -307,7 +400,6 @@ int bch2_fs_snapshots_start(struct bch_fs *c)
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       bool have_deleted = false;
        int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
@@ -317,39 +409,20 @@ int bch2_fs_snapshots_start(struct bch_fs *c)
               if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
                       break;
 
-               if (k.k->type != KEY_TYPE_snapshot) {
-                       bch_err(c, "found wrong key type %u in snapshot node table",
-                               k.k->type);
+               if (k.k->type != KEY_TYPE_snapshot)
                        continue;
-               }
-
-               if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v))
-                       have_deleted = true;
 
-               ret = bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0);
+               ret =   bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?:
+                       bch2_snapshot_set_equiv(&trans, bkey_s_c_to_snapshot(k));
                if (ret)
                        break;
        }
        bch2_trans_iter_exit(&trans, &iter);
 
-       if (ret)
-               goto err;
-
-       ret = bch2_snapshots_set_equiv(&trans);
-       if (ret)
-               goto err;
-err:
        bch2_trans_exit(&trans);
 
-       if (!ret && have_deleted) {
-               bch_info(c, "restarting deletion of dead snapshots");
-               if (c->opts.fsck) {
-                       bch2_delete_dead_snapshots_work(&c->snapshot_delete_work);
-               } else {
-                       bch2_delete_dead_snapshots(c);
-               }
-       }
-
+       if (ret)
+               bch_err(c, "error starting snapshots: %i", ret);
        return ret;
 }
 
@@ -386,8 +459,10 @@ static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
                goto err;
 
        bkey_reassemble(&s->k_i, k);
-
        SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+       SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
+       s->v.subvol = 0;
+
        ret = bch2_trans_update(trans, &iter, &s->k_i, 0);
        if (ret)
                goto err;
@@ -551,6 +626,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
 
                n->v.children[0] = cpu_to_le32(new_snapids[0]);
                n->v.children[1] = cpu_to_le32(new_snapids[1]);
+               n->v.subvol = 0;
                SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
                ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
                if (ret)
@@ -561,13 +637,6 @@ err:
        return ret;
 }
 
-static int snapshot_id_add(snapshot_id_list *s, u32 id)
-{
-       BUG_ON(snapshot_list_has_id(s, id));
-
-       return darray_push(s, id);
-}
-
 static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
                                           snapshot_id_list *deleted,
                                           enum btree_id btree_id)
@@ -601,11 +670,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
 
                if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
                    snapshot_list_has_id(&equiv_seen, equiv)) {
-                       if (btree_id == BTREE_ID_inodes &&
-                           bch2_btree_key_cache_flush(trans, btree_id, iter.pos))
-                               continue;
-
-                       ret = __bch2_trans_do(trans, NULL, NULL,
+                       ret = commit_do(trans, NULL, NULL,
                                              BTREE_INSERT_NOFAIL,
                                bch2_btree_iter_traverse(&iter) ?:
                                bch2_btree_delete_at(trans, &iter,
@@ -613,7 +678,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
                        if (ret)
                                break;
                } else {
-                       ret = snapshot_id_add(&equiv_seen, equiv);
+                       ret = snapshot_list_add(c, &equiv_seen, equiv);
                        if (ret)
                                break;
                }
@@ -627,9 +692,8 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
        return ret;
 }
 
-static void bch2_delete_dead_snapshots_work(struct work_struct *work)
+int bch2_delete_dead_snapshots(struct bch_fs *c)
 {
-       struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -638,6 +702,17 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
        u32 i, id, children[2];
        int ret = 0;
 
+       if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
+               return 0;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags)) {
+               ret = bch2_fs_read_write_early(c);
+               if (ret) {
+                       bch_err(c, "error deleting dead snapshots: error going rw: %i", ret);
+                       return ret;
+               }
+       }
+
        bch2_trans_init(&trans, c, 0, 0);
 
        /*
@@ -664,7 +739,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
                if (ret)
                        continue;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_snapshot_node_set_deleted(&trans, iter.pos.offset));
                if (ret) {
                        bch_err(c, "error deleting snapshot %llu: %i", iter.pos.offset, ret);
@@ -689,7 +764,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
 
                snap = bkey_s_c_to_snapshot(k);
                if (BCH_SNAPSHOT_DELETED(snap.v)) {
-                       ret = snapshot_id_add(&deleted, k.k->p.offset);
+                       ret = snapshot_list_add(c, &deleted, k.k->p.offset);
                        if (ret)
                                break;
                }
@@ -713,7 +788,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
        }
 
        for (i = 0; i < deleted.nr; i++) {
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_snapshot_node_delete(&trans, deleted.data[i]));
                if (ret) {
                        bch_err(c, "error deleting snapshot %u: %i",
@@ -721,15 +796,25 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
                        goto err;
                }
        }
+
+       clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
 err:
        darray_exit(&deleted);
        bch2_trans_exit(&trans);
+       return ret;
+}
+
+static void bch2_delete_dead_snapshots_work(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+
+       bch2_delete_dead_snapshots(c);
        percpu_ref_put(&c->writes);
 }
 
-static void bch2_delete_dead_snapshots(struct bch_fs *c)
+void bch2_delete_dead_snapshots_async(struct bch_fs *c)
 {
-       if (unlikely(!percpu_ref_tryget_live(&c->writes)))
+       if (!percpu_ref_tryget_live(&c->writes))
                return;
 
        if (!queue_work(system_long_wq, &c->snapshot_delete_work))
@@ -739,7 +824,14 @@ static void bch2_delete_dead_snapshots(struct bch_fs *c)
 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
                                           struct btree_trans_commit_hook *h)
 {
-       bch2_delete_dead_snapshots(trans->c);
+       struct bch_fs *c = trans->c;
+
+       set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
+
+       if (!test_bit(BCH_FS_FSCK_DONE, &c->flags))
+               return 0;
+
+       bch2_delete_dead_snapshots_async(c);
        return 0;
 }
 
@@ -830,7 +922,6 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
        struct bkey_s_c k;
        struct bkey_s_c_subvolume subvol;
        struct btree_trans_commit_hook *h;
-       struct bkey_i *delete;
        u32 snapid;
        int ret = 0;
 
@@ -852,14 +943,7 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
        subvol = bkey_s_c_to_subvolume(k);
        snapid = le32_to_cpu(subvol.v->snapshot);
 
-       delete = bch2_trans_kmalloc(trans, sizeof(*delete));
-       ret = PTR_ERR_OR_ZERO(delete);
-       if (ret)
-               goto err;
-
-       bkey_init(&delete->k);
-       delete->k.p = iter.pos;
-       ret = bch2_trans_update(trans, &iter, delete, 0);
+       ret = bch2_btree_delete_at(trans, &iter, 0);
        if (ret)
                goto err;
 
@@ -925,7 +1009,7 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
 
        mutex_lock(&c->snapshots_unlinked_lock);
        if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
-               ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol);
+               ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
        mutex_unlock(&c->snapshots_unlinked_lock);
 
        if (ret)
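diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h
index b1739d29c7d40af0a6082e42c4c5e0ea1cb567fb..02a636644988a4ba51327c071b20906821cc8f93 100644
--- a/libbcachefs/subvolume.h
+++ b/libbcachefs/subvolume.h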
@@ -27,6 +27,16 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
        return snapshot_t(c, id)->parent;
 }
 
+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+{
+       return snapshot_t(c, id)->equiv;
+}
+
+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
+{
+       return id == snapshot_t(c, id)->equiv;
+}
+
 static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
 {
        struct snapshot_t *s = snapshot_t(c, id);
@@ -58,42 +68,40 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
        return id == ancestor;
 }
 
-struct snapshots_seen {
-       struct bpos                     pos;
-       DARRAY(u32)                     ids;
-};
-
-static inline void snapshots_seen_exit(struct snapshots_seen *s)
-{
-       kfree(s->ids.data);
-       s->ids.data = NULL;
-}
-
-static inline void snapshots_seen_init(struct snapshots_seen *s)
+static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
 {
-       memset(s, 0, sizeof(*s));
-}
+       u32 *i;
 
-static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
-{
-       int ret = darray_push(&s->ids, id);
-       if (ret)
-               bch_err(c, "error reallocating snapshots_seen table (size %zu)",
-                       s->ids.size);
-       return ret;
+       darray_for_each(*s, i)
+               if (*i == id)
+                       return true;
+       return false;
 }
 
-static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
+static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id)
 {
        u32 *i;
 
        darray_for_each(*s, i)
-               if (*i == id)
+               if (bch2_snapshot_is_ancestor(c, id, *i))
                        return true;
        return false;
 }
 
-int bch2_fs_snapshots_check(struct bch_fs *);
+static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+       int ret;
+
+       BUG_ON(snapshot_list_has_id(s, id));
+       ret = darray_push(s, id);
+       if (ret)
+               bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+       return ret;
+}
+
+int bch2_fs_check_snapshots(struct bch_fs *);
+int bch2_fs_check_subvols(struct bch_fs *);
+
 void bch2_fs_snapshots_exit(struct bch_fs *);
 int bch2_fs_snapshots_start(struct bch_fs *);
 
@@ -116,6 +124,9 @@ int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
 int bch2_snapshot_node_create(struct btree_trans *, u32,
                              u32 *, u32 *, unsigned);
 
+int bch2_delete_dead_snapshots(struct bch_fs *);
+void bch2_delete_dead_snapshots_async(struct bch_fs *);
+
 int bch2_subvolume_delete(struct btree_trans *, u32);
 int bch2_subvolume_unlink(struct btree_trans *, u32);
 int bch2_subvolume_create(struct btree_trans *, u64, u32,
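diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 1954891ce7ee18f6d36a10f9576ea221b959bd4b..57245caa255f0682f741239ee6ecebb17c885017 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c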
@@ -42,7 +42,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
                             BTREE_ITER_INTENT);
 
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
@@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
        }
 
        pr_info("deleting once");
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
@@ -60,7 +60,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
        }
 
        pr_info("deleting twice");
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
@@ -88,7 +88,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
                             BTREE_ITER_INTENT);
 
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
@@ -99,7 +99,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
        bch2_trans_unlock(&trans);
        bch2_journal_flush_all_pins(&c->journal);
 
-       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
@@ -552,7 +552,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
                k.k.p.offset = test_rand();
                k.k.p.snapshot = U32_MAX;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
                if (ret) {
                        bch_err(c, "error in rand_insert: %i", ret);
@@ -581,7 +581,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
                        k[j].k.p.snapshot = U32_MAX;
                }
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i) ?:
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i) ?:
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i) ?:
@@ -668,7 +668,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr)
 
        for (i = 0; i < nr; i++) {
                rand = test_rand();
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        rand_mixed_trans(&trans, &iter, &cookie, i, rand));
                if (ret) {
                        bch_err(c, "update error in rand_mixed: %i", ret);
@@ -714,7 +714,7 @@ static int rand_delete(struct bch_fs *c, u64 nr)
        for (i = 0; i < nr; i++) {
                struct bpos pos = SPOS(0, test_rand(), U32_MAX);
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        __do_delete(&trans, pos));
                if (ret) {
                        bch_err(c, "error in rand_delete: %i", ret);
@@ -743,7 +743,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
                insert.k.p = iter.pos;
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_btree_iter_traverse(&iter) ?:
                        bch2_trans_update(&trans, &iter, &insert.k_i, 0));
                if (ret) {
@@ -794,7 +794,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
 
                bkey_reassemble(&u.k_i, k);
 
-               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_btree_iter_traverse(&iter) ?:
                        bch2_trans_update(&trans, &iter, &u.k_i, 0));
                if (ret) {