From 0e69c66198aa76c59130747c7f10f5d72e5e2afd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Jul 2022 17:24:21 -0400 Subject: [PATCH] Update bcachefs sources to 17a344f265 bcachefs: Improve fsck for subvols/snapshots --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.c | 14 +- libbcachefs/backpointers.c | 8 +- libbcachefs/bcachefs.h | 2 + libbcachefs/btree_gc.c | 10 +- libbcachefs/btree_update.h | 5 +- libbcachefs/btree_update_interior.c | 2 +- libbcachefs/buckets.c | 2 +- libbcachefs/data_update.c | 15 +- libbcachefs/fs.c | 6 +- libbcachefs/fsck.c | 248 +++++++++---------- libbcachefs/lru.c | 2 +- libbcachefs/recovery.c | 6 + libbcachefs/subvolume.c | 360 +++++++++++++++++----------- libbcachefs/subvolume.h | 59 +++-- libbcachefs/tests.c | 22 +- 16 files changed, 434 insertions(+), 329 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 60db557..55f6e57 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -a03225a783e75d704987ad77fef57891d7a2e115 +17a344f26599e37e7023a6daff813e4d1a96cdd2 diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 4b38fbd..fc77747 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -866,7 +866,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_alloc_key(&trans, &iter, @@ -887,7 +887,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_discard_freespace_key(&trans, &iter)); @@ -904,7 +904,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_discard_freespace_key(&trans, &iter)); @@ -1009,7 +1009,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_alloc_to_lru_ref(&trans, &iter)); @@ -1139,7 +1139,7 @@ static void bch2_do_discards_work(struct work_struct *work) continue; } - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done)); @@ -1277,7 +1277,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); while (nr_to_invalidate-- >= 0) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, invalidate_one_bucket(&trans, ca, &bucket, @@ -1333,7 +1333,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) if (iter.pos.offset >= ca->mi.nbuckets) break; - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, bucket_freespace_init(&trans, &iter)); if (ret) diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 6c74b3a..08d6795 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -627,7 +627,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c) bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, bch2_check_btree_backpointer(&trans, &iter)); @@ -805,7 +805,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) BTREE_ITER_PREFETCH); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_extent_to_backpointers(&trans, &iter)); @@ -818,7 +818,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (ret) break; - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_btree_root_to_backpointers(&trans, btree_id)); @@ -876,7 +876,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) BTREE_ITER_PREFETCH, k, ret) { u64 bp_offset = 0; - while (!(ret = __bch2_trans_do(&trans, NULL, NULL, + while (!(ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_one_backpointer(&trans, iter.pos, &bp_offset))) && diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 8b4d0eb..31e387b 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -515,6 +515,8 @@ enum { BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ BCH_FS_NEED_ANOTHER_GC, + BCH_FS_HAVE_DELETED_SNAPSHOTS, + /* errors: */ BCH_FS_ERROR, BCH_FS_TOPOLOGY_ERROR, diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 9556b05..e260689 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -799,7 +799,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, atomic64_set(&c->key_version, k->k->version.lo); } - ret = __bch2_trans_do(trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_mark_key(trans, old, *k, flags)); fsck_err: err: @@ -1438,7 +1438,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0) break; - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, bch2_alloc_write_key(&trans, &iter, metadata_only)); @@ -1592,7 +1592,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) else *bkey_refcount(new) = cpu_to_le64(r->refcount); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_reflink, new)); kfree(new); @@ -1705,7 +1705,7 @@ inconsistent: for (i = 0; i < new->v.nr_blocks; i++) stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i)); kfree(new); } @@ -2012,7 +2012,7 @@ int bch2_gc_gens(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter)); if (ret) { diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 28f9585..e9127db 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -118,7 +118,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, _ret; \ }) -#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \ +#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) @@ -128,8 +128,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, int _ret; \ \ bch2_trans_init(&trans, (_c), 0, 0); \ - _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ - _do); \ + _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \ bch2_trans_exit(&trans); \ \ _ret; \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 965fdfb..c3ef238 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -610,7 +610,7 @@ static void btree_update_nodes_written(struct btree_update *as) * which may require allocations as well. */ bch2_trans_init(&trans, c, 0, 512); - ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq, + ret = commit_do(&trans, &as->disk_res, &journal_seq, BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 1ea7e2b..fe2cd73 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1861,7 +1861,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - return __bch2_trans_do(trans, NULL, NULL, 0, + return commit_do(trans, NULL, NULL, 0, __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); } diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index cc9ae6d..c181dba 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -23,13 +23,13 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter, update_iter; struct bkey_s_c k; - struct snapshots_seen s; + snapshot_id_list s; int ret; if (!btree_type_has_snapshots(id)) return 0; - snapshots_seen_init(&s); + darray_init(&s); if (!bkey_cmp(old_pos, new_pos)) return 0; @@ -41,7 +41,6 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans, BTREE_ITER_NOT_EXTENTS| BTREE_ITER_ALL_SNAPSHOTS); while (1) { -next: k = bch2_btree_iter_prev(&iter); ret = bkey_err(k); if (ret) @@ -52,11 +51,9 @@ next: if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) { struct bkey_i *update; - u32 *i; - darray_for_each(s.ids, i) - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i)) - goto next; + if (snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) + continue; update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); @@ -79,13 +76,13 @@ next: if (ret) break; - ret = snapshots_seen_add(c, &s, k.k->p.snapshot); + ret = snapshot_list_add(c, &s, k.k->p.snapshot); if (ret) break; } } bch2_trans_iter_exit(trans, &iter); - darray_exit(&s.ids); + darray_exit(&s); return ret; } diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index bb94ba5..2354c98 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -443,7 +443,7 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_link_trans(&trans, inode_inum(dir), &dir_u, inode_inum(inode), &inode_u, @@ -492,7 +492,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, inode_inum(dir), &dir_u, @@ -614,7 +614,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns, goto err; } - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_rename_trans(&trans, inode_inum(src_dir), &src_dir_u, inode_inum(dst_dir), &dst_dir_u, diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index f1abec9..c558895 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -220,7 +220,7 @@ static int write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) { - int ret = __bch2_trans_do(trans, NULL, NULL, + int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, __write_inode(trans, inode, snapshot)); @@ -231,6 +231,7 @@ static int write_inode(struct btree_trans *trans, static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot) { + struct bch_fs *c = trans->c; struct btree_iter iter = { NULL }; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; @@ -263,7 +264,7 @@ retry: goto err; if (!bkey_is_inode(k.k)) { - bch2_fs_inconsistent(trans->c, + bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum, snapshot); ret = -EIO; @@ -273,11 +274,8 @@ retry: bch2_inode_unpack(k, &inode_u); /* Subvolume root? */ - if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(trans, inode_u.bi_subvol); - if (ret) - goto err; - } + if (inode_u.bi_subvol) + bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum); bkey_inode_generation_init(&delete.k_i); delete.k.p = iter.pos; @@ -434,7 +432,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { - int ret = __bch2_trans_do(trans, NULL, NULL, + int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, __reattach_inode(trans, inode, inode_snapshot)); @@ -471,19 +469,60 @@ out: return ret; } -static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos) +struct snapshots_seen_entry { + u32 id; + u32 equiv; +}; + +struct snapshots_seen { + struct bpos pos; + DARRAY(struct snapshots_seen_entry) ids; +}; + +static inline void snapshots_seen_exit(struct snapshots_seen *s) +{ + darray_exit(&s->ids); +} + +static inline void snapshots_seen_init(struct snapshots_seen *s) { - pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; + memset(s, 0, sizeof(*s)); +} + +static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, + enum btree_id btree_id, struct bpos pos) +{ + struct snapshots_seen_entry *i, n = { + .id = pos.snapshot, + .equiv = bch2_snapshot_equiv(c, pos.snapshot), + }; + int ret; if (bkey_cmp(s->pos, pos)) s->ids.nr = 0; + + pos.snapshot = n.equiv; s->pos = pos; - /* Might get called multiple times due to lock restarts */ - if (s->ids.nr && s->ids.data[s->ids.nr - 1] == pos.snapshot) - return 0; + darray_for_each(s->ids, i) + if (i->equiv == n.equiv) { + if (i->id != n.id) { + bch_err(c, "snapshot deletion did not run correctly:\n" + " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", + bch2_btree_ids[btree_id], + pos.inode, pos.offset, + i->id, n.id, n.equiv); + return -EINVAL; + } - return snapshots_seen_add(c, s, pos.snapshot); + return 0; + } + + ret = darray_push(&s->ids, n); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); + return ret; } /** @@ -496,15 +535,15 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see u32 id, u32 ancestor) { ssize_t i; + u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0; BUG_ON(id > ancestor); - - id = snapshot_t(c, id)->equiv; - ancestor = snapshot_t(c, ancestor)->equiv; + BUG_ON(!bch2_snapshot_is_equiv(c, id)); + BUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); /* @ancestor should be the snapshot most recently added to @seen */ - BUG_ON(!seen->ids.nr || seen->ids.data[seen->ids.nr - 1] != ancestor); - BUG_ON(seen->pos.snapshot != ancestor); + BUG_ON(ancestor != seen->pos.snapshot); + BUG_ON(ancestor != top); if (id == ancestor) return true; @@ -513,10 +552,10 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see return false; for (i = seen->ids.nr - 2; - i >= 0 && seen->ids.data[i] >= id; + i >= 0 && seen->ids.data[i].equiv >= id; --i) - if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]) && - bch2_snapshot_is_ancestor(c, seen->ids.data[i], ancestor)) + if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) && + bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor)) return false; return true; @@ -541,8 +580,9 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, : bch2_snapshot_is_ancestor(c, src, dst); } -#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ - for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && (_i)->snapshot <= (_snapshot); _i++)\ +#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ + for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ + (_i)->snapshot <= (_snapshot); _i++) \ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) struct inode_walker_entry { @@ -577,7 +617,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, return darray_push(&w->inodes, ((struct inode_walker_entry) { .inode = u, - .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv, + .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot), })); } @@ -587,10 +627,10 @@ static int __walk_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - unsigned i, ancestor_pos; + unsigned i; int ret; - pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; + pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot); if (pos.inode == w->cur_inum) { w->first_this_inode = false; @@ -623,17 +663,20 @@ found: BUG_ON(pos.snapshot > w->inodes.data[i].snapshot); if (pos.snapshot != w->inodes.data[i].snapshot) { - ancestor_pos = i; + struct inode_walker_entry e = w->inodes.data[i]; + + e.snapshot = pos.snapshot; + e.count = 0; + + bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u", + pos.inode, pos.snapshot, w->inodes.data[i].snapshot); while (i && w->inodes.data[i - 1].snapshot > pos.snapshot) --i; - ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]); + ret = darray_insert_item(&w->inodes, i, e); if (ret) return ret; - - w->inodes.data[i].snapshot = pos.snapshot; - w->inodes.data[i].count = 0; } return i; @@ -653,17 +696,19 @@ static int __get_visible_inodes(struct btree_trans *trans, for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); + if (k.k->p.offset != inum) break; - if (!bkey_is_inode(k.k)) + if (!ref_visible(c, s, s->pos.snapshot, equiv)) continue; - if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) { + if (bkey_is_inode(k.k)) add_inode(c, w, k); - if (k.k->p.snapshot >= s->pos.snapshot) - break; - } + + if (equiv >= s->pos.snapshot) + break; } bch2_trans_iter_exit(trans, &iter); @@ -678,7 +723,7 @@ static int check_key_has_snapshot(struct btree_trans *trans, struct printbuf buf = PRINTBUF; int ret = 0; - if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, + if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, "key in missing snapshot: %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, @@ -786,6 +831,7 @@ fsck_err: static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *prev, + struct snapshots_seen *s, bool full) { struct bch_fs *c = trans->c; @@ -808,6 +854,10 @@ static int check_inode(struct btree_trans *trans, if (ret) return 0; + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); + if (ret) + goto err; + /* * if snapshot id isn't a leaf node, skip it - deletion in * particular is not atomic, so on the internal snapshot nodes @@ -930,8 +980,10 @@ static int check_inodes(struct bch_fs *c, bool full) struct btree_trans trans; struct btree_iter iter; struct bch_inode_unpacked prev = { 0 }; + struct snapshots_seen s; int ret; + snapshots_seen_init(&s); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN, @@ -940,81 +992,22 @@ static int check_inodes(struct bch_fs *c, bool full) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, - check_inode(&trans, &iter, &prev, full)); + check_inode(&trans, &iter, &prev, &s, full)); if (ret) break; } while (bch2_btree_iter_advance(&iter)); bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); + snapshots_seen_exit(&s); if (ret) bch_err(c, "error %i from check_inodes()", ret); return ret; } -static int check_subvol(struct btree_trans *trans, - struct btree_iter *iter) -{ - struct bkey_s_c k; - struct bkey_s_c_subvolume subvol; - int ret; - - k = bch2_btree_iter_peek(iter); - if (!k.k) - return 0; - - ret = bkey_err(k); - if (ret) - return ret; - - if (k.k->type != KEY_TYPE_subvolume) - return 0; - - subvol = bkey_s_c_to_subvolume(k); - - if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - if (ret && ret != -EINTR) - bch_err(trans->c, "error deleting subvolume %llu: %i", - iter->pos.offset, ret); - if (ret) - return ret; - } - - return 0; -} - -noinline_for_stack -static int check_subvols(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - int ret; - - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes, - POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH); - - do { - ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, - check_subvol(&trans, &iter)); - if (ret) - break; - } while (bch2_btree_iter_advance(&iter)); - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - return ret; -} - /* * Checking for overlapping extents needs to be reimplemented */ @@ -1158,6 +1151,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; + struct bpos equiv; int ret = 0; peek: k = bch2_btree_iter_peek(iter); @@ -1174,7 +1168,10 @@ peek: goto out; } - ret = snapshots_seen_update(c, s, k.k->p); + equiv = k.k->p; + equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); + + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) goto err; @@ -1209,7 +1206,7 @@ peek: } } #endif - ret = __walk_inode(trans, inode, k.k->p); + ret = __walk_inode(trans, inode, equiv); if (ret < 0) goto err; @@ -1241,8 +1238,8 @@ peek: goto out; } - if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) { - for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) { + if (!bch2_snapshot_internal_node(c, equiv.snapshot)) { + for_each_visible_inode(c, s, inode, equiv.snapshot, i) { if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && k.k->type != KEY_TYPE_reservation && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, @@ -1251,7 +1248,7 @@ peek: bch2_fs_lazy_rw(c); ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, - k.k->p.snapshot), + equiv.snapshot), POS(k.k->p.inode, U64_MAX), 0, NULL) ?: -EINTR; goto out; @@ -1260,7 +1257,7 @@ peek: } if (bkey_extent_is_allocation(k.k)) - for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) + for_each_visible_inode(c, s, inode, equiv.snapshot, i) i->count += k.k->size; #if 0 bch2_bkey_buf_reassemble(&prev, c, k); @@ -1306,7 +1303,7 @@ static int check_extents(struct bch_fs *c) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_extent(&trans, &iter, &w, &s)); @@ -1495,6 +1492,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; + struct bpos equiv; int ret = 0; peek: k = bch2_btree_iter_peek(iter); @@ -1511,7 +1509,10 @@ peek: goto out; } - ret = snapshots_seen_update(c, s, k.k->p); + equiv = k.k->p; + equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); + + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) goto err; @@ -1529,7 +1530,7 @@ peek: goto peek; } - ret = __walk_inode(trans, dir, k.k->p); + ret = __walk_inode(trans, dir, equiv); if (ret < 0) goto err; @@ -1629,7 +1630,8 @@ peek: goto err; if (fsck_err_on(!target->inodes.nr, c, - "dirent points to missing inode:\n%s", + "dirent points to missing inode: (equiv %u)\n%s", + equiv.snapshot, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1647,7 +1649,7 @@ peek: } if (d.v->d_type == DT_DIR) - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) + for_each_visible_inode(c, s, dir, equiv.snapshot, i) i->count++; out: @@ -1687,7 +1689,7 @@ static int check_dirents(struct bch_fs *c) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_dirent(&trans, &iter, &hash_info, @@ -1774,7 +1776,7 @@ static int check_xattrs(struct bch_fs *c) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_xattr(&trans, &iter, &hash_info, @@ -1814,7 +1816,7 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.flags = 0; root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i)); @@ -1903,7 +1905,7 @@ static int check_path(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret = 0; - snapshot = snapshot_t(c, snapshot)->equiv; + snapshot = bch2_snapshot_equiv(c, snapshot); p->nr = 0; while (!(inode->bi_inum == BCACHEFS_ROOT_INO && @@ -1977,7 +1979,7 @@ static int check_path(struct btree_trans *trans, if (!fsck_err(c, "directory structure loop")) return 0; - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, remove_backpointer(trans, inode)); @@ -2188,7 +2190,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = snapshots_seen_update(c, &s, k.k->p); + ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); if (ret) break; @@ -2366,7 +2368,7 @@ static int fix_reflink_p(struct bch_fs *c) BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (k.k->type == KEY_TYPE_reflink_p) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, fix_reflink_p_key(&trans, &iter)); @@ -2386,9 +2388,10 @@ static int fix_reflink_p(struct bch_fs *c) */ int bch2_fsck_full(struct bch_fs *c) { - return bch2_fs_snapshots_check(c) ?: + return bch2_fs_check_snapshots(c) ?: + bch2_fs_check_subvols(c) ?: + bch2_delete_dead_snapshots(c) ?: check_inodes(c, true) ?: - check_subvols(c) ?: check_extents(c) ?: check_dirents(c) ?: check_xattrs(c) ?: @@ -2400,5 +2403,8 @@ int bch2_fsck_full(struct bch_fs *c) int bch2_fsck_walk_inodes_only(struct bch_fs *c) { - return check_inodes(c, false); + return bch2_fs_check_snapshots(c) ?: + bch2_fs_check_subvols(c) ?: + bch2_delete_dead_snapshots(c) ?: + check_inodes(c, false); } diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 5a09b55..94ecb3a 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -204,7 +204,7 @@ int bch2_check_lrus(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_lru_key(&trans, &iter)); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index eea025a..64b1e79 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1428,6 +1428,12 @@ out: bch2_journal_entries_free(c); } kfree(clean); + + if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) { + bch2_fs_read_write_early(c); + bch2_delete_dead_snapshots_async(c); + } + if (ret) bch_err(c, "Error in recovery: %s (%i)", err, ret); else diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 60b60de..463b5af 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -9,15 +9,12 @@ /* Snapshot tree: */ -static void bch2_delete_dead_snapshots_work(struct work_struct *); -static void bch2_delete_dead_snapshots(struct bch_fs *); - void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); - prt_printf(out, "is_subvol %llu deleted %llu parent %u children %u %u subvol %u", + prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u", BCH_SNAPSHOT_SUBVOL(s.v), BCH_SNAPSHOT_DELETED(s.v), le32_to_cpu(s.v->parent), @@ -143,87 +140,96 @@ static int snapshot_live(struct btree_trans *trans, u32 id) return !BCH_SNAPSHOT_DELETED(&v); } -static int bch2_snapshots_set_equiv(struct btree_trans *trans) +static int bch2_snapshot_set_equiv(struct btree_trans *trans, + struct bkey_s_c_snapshot snap) { struct bch_fs *c = trans->c; + unsigned i, nr_live = 0, live_idx = 0; + u32 id = snap.k->p.offset, child[2] = { + [0] = le32_to_cpu(snap.v->children[0]), + [1] = le32_to_cpu(snap.v->children[1]) + }; + + for (i = 0; i < 2; i++) { + int ret = snapshot_live(trans, child[i]); + if (ret < 0) + return ret; + + if (ret) + live_idx = i; + nr_live += ret; + } + + snapshot_t(c, id)->equiv = nr_live == 1 + ? snapshot_t(c, child[live_idx])->equiv + : id; + return 0; +} + +static int bch2_snapshots_set_equiv(struct btree_trans *trans) +{ struct btree_iter iter; struct bkey_s_c k; - struct bkey_s_c_snapshot snap; - unsigned i; int ret; for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { - u32 id = k.k->p.offset, child[2]; - unsigned nr_live = 0, live_idx = 0; - if (k.k->type != KEY_TYPE_snapshot) continue; - snap = bkey_s_c_to_snapshot(k); - child[0] = le32_to_cpu(snap.v->children[0]); - child[1] = le32_to_cpu(snap.v->children[1]); - - for (i = 0; i < 2; i++) { - ret = snapshot_live(trans, child[i]); - if (ret < 0) - goto err; - - if (ret) - live_idx = i; - nr_live += ret; - } - - snapshot_t(c, id)->equiv = nr_live == 1 - ? snapshot_t(c, child[live_idx])->equiv - : id; + ret = bch2_snapshot_set_equiv(trans, bkey_s_c_to_snapshot(k)); + if (ret) + break; } -err: bch2_trans_iter_exit(trans, &iter); if (ret) - bch_err(c, "error walking snapshots: %i", ret); + bch_err(trans->c, "error in bch2_snapshots_set_equiv: %i", ret); return ret; } /* fsck: */ -static int bch2_snapshot_check(struct btree_trans *trans, - struct bkey_s_c_snapshot s) +static int check_snapshot(struct btree_trans *trans, + struct btree_iter *iter) { + struct bch_fs *c = trans->c; + struct bkey_s_c_snapshot s; struct bch_subvolume subvol; struct bch_snapshot v; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + bool should_have_subvol; u32 i, id; - int ret; + int ret = 0; - id = le32_to_cpu(s.v->subvol); - ret = lockrestart_do(trans, bch2_subvolume_get(trans, id, 0, false, &subvol)); - if (ret == -ENOENT) - bch_err(trans->c, "snapshot node %llu has nonexistent subvolume %u", - s.k->p.offset, id); + k = bch2_btree_iter_peek(iter); + if (!k.k) + return 0; + + ret = bkey_err(k); if (ret) return ret; - if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) { - bch_err(trans->c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", - s.k->p.offset); - return -EINVAL; - } + if (k.k->type != KEY_TYPE_snapshot) + return 0; + s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); if (id) { ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v)); if (ret == -ENOENT) - bch_err(trans->c, "snapshot node %llu has nonexistent parent %u", - s.k->p.offset, id); + bch_err(c, "snapshot with nonexistent parent:\n %s", + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); if (ret) - return ret; + goto err; if (le32_to_cpu(v.children[0]) != s.k->p.offset && le32_to_cpu(v.children[1]) != s.k->p.offset) { - bch_err(trans->c, "snapshot parent %u missing pointer to child %llu", + bch_err(c, "snapshot parent %u missing pointer to child %llu", id, s.k->p.offset); - return -EINVAL; + ret = -EINVAL; + goto err; } } @@ -232,68 +238,155 @@ static int bch2_snapshot_check(struct btree_trans *trans, ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v)); if (ret == -ENOENT) - bch_err(trans->c, "snapshot node %llu has nonexistent child %u", + bch_err(c, "snapshot node %llu has nonexistent child %u", s.k->p.offset, id); if (ret) - return ret; + goto err; if (le32_to_cpu(v.parent) != s.k->p.offset) { - bch_err(trans->c, "snapshot child %u has wrong parent (got %u should be %llu)", + bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", id, le32_to_cpu(v.parent), s.k->p.offset); - return -EINVAL; + ret = -EINVAL; + goto err; } } - return 0; + should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) && + !BCH_SNAPSHOT_DELETED(s.v); + + if (should_have_subvol) { + id = le32_to_cpu(s.v->subvol); + ret = lockrestart_do(trans, bch2_subvolume_get(trans, id, 0, false, &subvol)); + if (ret == -ENOENT) + bch_err(c, "snapshot points to nonexistent subvolume:\n %s", + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); + if (ret) + goto err; + + if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) { + bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", + s.k->p.offset); + ret = -EINVAL; + goto err; + } + } else { + if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s", + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u)); + + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + bkey_reassemble(&u->k_i, s.s_c); + u->v.subvol = 0; + ret = bch2_trans_update(trans, iter, &u->k_i, 0); + if (ret) + goto err; + } + } + + if (BCH_SNAPSHOT_DELETED(s.v)) + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); +err: +fsck_err: + printbuf_exit(&buf); + return ret; } -int bch2_fs_snapshots_check(struct bch_fs *c) +int bch2_fs_check_snapshots(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter; - struct bkey_s_c k; - struct bch_snapshot s; - unsigned id; int ret; bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { - if (k.k->type != KEY_TYPE_snapshot) - continue; + bch2_trans_iter_init(&trans, &iter, BTREE_ID_snapshots, + POS_MIN, BTREE_ITER_PREFETCH); - ret = bch2_snapshot_check(&trans, bkey_s_c_to_snapshot(k)); + do { + ret = commit_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_snapshot(&trans, &iter)); if (ret) break; - } + } while (bch2_btree_iter_advance(&iter)); bch2_trans_iter_exit(&trans, &iter); - if (ret) { + if (ret) bch_err(c, "error %i checking snapshots", ret); - goto err; + + bch2_trans_exit(&trans); + return ret; +} + +static int check_subvol(struct btree_trans *trans, + struct btree_iter *iter) +{ + struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; + struct bch_snapshot snapshot; + unsigned snapid; + int ret; + + k = bch2_btree_iter_peek(iter); + if (!k.k) + return 0; + + ret = bkey_err(k); + if (ret) + return ret; + + if (k.k->type != KEY_TYPE_subvolume) + return 0; + + subvol = bkey_s_c_to_subvolume(k); + snapid = le32_to_cpu(subvol.v->snapshot); + ret = snapshot_lookup(trans, snapid, &snapshot); + + if (ret == -ENOENT) + bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u", + k.k->p.offset, snapid); + if (ret) + return ret; + + if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + ret = bch2_subvolume_delete(trans, iter->pos.offset); + if (ret && ret != -EINTR) + bch_err(trans->c, "error deleting subvolume %llu: %i", + iter->pos.offset, ret); + if (ret) + return ret; } - for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, - POS_MIN, 0, k, ret) { - if (k.k->type != KEY_TYPE_subvolume) - continue; -again_2: - id = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot); - ret = snapshot_lookup(&trans, id, &s); - - if (ret == -EINTR) { - k = bch2_btree_iter_peek(&iter); - goto again_2; - } else if (ret == -ENOENT) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, id); - else if (ret) + return 0; +} + +int bch2_fs_check_subvols(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes, + POS_MIN, BTREE_ITER_PREFETCH); + + do { + ret = commit_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_subvol(&trans, &iter)); + if (ret) break; - } + } while (bch2_btree_iter_advance(&iter)); bch2_trans_iter_exit(&trans, &iter); -err: + bch2_trans_exit(&trans); + return ret; } @@ -307,7 +400,6 @@ int bch2_fs_snapshots_start(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - bool have_deleted = false; int ret = 0; bch2_trans_init(&trans, c, 0, 0); @@ -317,39 +409,20 @@ int bch2_fs_snapshots_start(struct bch_fs *c) if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) break; - if (k.k->type != KEY_TYPE_snapshot) { - bch_err(c, "found wrong key type %u in snapshot node table", - k.k->type); + if (k.k->type != KEY_TYPE_snapshot) continue; - } - - if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)) - have_deleted = true; - ret = bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0); + ret = bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?: + bch2_snapshot_set_equiv(&trans, bkey_s_c_to_snapshot(k)); if (ret) break; } bch2_trans_iter_exit(&trans, &iter); - if (ret) - goto err; - - ret = bch2_snapshots_set_equiv(&trans); - if (ret) - goto err; -err: bch2_trans_exit(&trans); - if (!ret && have_deleted) { - bch_info(c, "restarting deletion of dead snapshots"); - if (c->opts.fsck) { - bch2_delete_dead_snapshots_work(&c->snapshot_delete_work); - } else { - bch2_delete_dead_snapshots(c); - } - } - + if (ret) + bch_err(c, "error starting snapshots: %i", ret); return ret; } @@ -386,8 +459,10 @@ static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) goto err; bkey_reassemble(&s->k_i, k); - SET_BCH_SNAPSHOT_DELETED(&s->v, true); + SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); + s->v.subvol = 0; + ret = bch2_trans_update(trans, &iter, &s->k_i, 0); if (ret) goto err; @@ -551,6 +626,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, n->v.children[0] = cpu_to_le32(new_snapids[0]); n->v.children[1] = cpu_to_le32(new_snapids[1]); + n->v.subvol = 0; SET_BCH_SNAPSHOT_SUBVOL(&n->v, false); ret = bch2_trans_update(trans, &iter, &n->k_i, 0); if (ret) @@ -561,13 +637,6 @@ err: return ret; } -static int snapshot_id_add(snapshot_id_list *s, u32 id) -{ - BUG_ON(snapshot_list_has_id(s, id)); - - return darray_push(s, id); -} - static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, snapshot_id_list *deleted, enum btree_id btree_id) @@ -601,11 +670,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, if (snapshot_list_has_id(deleted, k.k->p.snapshot) || snapshot_list_has_id(&equiv_seen, equiv)) { - if (btree_id == BTREE_ID_inodes && - bch2_btree_key_cache_flush(trans, btree_id, iter.pos)) - continue; - - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(trans, &iter, @@ -613,7 +678,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, if (ret) break; } else { - ret = snapshot_id_add(&equiv_seen, equiv); + ret = snapshot_list_add(c, &equiv_seen, equiv); if (ret) break; } @@ -627,9 +692,8 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, return ret; } -static void bch2_delete_dead_snapshots_work(struct work_struct *work) +int bch2_delete_dead_snapshots(struct bch_fs *c) { - struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; @@ -638,6 +702,17 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) u32 i, id, children[2]; int ret = 0; + if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) + return 0; + + if (!test_bit(BCH_FS_STARTED, &c->flags)) { + ret = bch2_fs_read_write_early(c); + if (ret) { + bch_err(c, "error deleleting dead snapshots: error going rw: %i", ret); + return ret; + } + } + bch2_trans_init(&trans, c, 0, 0); /* @@ -664,7 +739,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) if (ret) continue; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_set_deleted(&trans, iter.pos.offset)); if (ret) { bch_err(c, "error deleting snapshot %llu: %i", iter.pos.offset, ret); @@ -689,7 +764,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) snap = bkey_s_c_to_snapshot(k); if (BCH_SNAPSHOT_DELETED(snap.v)) { - ret = snapshot_id_add(&deleted, k.k->p.offset); + ret = snapshot_list_add(c, &deleted, k.k->p.offset); if (ret) break; } @@ -713,7 +788,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) } for (i = 0; i < deleted.nr; i++) { - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_delete(&trans, deleted.data[i])); if (ret) { bch_err(c, "error deleting snapshot %u: %i", @@ -721,15 +796,25 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) goto err; } } + + clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); err: darray_exit(&deleted); bch2_trans_exit(&trans); + return ret; +} + +static void bch2_delete_dead_snapshots_work(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); + + bch2_delete_dead_snapshots(c); percpu_ref_put(&c->writes); } -static void bch2_delete_dead_snapshots(struct bch_fs *c) +void bch2_delete_dead_snapshots_async(struct bch_fs *c) { - if (unlikely(!percpu_ref_tryget_live(&c->writes))) + if (!percpu_ref_tryget_live(&c->writes)) return; if (!queue_work(system_long_wq, &c->snapshot_delete_work)) @@ -739,7 +824,14 @@ static void bch2_delete_dead_snapshots(struct bch_fs *c) static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, struct btree_trans_commit_hook *h) { - bch2_delete_dead_snapshots(trans->c); + struct bch_fs *c = trans->c; + + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + + if (!test_bit(BCH_FS_FSCK_DONE, &c->flags)) + return 0; + + bch2_delete_dead_snapshots_async(c); return 0; } @@ -830,7 +922,6 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) struct bkey_s_c k; struct bkey_s_c_subvolume subvol; struct btree_trans_commit_hook *h; - struct bkey_i *delete; u32 snapid; int ret = 0; @@ -852,14 +943,7 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); - ret = PTR_ERR_OR_ZERO(delete); - if (ret) - goto err; - - bkey_init(&delete->k); - delete->k.p = iter.pos; - ret = bch2_trans_update(trans, &iter, delete, 0); + ret = bch2_btree_delete_at(trans, &iter, 0); if (ret) goto err; @@ -925,7 +1009,7 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, mutex_lock(&c->snapshots_unlinked_lock); if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) - ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol); + ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); mutex_unlock(&c->snapshots_unlinked_lock); if (ret) diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index b1739d2..02a6366 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -27,6 +27,16 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) return snapshot_t(c, id)->parent; } +static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + return snapshot_t(c, id)->equiv; +} + +static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) +{ + return id == snapshot_t(c, id)->equiv; +} + static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id) { struct snapshot_t *s = snapshot_t(c, id); @@ -58,42 +68,40 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances return id == ancestor; } -struct snapshots_seen { - struct bpos pos; - DARRAY(u32) ids; -}; - -static inline void snapshots_seen_exit(struct snapshots_seen *s) -{ - kfree(s->ids.data); - s->ids.data = NULL; -} - -static inline void snapshots_seen_init(struct snapshots_seen *s) +static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) { - memset(s, 0, sizeof(*s)); -} + u32 *i; -static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) -{ - int ret = darray_push(&s->ids, id); - if (ret) - bch_err(c, "error reallocating snapshots_seen table (size %zu)", - s->ids.size); - return ret; + darray_for_each(*s, i) + if (*i == id) + return true; + return false; } -static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) +static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) { u32 *i; darray_for_each(*s, i) - if (*i == id) + if (bch2_snapshot_is_ancestor(c, id, *i)) return true; return false; } -int bch2_fs_snapshots_check(struct bch_fs *); +static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) +{ + int ret; + + BUG_ON(snapshot_list_has_id(s, id)); + ret = darray_push(s, id); + if (ret) + bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); + return ret; +} + +int bch2_fs_check_snapshots(struct bch_fs *); +int bch2_fs_check_subvols(struct bch_fs *); + void bch2_fs_snapshots_exit(struct bch_fs *); int bch2_fs_snapshots_start(struct bch_fs *); @@ -116,6 +124,9 @@ int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_snapshot_node_create(struct btree_trans *, u32, u32 *, u32 *, unsigned); +int bch2_delete_dead_snapshots(struct bch_fs *); +void bch2_delete_dead_snapshots_async(struct bch_fs *); + int bch2_subvolume_delete(struct btree_trans *, u32); int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index 1954891..57245ca 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -42,7 +42,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { @@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr) } pr_info("deleting once"); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -60,7 +60,7 @@ static int test_delete(struct bch_fs *c, u64 nr) } pr_info("deleting twice"); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -88,7 +88,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { @@ -99,7 +99,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_unlock(&trans); bch2_journal_flush_all_pins(&c->journal); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -552,7 +552,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); if (ret) { bch_err(c, "error in rand_insert: %i", ret); @@ -581,7 +581,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) k[j].k.p.snapshot = U32_MAX; } - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i) ?: @@ -668,7 +668,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { rand = test_rand(); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, rand_mixed_trans(&trans, &iter, &cookie, i, rand)); if (ret) { bch_err(c, "update error in rand_mixed: %i", ret); @@ -714,7 +714,7 @@ static int rand_delete(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __do_delete(&trans, pos)); if (ret) { bch_err(c, "error in rand_delete: %i", ret); @@ -743,7 +743,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { insert.k.p = iter.pos; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &insert.k_i, 0)); if (ret) { @@ -794,7 +794,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &u.k_i, 0)); if (ret) { -- 2.39.5