From 21ae8a4b715acd326e6404ce6409ae329566eb64 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Jul 2023 14:17:18 -0400 Subject: [PATCH 1/1] Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- include/linux/rhashtable.h | 1 + libbcachefs/bcachefs.h | 17 +-- libbcachefs/bcachefs_format.h | 76 +++++++---- libbcachefs/btree_iter.h | 8 ++ libbcachefs/recovery.c | 42 ++++-- libbcachefs/recovery.h | 2 + libbcachefs/subvolume.c | 237 +++++++++++++++++++++++++++------- libbcachefs/subvolume.h | 33 ++++- libbcachefs/subvolume_types.h | 2 + libbcachefs/super-io.c | 32 ++++- libbcachefs/super-io.h | 4 + 12 files changed, 354 insertions(+), 102 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 381d645..87a4c7d 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -fb39031ade476044b4d89e6a8f20de8e025be39c +070ec8d07bcab34fde39499a79b9da6f4254ec7c diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c5e717b..adeef32 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -28,6 +28,7 @@ #include #define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) #include /* diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 88a1782..d8c0206 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -660,12 +660,11 @@ enum bch_write_ref { #define PASS_FSCK BIT(1) #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) -#define PASS_UPGRADE(v) ((v) << 4) #define BCH_RECOVERY_PASSES() \ x(alloc_read, PASS_ALWAYS) \ x(stripes_read, PASS_ALWAYS) \ - x(initialize_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ + x(initialize_subvolumes, 0) \ x(snapshots_read, PASS_ALWAYS) \ x(check_allocations, PASS_FSCK) \ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ @@ -677,8 +676,8 @@ enum bch_write_ref { x(check_extents_to_backpointers,PASS_FSCK) \ x(check_alloc_to_lru_refs, PASS_FSCK) \ x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ - x(bucket_gens_init, PASS_UPGRADE(bcachefs_metadata_version_bucket_gens)) \ - x(fs_upgrade_for_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ + x(bucket_gens_init, 0) \ + x(fs_upgrade_for_subvolumes, 0) \ x(check_snapshot_trees, PASS_FSCK) \ x(check_snapshots, PASS_FSCK) \ x(check_subvols, PASS_FSCK) \ @@ -690,7 +689,7 @@ enum bch_write_ref { x(check_root, PASS_FSCK) \ x(check_directory_structure, PASS_FSCK) \ x(check_nlinks, PASS_FSCK) \ - x(fix_reflink_p, PASS_UPGRADE(bcachefs_metadata_version_reflink_p_fix)) \ + x(fix_reflink_p, 0) \ enum bch_recovery_pass { #define x(n, when) BCH_RECOVERY_PASS_##n, @@ -1033,6 +1032,8 @@ struct bch_fs { u64 journal_replay_seq_start; u64 journal_replay_seq_end; enum bch_recovery_pass curr_recovery_pass; + /* bitmap of explicitly enabled recovery passes: */ + u64 recovery_passes_explicit; /* DEBUG JUNK */ struct dentry *fs_debug_dir; @@ -1177,12 +1178,6 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) return dev < c->sb.nr_devices && c->devs[dev]; } -static inline bool bch2_version_upgrading_to(const struct bch_fs *c, unsigned new_version) -{ - return c->sb.version_upgrade_complete < new_version && - c->sb.version >= new_version; -} - #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 78771d8..6d693e4 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1148,6 +1148,8 @@ struct bch_snapshot { __le32 children[2]; __le32 subvol; __le32 tree; + __le32 depth; + __le32 skip[3]; }; LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) @@ -1578,32 +1580,60 @@ struct bch_sb_field_journal_seq_blacklist { #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) -#define BCH_METADATA_VERSIONS() \ - x(bkey_renumber, BCH_VERSION(0, 10)) \ - x(inode_btree_change, BCH_VERSION(0, 11)) \ - x(snapshot, BCH_VERSION(0, 12)) \ - x(inode_backpointers, BCH_VERSION(0, 13)) \ - x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ - x(snapshot_2, BCH_VERSION(0, 15)) \ - x(reflink_p_fix, BCH_VERSION(0, 16)) \ - x(subvol_dirent, BCH_VERSION(0, 17)) \ - x(inode_v2, BCH_VERSION(0, 18)) \ - x(freespace, BCH_VERSION(0, 19)) \ - x(alloc_v4, BCH_VERSION(0, 20)) \ - x(new_data_types, BCH_VERSION(0, 21)) \ - x(backpointers, BCH_VERSION(0, 22)) \ - x(inode_v3, BCH_VERSION(0, 23)) \ - x(unwritten_extents, BCH_VERSION(0, 24)) \ - x(bucket_gens, BCH_VERSION(0, 25)) \ - x(lru_v2, BCH_VERSION(0, 26)) \ - x(fragmentation_lru, BCH_VERSION(0, 27)) \ - x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ - x(snapshot_trees, BCH_VERSION(0, 29)) \ - x(major_minor, BCH_VERSION(1, 0)) +#define RECOVERY_PASS_ALL_FSCK (1ULL << 63) + +#define BCH_METADATA_VERSIONS() \ + x(bkey_renumber, BCH_VERSION(0, 10), \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_btree_change, BCH_VERSION(0, 11), \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot, BCH_VERSION(0, 12), \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_backpointers, BCH_VERSION(0, 13), \ + RECOVERY_PASS_ALL_FSCK) \ + x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_2, BCH_VERSION(0, 15), \ + BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \ + BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \ + RECOVERY_PASS_ALL_FSCK) \ + x(reflink_p_fix, BCH_VERSION(0, 16), \ + BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \ + x(subvol_dirent, BCH_VERSION(0, 17), \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_v2, BCH_VERSION(0, 18), \ + RECOVERY_PASS_ALL_FSCK) \ + x(freespace, BCH_VERSION(0, 19), \ + RECOVERY_PASS_ALL_FSCK) \ + x(alloc_v4, BCH_VERSION(0, 20), \ + RECOVERY_PASS_ALL_FSCK) \ + x(new_data_types, BCH_VERSION(0, 21), \ + RECOVERY_PASS_ALL_FSCK) \ + x(backpointers, BCH_VERSION(0, 22), \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_v3, BCH_VERSION(0, 23), \ + RECOVERY_PASS_ALL_FSCK) \ + x(unwritten_extents, BCH_VERSION(0, 24), \ + RECOVERY_PASS_ALL_FSCK) \ + x(bucket_gens, BCH_VERSION(0, 25), \ + BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ + RECOVERY_PASS_ALL_FSCK) \ + x(lru_v2, BCH_VERSION(0, 26), \ + RECOVERY_PASS_ALL_FSCK) \ + x(fragmentation_lru, BCH_VERSION(0, 27), \ + RECOVERY_PASS_ALL_FSCK) \ + x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_trees, BCH_VERSION(0, 29), \ + RECOVERY_PASS_ALL_FSCK) \ + x(major_minor, BCH_VERSION(1, 0), \ + 0) \ + x(snapshot_skiplists, BCH_VERSION(1, 1), \ + BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, -#define x(t, n) bcachefs_metadata_version_##t = n, +#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, BCH_METADATA_VERSIONS() #undef x bcachefs_metadata_version_max diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 0e9c1cb..c472aa8 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -795,6 +795,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) +#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \ + _start, _iter_flags, _k, \ + _disk_res, _journal_seq, _commit_flags,\ + _do) \ + for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\ + (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ + (_journal_seq), (_commit_flags))) + #define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ _start, _end, _iter_flags, _k, \ _disk_res, _journal_seq, _commit_flags,\ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 47adb66..b04e156 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -594,10 +594,21 @@ static int bch2_journal_replay_key(struct btree_trans *trans, unsigned iter_flags = BTREE_ITER_INTENT| BTREE_ITER_NOT_EXTENTS; + unsigned update_flags = BTREE_TRIGGER_NORUN; int ret; + /* + * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to + * keep the key cache coherent with the underlying btree. Nothing + * besides the allocator is doing updates yet so we don't need key cache + * coherency for non-alloc btrees, and key cache fills for snapshots + * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until + * the snapshots recovery pass runs. + */ if (!k->level && k->btree_id == BTREE_ID_alloc) iter_flags |= BTREE_ITER_CACHED; + else + update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM; bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, k->level, @@ -610,7 +621,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; - ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN); + ret = bch2_trans_update(trans, &iter, k->k, update_flags); out: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1115,6 +1126,7 @@ static void check_version_upgrade(struct bch_fs *c) unsigned latest_version = bcachefs_metadata_version_current; unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; + u64 recovery_passes; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -1161,12 +1173,15 @@ static void check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - prt_str(&buf, "fsck required"); + recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); + if (recovery_passes) { + prt_str(&buf, "fsck required"); - bch_info(c, "%s", buf.buf); + c->recovery_passes_explicit |= recovery_passes; + c->opts.fix_errors = FSCK_OPT_YES; + } - c->opts.fsck = true; - c->opts.fix_errors = FSCK_OPT_YES; + bch_info(c, "%s", buf.buf); mutex_lock(&c->sb_lock); bch2_sb_upgrade(c, new_version); @@ -1199,21 +1214,30 @@ static struct recovery_pass_fn recovery_passes[] = { #undef x }; +u64 bch2_fsck_recovery_passes(void) +{ + u64 ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_passes); i++) + if (recovery_passes[i].when & PASS_FSCK) + ret |= BIT_ULL(i); + return ret; +} + static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass; if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) return false; + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return true; if ((p->when & PASS_FSCK) && c->opts.fsck) return true; if ((p->when & PASS_UNCLEAN) && !c->sb.clean) return true; if (p->when & PASS_ALWAYS) return true; - if (p->when >= PASS_UPGRADE(0) && - bch2_version_upgrading_to(c, p->when >> 4)) - return true; return false; } @@ -1297,7 +1321,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!c->opts.nochanges) + if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery)) check_version_upgrade(c); if (c->opts.fsck && c->opts.norecovery) { diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h index 8c0348e..f8e796c 100644 --- a/libbcachefs/recovery.h +++ b/libbcachefs/recovery.h @@ -52,6 +52,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, void bch2_journal_keys_free(struct journal_keys *); void bch2_journal_entries_free(struct bch_fs *); +u64 bch2_fsck_recovery_passes(void); + int bch2_fs_recovery(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index f3852c4..f118e58 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -8,8 +8,41 @@ #include "fs.h" #include "subvolume.h" +#include + static int bch2_subvolume_delete(struct btree_trans *, u32); +static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) +{ + struct snapshot_t *s = snapshot_t(c, id); + + if (s->skip[2] <= ancestor) + return s->skip[2]; + if (s->skip[1] <= ancestor) + return s->skip[1]; + if (s->skip[0] <= ancestor) + return s->skip[0]; + return s->parent; +} + +bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) +{ + EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); + + while (id && id < ancestor) + id = get_ancestor_below(c, id, ancestor); + + return id == ancestor; +} + +static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) +{ + while (id && id < ancestor) + id = snapshot_t(c, id)->parent; + + return id == ancestor; +} + /* Snapshot tree: */ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, @@ -140,6 +173,25 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, } } + if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { + if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { + prt_printf(err, "skiplist not normalized"); + return -BCH_ERR_invalid_bkey; + } + + for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { + id = le32_to_cpu(s.v->skip[i]); + + if (!id != !s.v->parent || + (s.v->parent && + id <= k.k->p.offset)) { + prt_printf(err, "bad skiplist node %u)", id); + return -BCH_ERR_invalid_bkey; + } + } + } + return 0; } @@ -161,10 +213,17 @@ int bch2_mark_snapshot(struct btree_trans *trans, struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); t->parent = le32_to_cpu(s.v->parent); + t->skip[0] = le32_to_cpu(s.v->skip[0]); + t->skip[1] = le32_to_cpu(s.v->skip[1]); + t->skip[2] = le32_to_cpu(s.v->skip[2]); + t->depth = le32_to_cpu(s.v->depth); t->children[0] = le32_to_cpu(s.v->children[0]); t->children[1] = le32_to_cpu(s.v->children[1]); t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; t->tree = le32_to_cpu(s.v->tree); + + if (BCH_SNAPSHOT_DELETED(s.v)) + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); } else { t->parent = 0; t->children[0] = 0; @@ -370,9 +429,9 @@ static int check_snapshot_tree(struct btree_trans *trans, "snapshot tree points to missing subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(!bch2_snapshot_is_ancestor(c, - le32_to_cpu(subvol.snapshot), - root_id), c, + fsck_err_on(!bch2_snapshot_is_ancestor_early(c, + le32_to_cpu(subvol.snapshot), + root_id), c, "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || @@ -441,7 +500,48 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, if (ret) return ret; - return bch2_snapshot_is_ancestor(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); + return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); +} + +static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id) +{ + struct snapshot_t *s; + + if (!id) + return 0; + + s = snapshot_t(c, id); + if (!s->parent) + return id; + + return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); +} + +static int snapshot_rand_ancestor_good(struct btree_trans *trans, + struct bch_snapshot s) +{ + struct bch_snapshot a; + unsigned i; + int ret; + + for (i = 0; i < 3; i++) { + if (!s.parent != !s.skip[i]) + return false; + + if (!s.parent) + continue; + + ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a); + if (bch2_err_matches(ret, ENOENT)) + return false; + if (ret) + return ret; + + if (a.tree != s.tree) + return false; + } + + return true; } /* @@ -451,14 +551,15 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, */ static int snapshot_tree_ptr_repair(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c_snapshot *s) + struct bkey_s_c k, + struct bch_snapshot *s) { struct bch_fs *c = trans->c; struct btree_iter root_iter; struct bch_snapshot_tree s_t; struct bkey_s_c_snapshot root; struct bkey_i_snapshot *u; - u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id; + u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; int ret; root = bch2_bkey_get_iter_typed(trans, &root_iter, @@ -484,18 +585,18 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans, goto err; u->v.tree = cpu_to_le32(tree_id); - if (s->k->p.snapshot == root_id) - *s = snapshot_i_to_s_c(u); + if (k.k->p.offset == root_id) + *s = u->v; } - if (s->k->p.snapshot != root_id) { - u = bch2_bkey_make_mut_typed(trans, iter, &s->s_c, 0, snapshot); + if (k.k->p.offset != root_id) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) goto err; u->v.tree = cpu_to_le32(tree_id); - *s = snapshot_i_to_s_c(u); + *s = u->v; } err: bch2_trans_iter_exit(trans, &root_iter); @@ -507,9 +608,14 @@ static int check_snapshot(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct bkey_s_c_snapshot s; + struct bch_snapshot s; struct bch_subvolume subvol; struct bch_snapshot v; + struct bkey_i_snapshot *u; + u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); + struct snapshot_t *parent = parent_id + ? snapshot_t(c, parent_id) + : NULL; struct printbuf buf = PRINTBUF; bool should_have_subvol; u32 i, id; @@ -518,94 +624,119 @@ static int check_snapshot(struct btree_trans *trans, if (k.k->type != KEY_TYPE_snapshot) return 0; - s = bkey_s_c_to_snapshot(k); - id = le32_to_cpu(s.v->parent); + memset(&s, 0, sizeof(s)); + memcpy(&s, k.v, bkey_val_bytes(k.k)); + + id = le32_to_cpu(s.parent); if (id) { ret = snapshot_lookup(trans, id, &v); if (bch2_err_matches(ret, ENOENT)) bch_err(c, "snapshot with nonexistent parent:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); if (ret) goto err; - if (le32_to_cpu(v.children[0]) != s.k->p.offset && - le32_to_cpu(v.children[1]) != s.k->p.offset) { + if (le32_to_cpu(v.children[0]) != k.k->p.offset && + le32_to_cpu(v.children[1]) != k.k->p.offset) { bch_err(c, "snapshot parent %u missing pointer to child %llu", - id, s.k->p.offset); + id, k.k->p.offset); ret = -EINVAL; goto err; } } - for (i = 0; i < 2 && s.v->children[i]; i++) { - id = le32_to_cpu(s.v->children[i]); + for (i = 0; i < 2 && s.children[i]; i++) { + id = le32_to_cpu(s.children[i]); ret = snapshot_lookup(trans, id, &v); if (bch2_err_matches(ret, ENOENT)) bch_err(c, "snapshot node %llu has nonexistent child %u", - s.k->p.offset, id); + k.k->p.offset, id); if (ret) goto err; - if (le32_to_cpu(v.parent) != s.k->p.offset) { + if (le32_to_cpu(v.parent) != k.k->p.offset) { bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", - id, le32_to_cpu(v.parent), s.k->p.offset); + id, le32_to_cpu(v.parent), k.k->p.offset); ret = -EINVAL; goto err; } } - should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) && - !BCH_SNAPSHOT_DELETED(s.v); + should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && + !BCH_SNAPSHOT_DELETED(&s); if (should_have_subvol) { - id = le32_to_cpu(s.v->subvol); + id = le32_to_cpu(s.subvol); ret = bch2_subvolume_get(trans, id, 0, false, &subvol); if (bch2_err_matches(ret, ENOENT)) bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); if (ret) goto err; - if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) { + if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", - s.k->p.offset); + k.k->p.offset); ret = -EINVAL; goto err; } } else { - if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u)); - + if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) goto err; - bkey_reassemble(&u->k_i, s.s_c); u->v.subvol = 0; - ret = bch2_trans_update(trans, iter, &u->k_i, 0); - if (ret) - goto err; - - s = snapshot_i_to_s_c(u); + s = u->v; } } - ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree)); + ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); if (ret < 0) goto err; if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = snapshot_tree_ptr_repair(trans, iter, &s); + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) goto err; } ret = 0; - if (BCH_SNAPSHOT_DELETED(s.v)) - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c, + "snapshot with incorrect depth fields, should be %u:\n %s", + parent->depth + 1, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0); + s = u->v; + } + + ret = snapshot_rand_ancestor_good(trans, s); + if (ret < 0) + goto err; + + if (fsck_err_on(!ret, c, "snapshot with bad rand_ancestor field:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) + u->v.skip[i] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent_id)); + + bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int); + s = u->v; + } + ret = 0; err: fsck_err: printbuf_exit(&buf); @@ -618,9 +749,13 @@ int bch2_check_snapshots(struct bch_fs *c) struct bkey_s_c k; int ret; + /* + * We iterate backwards as checking/fixing the depth field requires that + * the parent's depth already be correct: + */ ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, - BTREE_ID_snapshots, POS_MIN, + for_each_btree_key_reverse_commit(&trans, iter, + BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, check_snapshot(&trans, &iter, k))); @@ -847,10 +982,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, u32 *snapshot_subvols, unsigned nr_snapids) { + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_i_snapshot *n; struct bkey_s_c k; - unsigned i; + unsigned i, j; + u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, @@ -880,6 +1017,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, n->v.parent = cpu_to_le32(parent); n->v.subvol = cpu_to_le32(snapshot_subvols[i]); n->v.tree = cpu_to_le32(tree); + n->v.depth = cpu_to_le32(depth); + + for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) + n->v.skip[j] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent)); + + bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int); SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index daa9a6b..ab0b4a6 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -37,9 +37,34 @@ static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) return genradix_ptr(&c->snapshots, U32_MAX - id); } +static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +{ + return snapshot_t(c, id)->parent; +} + static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) { +#ifdef CONFIG_BCACHEFS_DEBUG + u32 parent = snapshot_t(c, id)->parent; + + if (parent && + snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) + panic("id %u depth=%u parent %u depth=%u\n", + id, snapshot_t(c, id)->depth, + parent, snapshot_t(c, parent)->depth); + + return parent; +#else return snapshot_t(c, id)->parent; +#endif +} + +static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) +{ + while (n--) + id = bch2_snapshot_parent(c, id); + + return id; } static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) @@ -84,13 +109,7 @@ static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) return 0; } -static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) -{ - while (id && id < ancestor) - id = bch2_snapshot_parent(c, id); - - return id == ancestor; -} +bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/subvolume_types.h index c6c1cba..750d975 100644 --- a/libbcachefs/subvolume_types.h +++ b/libbcachefs/subvolume_types.h @@ -8,6 +8,8 @@ typedef DARRAY(u32) snapshot_id_list; struct snapshot_t { u32 parent; + u32 skip[3]; + u32 depth; u32 children[2]; u32 subvol; /* Nonzero only if a subvolume points to this node: */ u32 tree; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 1ac3606..1b5bc4e 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -4,6 +4,7 @@ #include "btree_update_interior.h" #include "buckets.h" #include "checksum.h" +#include "counters.h" #include "disk_groups.h" #include "ec.h" #include "error.h" @@ -12,24 +13,29 @@ #include "journal_io.h" #include "journal_sb.h" #include "journal_seq_blacklist.h" +#include "recovery.h" #include "replicas.h" #include "quota.h" #include "super-io.h" #include "super.h" #include "trace.h" #include "vstructs.h" -#include "counters.h" #include #include -struct bch2_metadata_version_str { +struct bch2_metadata_version { u16 version; const char *name; + u64 recovery_passes; }; -static const struct bch2_metadata_version_str bch2_metadata_versions[] = { -#define x(n, v) { .version = v, .name = #n }, +static const struct bch2_metadata_version bch2_metadata_versions[] = { +#define x(n, v, _recovery_passes) { \ + .version = v, \ + .name = #n, \ + .recovery_passes = _recovery_passes, \ +}, BCH_METADATA_VERSIONS() #undef x }; @@ -61,6 +67,24 @@ unsigned bch2_latest_compatible_version(unsigned v) return v; } +u64 bch2_upgrade_recovery_passes(struct bch_fs *c, + unsigned old_version, + unsigned new_version) +{ + u64 ret = 0; + + for (const struct bch2_metadata_version *i = bch2_metadata_versions; + i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions); + i++) + if (i->version > old_version && i->version <= new_version) { + if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK) + ret |= bch2_fsck_recovery_passes(); + ret |= i->recovery_passes; + } + + return ret &= ~RECOVERY_PASS_ALL_FSCK; +} + const char * const bch2_sb_fields[] = { #define x(name, nr) #name, BCH_SB_FIELDS() diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index b365f08..904adea 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -18,6 +18,10 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, unsigned); unsigned bch2_latest_compatible_version(unsigned); +u64 bch2_upgrade_recovery_passes(struct bch_fs *c, + unsigned, + unsigned); + struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, enum bch_sb_field_type, unsigned); -- 2.39.2