X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Frecovery.c;h=9127d0e3ca2f6a3fd44e076b42f01ee6f7736427;hb=f3f005c76eb5636542a8f5b137bd1904d57e8f86;hp=98f1454c23fb6c6307682082c27ecf072d392798;hpb=89b361f24a433a4a55e0032eca4e43045ea3f0d0;p=bcachefs-tools-debian diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 98f1454..9127d0e 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -27,6 +27,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-downgrade.h" #include "snapshot.h" #include "subvolume.h" #include "super-io.h" @@ -145,12 +146,11 @@ static int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; DARRAY(struct journal_key *) keys_sorted = { 0 }; - struct journal_key **kp; struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; struct btree_trans *trans = bch2_trans_get(c); - int ret; + int ret = 0; if (keys->nr) { ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", @@ -171,10 +171,12 @@ static int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = keys->d + i; - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_journal_reclaim| - (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), + /* Skip fastpath if we're low on space in the journal */ + ret = c->journal.watermark ? -1 : + commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_journal_reclaim| + (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); BUG_ON(!ret && !k->overwritten); if (ret) { @@ -278,7 +280,7 @@ static int journal_replay_entry_early(struct bch_fs *c, le64_to_cpu(u->v); break; case BCH_FS_USAGE_inodes: - c->usage_base->nr_inodes = le64_to_cpu(u->v); + c->usage_base->b.nr_inodes = le64_to_cpu(u->v); break; case BCH_FS_USAGE_key_version: atomic64_set(&c->key_version, @@ -342,14 +344,11 @@ static int journal_replay_entry_early(struct bch_fs *c, static int journal_replay_early(struct bch_fs *c, struct bch_sb_field_clean *clean) { - struct jset_entry *entry; - int ret; - if (clean) { - for (entry = clean->start; + for (struct jset_entry *entry = clean->start; entry != vstruct_end(&clean->field); entry = vstruct_next(entry)) { - ret = journal_replay_entry_early(c, entry); + int ret = journal_replay_entry_early(c, entry); if (ret) return ret; } @@ -364,7 +363,7 @@ static int journal_replay_early(struct bch_fs *c, continue; vstruct_for_each(&i->j, entry) { - ret = journal_replay_entry_early(c, entry); + int ret = journal_replay_entry_early(c, entry); if (ret) return ret; } @@ -460,8 +459,7 @@ static int bch2_initialize_subvolumes(struct bch_fs *c) ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -501,13 +499,12 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, __bch2_fs_upgrade_for_subvolumes(trans)); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } const char * const bch2_recovery_passes[] = { -#define x(_fn, _when) #_fn, +#define x(_fn, ...) #_fn, BCH_RECOVERY_PASSES() #undef x NULL @@ -531,7 +528,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c) keys->gap = keys->nr; set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || c->opts.fsck) + + if (keys->nr || c->opts.fsck || !c->sb.clean) return bch2_fs_read_write_early(c); return 0; } @@ -542,18 +540,47 @@ struct recovery_pass_fn { }; static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when }, +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, BCH_RECOVERY_PASSES() #undef x }; -static void check_version_upgrade(struct bch_fs *c) +u64 bch2_recovery_passes_to_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +u64 bch2_recovery_passes_from_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; - u64 recovery_passes; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -597,27 +624,26 @@ static void check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); - if (recovery_passes) { - if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK) - prt_str(&buf, "fsck required"); - else { - prt_str(&buf, "running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); - } + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_upgrade(c, old_version, new_version); + passes = ext->recovery_passes_required[0] & ~passes; - c->recovery_passes_explicit |= recovery_passes; - c->opts.fix_errors = FSCK_FIX_yes; + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } bch_info(c, "%s", buf.buf); - mutex_lock(&c->sb_lock); bch2_sb_upgrade(c, new_version); - mutex_unlock(&c->sb_lock); printbuf_exit(&buf); + return true; } + + return false; } u64 bch2_fsck_recovery_passes(void) @@ -632,7 +658,7 @@ u64 bch2_fsck_recovery_passes(void) static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { - struct recovery_pass_fn *p = recovery_pass_fns + c->curr_recovery_pass; + struct recovery_pass_fn *p = recovery_pass_fns + pass; if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) return false; @@ -649,39 +675,62 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { + struct recovery_pass_fn *p = recovery_pass_fns + pass; int ret; - c->curr_recovery_pass = pass; + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), + bch2_recovery_passes[pass]); + ret = p->fn(c); + if (ret) + return ret; + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_CONT " done\n"); - if (should_run_recovery_pass(c, pass)) { - struct recovery_pass_fn *p = recovery_pass_fns + pass; + return 0; +} - if (!(p->when & PASS_SILENT)) - printk(KERN_INFO bch2_log_msg(c, "%s..."), - bch2_recovery_passes[pass]); - ret = p->fn(c); - if (ret) - return ret; - if (!(p->when & PASS_SILENT)) - printk(KERN_CONT " done\n"); +static int bch2_run_recovery_passes(struct bch_fs *c) +{ + int ret = 0; - c->recovery_passes_complete |= BIT_ULL(pass); + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + if (should_run_recovery_pass(c, c->curr_recovery_pass)) { + unsigned pass = c->curr_recovery_pass; + + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || + (ret && c->curr_recovery_pass < pass)) + continue; + if (ret) + break; + + c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + } + c->curr_recovery_pass++; + c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); } - return 0; + return ret; } -static int bch2_run_recovery_passes(struct bch_fs *c) +int bch2_run_online_recovery_passes(struct bch_fs *c) { int ret = 0; - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { + struct recovery_pass_fn *p = recovery_pass_fns + i; + + if (!(p->when & PASS_ONLINE)) continue; + + ret = bch2_run_recovery_pass(c, i); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { + i = c->curr_recovery_pass; + continue; + } if (ret) break; - c->curr_recovery_pass++; } return ret; @@ -692,7 +741,6 @@ int bch2_fs_recovery(struct bch_fs *c) struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; u64 last_seq = 0, blacklist_seq, journal_seq; - bool write_sb = false; int ret = 0; if (c->sb.clean) { @@ -720,15 +768,76 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery)) - check_version_upgrade(c); - if (c->opts.fsck && c->opts.norecovery) { bch_err(c, "cannot select both norecovery and fsck"); ret = -EINVAL; goto err; } + if (!(c->opts.nochanges && c->opts.norecovery)) { + mutex_lock(&c->sb_lock); + bool write_sb = false; + + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); + if (!ext) { + ret = -BCH_ERR_ENOSPC_sb; + mutex_unlock(&c->sb_lock); + goto err; + } + + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); + write_sb = true; + } + + u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (sb_passes) { + struct printbuf buf = PRINTBUF; + prt_str(&buf, "superblock requires following recovery passes to be run:\n "); + prt_bitflags(&buf, bch2_recovery_passes, sb_passes); + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + } + + if (bch2_check_version_downgrade(c)) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Version downgrade required:\n"); + + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_downgrade(c, + BCH_VERSION_MINOR(bcachefs_metadata_version_current), + BCH_VERSION_MINOR(c->sb.version)); + passes = ext->recovery_passes_required[0] & ~passes; + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); + } + + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + write_sb = true; + } + + if (check_version_upgrade(c)) + write_sb = true; + + if (write_sb) + bch2_write_super(c); + + c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + mutex_unlock(&c->sb_lock); + } + + if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + + if (c->opts.fsck) + set_bit(BCH_FS_fsck_running, &c->flags); + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -865,15 +974,12 @@ use_clean: if (ret) goto err; - if (c->opts.fsck && - (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || - BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))) - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - ret = bch2_run_recovery_passes(c); if (ret) goto err; + clear_bit(BCH_FS_fsck_running, &c->flags); + /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && test_bit(BCH_FS_errors_fixed, &c->flags) && @@ -908,16 +1014,30 @@ use_clean: } mutex_lock(&c->sb_lock); - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) { - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version); + bool write_sb = false; + + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); write_sb = true; } - if (!test_bit(BCH_FS_error, &c->flags)) { + if (!test_bit(BCH_FS_error, &c->flags) && + !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); write_sb = true; } + if (!test_bit(BCH_FS_error, &c->flags)) { + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + if (ext && + (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || + !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { + memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); + memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); + write_sb = true; + } + } + if (c->opts.fsck && !test_bit(BCH_FS_error, &c->flags) && !test_bit(BCH_FS_errors_not_fixed, &c->flags)) { @@ -954,7 +1074,6 @@ use_clean: ret = 0; out: - set_bit(BCH_FS_fsck_done, &c->flags); bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && @@ -962,13 +1081,14 @@ out: bch2_journal_keys_put_initial(c); kfree(clean); - if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { + if (!ret && + test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && + !c->opts.nochanges) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; err: fsck_err: @@ -981,8 +1101,6 @@ int bch2_fs_initialize(struct bch_fs *c) struct bch_inode_unpacked root_inode, lostfound_inode; struct bkey_inode_buf packed_inode; struct qstr lostfound = QSTR("lost+found"); - struct bch_dev *ca; - unsigned i; int ret; bch_notice(c, "initializing new filesystem"); @@ -991,7 +1109,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - bch2_sb_maybe_downgrade(c); + bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { bch2_sb_upgrade(c, bcachefs_metadata_version_current); @@ -1002,12 +1120,11 @@ int bch2_fs_initialize(struct bch_fs *c) c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); set_bit(BCH_FS_may_go_rw, &c->flags); - set_bit(BCH_FS_fsck_done, &c->flags); - for (i = 0; i < BTREE_ID_NR; i++) + for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - for_each_member_device(ca, c, i) + for_each_member_device(c, ca) bch2_dev_usage_init(ca); ret = bch2_fs_journal_alloc(c); @@ -1035,7 +1152,7 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) ca->new_fs_bucket_idx = 0; ret = bch2_fs_freespace_init(c); @@ -1059,10 +1176,9 @@ int bch2_fs_initialize(struct bch_fs *c) packed_inode.inode.k.p.snapshot = U32_MAX; ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); - if (ret) { - bch_err_msg(c, ret, "creating root directory"); + bch_err_msg(c, ret, "creating root directory"); + if (ret) goto err; - } bch2_inode_init_early(c, &lostfound_inode); @@ -1073,10 +1189,11 @@ int bch2_fs_initialize(struct bch_fs *c) &lostfound, 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { 0 }, 0)); - if (ret) { - bch_err_msg(c, ret, "creating lost+found"); + bch_err_msg(c, ret, "creating lost+found"); + if (ret) goto err; - } + + c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); @@ -1085,10 +1202,9 @@ int bch2_fs_initialize(struct bch_fs *c) } ret = bch2_journal_flush(&c->journal); - if (ret) { - bch_err_msg(c, ret, "writing first journal entry"); + bch_err_msg(c, ret, "writing first journal entry"); + if (ret) goto err; - } mutex_lock(&c->sb_lock); SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); @@ -1099,6 +1215,6 @@ int bch2_fs_initialize(struct bch_fs *c) return 0; err: - bch_err_fn(ca, ret); + bch_err_fn(c, ret); return ret; }