git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/recovery.c
Update bcachefs sources to 5e392aed7a bcachefs: Kill bch2_alloc_write()
[bcachefs-tools-debian] / libbcachefs / recovery.c
index 7e4400cc02a9249381e5c03b6b7ff7776c4ee910..ca92fe84c248a25f5863046bdc038f265274e20d 100644 (file)
@@ -16,6 +16,7 @@
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "journal_seq_blacklist.h"
+#include "lru.h"
 #include "move.h"
 #include "quota.h"
 #include "recovery.h"
@@ -94,6 +95,24 @@ size_t bch2_journal_key_search(struct journal_keys *journal_keys,
        return l;
 }
 
+/* First non-overwritten journal key from the search position, or NULL. */
+struct bkey_i *bch2_journal_keys_peek(struct bch_fs *c, enum btree_id btree_id,
+                                     unsigned level, struct bpos pos)
+{
+       struct journal_keys *keys = &c->journal_keys;
+       size_t idx = bch2_journal_key_search(keys, btree_id, level, pos);
+
+       /* Skip entries flagged as overwritten. */
+       while (idx < keys->nr && keys->d[idx].overwritten)
+               idx++;
+
+       /* Only a key in the requested btree and level counts as a hit. */
+       if (idx >= keys->nr || keys->d[idx].btree_id != btree_id ||
+           keys->d[idx].level != level)
+               return NULL;
+       return keys->d[idx].k;
+}
+
 static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned idx)
 {
        struct bkey_i *n = iter->keys->d[idx].k;
@@ -544,8 +563,9 @@ static int bch2_journal_replay(struct bch_fs *c)
                ret = bch2_trans_do(c, NULL, NULL,
                                    BTREE_INSERT_LAZY_RW|
                                    BTREE_INSERT_NOFAIL|
-                                   BTREE_INSERT_JOURNAL_RESERVED|
-                                   (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+                                   (!k->allocated
+                                    ? BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved
+                                    : 0),
                             bch2_journal_replay_key(&trans, k));
                if (ret) {
                        bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
@@ -560,6 +580,9 @@ static int bch2_journal_replay(struct bch_fs *c)
        bch2_journal_set_replay_done(j);
        bch2_journal_flush_all_pins(j);
        ret = bch2_journal_error(j);
+
+       if (keys->nr && !ret)
+               bch2_journal_log_msg(&c->journal, "journal replay finished");
 err:
        kvfree(keys_sorted);
        return ret;
@@ -742,6 +765,8 @@ static int verify_superblock_clean(struct bch_fs *c,
 {
        unsigned i;
        struct bch_sb_field_clean *clean = *cleanp;
+       struct printbuf buf1 = PRINTBUF;
+       struct printbuf buf2 = PRINTBUF;
        int ret = 0;
 
        if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
@@ -754,7 +779,6 @@ static int verify_superblock_clean(struct bch_fs *c,
        }
 
        for (i = 0; i < BTREE_ID_NR; i++) {
-               char buf1[200], buf2[200];
                struct bkey_i *k1, *k2;
                unsigned l1 = 0, l2 = 0;
 
@@ -764,6 +788,19 @@ static int verify_superblock_clean(struct bch_fs *c,
                if (!k1 && !k2)
                        continue;
 
+               printbuf_reset(&buf1);
+               printbuf_reset(&buf2);
+
+               if (k1)
+                       bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1));
+               else
+                       pr_buf(&buf1, "(none)");
+
+               if (k2)
+                       bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2));
+               else
+                       pr_buf(&buf2, "(none)");
+
                mustfix_fsck_err_on(!k1 || !k2 ||
                                    IS_ERR(k1) ||
                                    IS_ERR(k2) ||
@@ -773,10 +810,12 @@ static int verify_superblock_clean(struct bch_fs *c,
                        "superblock btree root %u doesn't match journal after clean shutdown\n"
                        "sb:      l=%u %s\n"
                        "journal: l=%u %s\n", i,
-                       l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1),
-                       l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2));
+                       l1, buf1.buf,
+                       l2, buf2.buf);
        }
 fsck_err:
+       printbuf_exit(&buf2);
+       printbuf_exit(&buf1);
        return ret;
 }
 
@@ -803,7 +842,7 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
                return ERR_PTR(-ENOMEM);
        }
 
-       ret = bch2_sb_clean_validate(c, clean, READ);
+       ret = bch2_sb_clean_validate_late(c, clean, READ);
        if (ret) {
                mutex_unlock(&c->sb_lock);
                return ERR_PTR(ret);
@@ -990,8 +1029,8 @@ int bch2_fs_recovery(struct bch_fs *c)
                        bch_info(c, "filesystem version is prior to subvol_dirent - upgrading");
                        c->opts.version_upgrade = true;
                        c->opts.fsck            = true;
-               } else if (c->sb.version < bcachefs_metadata_version_inode_v2) {
-                       bch_info(c, "filesystem version is prior to inode_v2 - upgrading");
+               } else if (c->sb.version < bcachefs_metadata_version_alloc_v4) {
+                       bch_info(c, "filesystem version is prior to alloc_v4 - upgrading");
                        c->opts.version_upgrade = true;
                }
        }
@@ -1054,6 +1093,9 @@ use_clean:
                blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1;
        }
 
+       if (c->opts.read_journal_only)
+               goto out;
+
        if (c->opts.reconstruct_alloc) {
                c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
                drop_alloc_keys(&c->journal_keys);
@@ -1097,7 +1139,7 @@ use_clean:
        err = "error reading allocation information";
 
        down_read(&c->gc_lock);
-       ret = bch2_alloc_read(c, false, false);
+       ret = bch2_alloc_read(c);
        up_read(&c->gc_lock);
 
        if (ret)
@@ -1111,8 +1153,6 @@ use_clean:
                goto err;
        bch_verbose(c, "stripes_read done");
 
-       set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
-
        /*
         * If we're not running fsck, this ensures bch2_fsck_err() calls are
         * instead interpreted as bch2_inconsistent_err() calls:
@@ -1126,18 +1166,32 @@ use_clean:
            test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
                bool metadata_only = c->opts.norecovery;
 
-               bch_info(c, "starting mark and sweep");
-               err = "error in mark and sweep";
+               bch_info(c, "checking allocations");
+               err = "error checking allocations";
                ret = bch2_gc(c, true, metadata_only);
                if (ret)
                        goto err;
-               bch_verbose(c, "mark and sweep done");
+               bch_verbose(c, "done checking allocations");
+       }
+
+       if (c->opts.fsck) {
+               bch_info(c, "checking need_discard and freespace btrees");
+               err = "error checking need_discard and freespace btrees";
+               ret = bch2_check_alloc_info(c, true);
+               if (ret)
+                       goto err;
+
+               ret = bch2_check_lrus(c, true);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking need_discard and freespace btrees");
        }
 
        bch2_stripes_heap_start(c);
 
        clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
        set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+       set_bit(BCH_FS_MAY_GO_RW, &c->flags);
 
        /*
         * Skip past versions that might have possibly been used (as nonces),
@@ -1157,6 +1211,11 @@ use_clean:
        if (c->opts.verbose || !c->sb.clean)
                bch_info(c, "journal replay done");
 
+       err = "error initializing freespace";
+       ret = bch2_fs_freespace_init(c);
+       if (ret)
+               goto err;
+
        if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                bch2_fs_lazy_rw(c);
 
@@ -1297,8 +1356,8 @@ int bch2_fs_initialize(struct bch_fs *c)
        }
        mutex_unlock(&c->sb_lock);
 
-       set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
        set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+       set_bit(BCH_FS_MAY_GO_RW, &c->flags);
        set_bit(BCH_FS_FSCK_DONE, &c->flags);
 
        for (i = 0; i < BTREE_ID_NR; i++)
@@ -1329,6 +1388,7 @@ int bch2_fs_initialize(struct bch_fs *c)
         * Write out the superblock and journal buckets, now that we can do
         * btree updates
         */
+       bch_verbose(c, "marking superblocks");
        err = "error marking superblock and journal";
        for_each_member_device(ca, c, i) {
                ret = bch2_trans_mark_dev_sb(c, ca);
@@ -1340,6 +1400,12 @@ int bch2_fs_initialize(struct bch_fs *c)
                ca->new_fs_bucket_idx = 0;
        }
 
+       bch_verbose(c, "initializing freespace");
+       err = "error initializing freespace";
+       ret = bch2_fs_freespace_init(c);
+       if (ret)
+               goto err;
+
        err = "error creating root snapshot node";
        ret = bch2_fs_initialize_subvolumes(c);
        if (ret)