git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/recovery.c
btree_write_buffer: ensure atomic64_sub_return_release availability
[bcachefs-tools-debian] / libbcachefs / recovery.c
index 33a68a335be6ab5ad2e3548284fe91fbc260b7ec..9c30500ce9200af8be8f71a50f5fa02c356e4400 100644 (file)
 #include "journal_reclaim.h"
 #include "journal_seq_blacklist.h"
 #include "lru.h"
+#include "logged_ops.h"
 #include "move.h"
 #include "quota.h"
+#include "rebalance.h"
 #include "recovery.h"
 #include "replicas.h"
 #include "sb-clean.h"
+#include "snapshot.h"
 #include "subvolume.h"
 #include "super-io.h"
 
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
+static bool btree_id_is_alloc(enum btree_id id)
+{
+       switch (id) {
+       case BTREE_ID_alloc:
+       case BTREE_ID_backpointers:
+       case BTREE_ID_need_discard:
+       case BTREE_ID_freespace:
+       case BTREE_ID_bucket_gens:
+               return true;
+       default:
+               return false;
+       }
+}
+
 /* for -o reconstruct_alloc: */
 static void drop_alloc_keys(struct journal_keys *keys)
 {
        size_t src, dst;
 
        for (src = 0, dst = 0; src < keys->nr; src++)
-               if (keys->d[src].btree_id != BTREE_ID_alloc)
+               if (!btree_id_is_alloc(keys->d[src].btree_id))
                        keys->d[dst++] = keys->d[src];
 
        keys->nr = dst;
@@ -163,10 +180,10 @@ static int bch2_journal_replay(struct bch_fs *c)
                                    (!k->allocated
                                     ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
                                     : 0),
-                            bch2_journal_replay_key(&trans, k));
+                            bch2_journal_replay_key(trans, k));
                if (ret) {
                        bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
-                               bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret));
+                               bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret));
                        goto err;
                }
        }
@@ -209,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
 
                if (entry->u64s) {
                        r->level = entry->level;
-                       bkey_copy(&r->key, &entry->start[0]);
+                       bkey_copy(&r->key, (struct bkey_i *) entry->start);
                        r->error = 0;
                } else {
                        r->error = -EIO;
@@ -330,20 +347,6 @@ static int journal_replay_early(struct bch_fs *c,
 
 /* sb clean section: */
 
-static bool btree_id_is_alloc(enum btree_id id)
-{
-       switch (id) {
-       case BTREE_ID_alloc:
-       case BTREE_ID_backpointers:
-       case BTREE_ID_need_discard:
-       case BTREE_ID_freespace:
-       case BTREE_ID_bucket_gens:
-               return true;
-       default:
-               return false;
-       }
-}
-
 static int read_btree_roots(struct bch_fs *c)
 {
        unsigned i;
@@ -362,23 +365,25 @@ static int read_btree_roots(struct bch_fs *c)
                }
 
                if (r->error) {
-                       __fsck_err(c, btree_id_is_alloc(i)
+                       __fsck_err(c,
+                                  btree_id_is_alloc(i)
                                   ? FSCK_CAN_IGNORE : 0,
+                                  btree_root_bkey_invalid,
                                   "invalid btree root %s",
-                                  bch2_btree_ids[i]);
+                                  bch2_btree_id_str(i));
                        if (i == BTREE_ID_alloc)
                                c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
                }
 
                ret = bch2_btree_root_read(c, i, &r->key, r->level);
                if (ret) {
-                       __fsck_err(c,
-                                  btree_id_is_alloc(i)
-                                  ? FSCK_CAN_IGNORE : 0,
-                                  "error reading btree root %s",
-                                  bch2_btree_ids[i]);
+                       fsck_err(c,
+                                btree_root_read_error,
+                                "error reading btree root %s",
+                                bch2_btree_id_str(i));
                        if (btree_id_is_alloc(i))
                                c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+                       ret = 0;
                }
        }
 
@@ -421,15 +426,9 @@ static int bch2_initialize_subvolumes(struct bch_fs *c)
        root_volume.v.snapshot  = cpu_to_le32(U32_MAX);
        root_volume.v.inode     = cpu_to_le64(BCACHEFS_ROOT_INO);
 
-       ret =   bch2_btree_insert(c, BTREE_ID_snapshot_trees,
-                                 &root_tree.k_i,
-                                 NULL, NULL, 0) ?:
-               bch2_btree_insert(c, BTREE_ID_snapshots,
-                                 &root_snapshot.k_i,
-                                 NULL, NULL, 0) ?:
-               bch2_btree_insert(c, BTREE_ID_subvolumes,
-                                 &root_volume.k_i,
-                                 NULL, NULL, 0);
+       ret =   bch2_btree_insert(c, BTREE_ID_snapshot_trees,   &root_tree.k_i, NULL, 0) ?:
+               bch2_btree_insert(c, BTREE_ID_snapshots,        &root_snapshot.k_i, NULL, 0) ?:
+               bch2_btree_insert(c, BTREE_ID_subvolumes,       &root_volume.k_i, NULL, 0);
        if (ret)
                bch_err_fn(c, ret);
        return ret;
@@ -470,7 +469,7 @@ noinline_for_stack
 static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
 {
        int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-                               __bch2_fs_upgrade_for_subvolumes(&trans));
+                               __bch2_fs_upgrade_for_subvolumes(trans));
        if (ret)
                bch_err_fn(c, ret);
        return ret;
@@ -560,7 +559,7 @@ static void check_version_upgrade(struct bch_fs *c)
                        if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
                                prt_str(&buf, "fsck required");
                        else {
-                               prt_str(&buf, "running recovery passses: ");
+                               prt_str(&buf, "running recovery passes: ");
                                prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
                        }
 
@@ -649,7 +648,7 @@ int bch2_fs_recovery(struct bch_fs *c)
 {
        struct bch_sb_field_clean *clean = NULL;
        struct jset *last_journal_entry = NULL;
-       u64 last_seq, blacklist_seq, journal_seq;
+       u64 last_seq = 0, blacklist_seq, journal_seq;
        bool write_sb = false;
        int ret = 0;
 
@@ -718,6 +717,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                if (mustfix_fsck_err_on(c->sb.clean &&
                                        last_journal_entry &&
                                        !journal_entry_empty(last_journal_entry), c,
+                               clean_but_journal_not_empty,
                                "filesystem marked clean but journal not empty")) {
                        c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
                        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
@@ -725,7 +725,9 @@ int bch2_fs_recovery(struct bch_fs *c)
                }
 
                if (!last_journal_entry) {
-                       fsck_err_on(!c->sb.clean, c, "no journal entries found");
+                       fsck_err_on(!c->sb.clean, c,
+                                   dirty_but_no_journal_entries,
+                                   "no journal entries found");
                        if (clean)
                                goto use_clean;
 
@@ -733,6 +735,13 @@ int bch2_fs_recovery(struct bch_fs *c)
                                if (*i) {
                                        last_journal_entry = &(*i)->j;
                                        (*i)->ignore = false;
+                                       /*
+                                        * This was probably a NO_FLUSH entry,
+                                        * so last_seq was garbage - but we know
+                                        * we're only using a single journal
+                                        * entry, set it here:
+                                        */
+                                       (*i)->j.last_seq = (*i)->j.seq;
                                        break;
                                }
                }
@@ -906,7 +915,7 @@ out:
        }
        kfree(clean);
 
-       if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) {
+       if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
                bch2_fs_read_write_early(c);
                bch2_delete_dead_snapshots_async(c);
        }
@@ -951,16 +960,12 @@ int bch2_fs_initialize(struct bch_fs *c)
        for (i = 0; i < BTREE_ID_NR; i++)
                bch2_btree_root_alloc(c, i);
 
-       for_each_online_member(ca, c, i)
+       for_each_member_device(ca, c, i)
                bch2_dev_usage_init(ca);
 
-       for_each_online_member(ca, c, i) {
-               ret = bch2_dev_journal_alloc(ca);
-               if (ret) {
-                       percpu_ref_put(&ca->io_ref);
-                       goto err;
-               }
-       }
+       ret = bch2_fs_journal_alloc(c);
+       if (ret)
+               goto err;
 
        /*
         * journal_res_get() will crash if called before this has
@@ -978,15 +983,13 @@ int bch2_fs_initialize(struct bch_fs *c)
         * btree updates
         */
        bch_verbose(c, "marking superblocks");
-       for_each_member_device(ca, c, i) {
-               ret = bch2_trans_mark_dev_sb(c, ca);
-               if (ret) {
-                       percpu_ref_put(&ca->ref);
-                       goto err;
-               }
+       ret = bch2_trans_mark_dev_sbs(c);
+       bch_err_msg(c, ret, "marking superblocks");
+       if (ret)
+               goto err;
 
+       for_each_online_member(ca, c, i)
                ca->new_fs_bucket_idx = 0;
-       }
 
        ret = bch2_fs_freespace_init(c);
        if (ret)
@@ -1008,9 +1011,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        bch2_inode_pack(&packed_inode, &root_inode);
        packed_inode.inode.k.p.snapshot = U32_MAX;
 
-       ret = bch2_btree_insert(c, BTREE_ID_inodes,
-                               &packed_inode.inode.k_i,
-                               NULL, NULL, 0);
+       ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
        if (ret) {
                bch_err_msg(c, ret, "creating root directory");
                goto err;
@@ -1019,7 +1020,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        bch2_inode_init_early(c, &lostfound_inode);
 
        ret = bch2_trans_do(c, NULL, NULL, 0,
-               bch2_create_trans(&trans,
+               bch2_create_trans(trans,
                                  BCACHEFS_ROOT_SUBVOL_INUM,
                                  &root_inode, &lostfound_inode,
                                  &lostfound,