git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/recovery.c
Update bcachefs sources to 386f00b639 bcachefs: Snapshot creation, deletion
[bcachefs-tools-debian] / libbcachefs / recovery.c
index 9bd6348842e0733d90b881d03f773115cbaa5b87..64e0b542e7791d53cf0a0b5323e00e3e79656800 100644 (file)
@@ -20,6 +20,7 @@
 #include "quota.h"
 #include "recovery.h"
 #include "replicas.h"
+#include "subvolume.h"
 #include "super-io.h"
 
 #include <linux/sort.h>
@@ -39,6 +40,20 @@ static void drop_alloc_keys(struct journal_keys *keys)
        keys->nr = dst;
 }
 
+/*
+ * Btree node pointers have a field to stash a pointer to the in-memory btree
+ * node; we need to zero out this field when reading in btree nodes, or when
+ * reading in keys from the journal.
+ *
+ * This walks every journal key in @keys and clears v.mem_ptr on those whose
+ * type is KEY_TYPE_btree_ptr_v2; keys of any other type are left untouched.
+ */
+static void zero_out_btree_mem_ptr(struct journal_keys *keys)
+{
+       struct journal_key *i;
+
+       for (i = keys->d; i < keys->d + keys->nr; i++)
+               if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
+                       bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
+}
+
 /* iterate over keys read from the journal: */
 
 static int __journal_key_cmp(enum btree_id     l_btree_id,
@@ -312,7 +327,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
               (k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                bch2_bkey_buf_reassemble(&tmp, c, k);
 
-               bch2_btree_node_prefetch(c, NULL, tmp.k,
+               bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
                                        b->c.btree_id, b->c.level - 1);
 
                bch2_btree_and_journal_iter_advance(&iter);
@@ -504,24 +519,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
                                     enum btree_id id, unsigned level,
                                     struct bkey_i *k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_trans_get_node_iter(trans, id, k->k.p,
-                                       BTREE_MAX_DEPTH, level,
-                                       BTREE_ITER_INTENT);
-
-       /*
-        * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run
-        * extent_handle_overwrites() and extent_update_to_keys() - but we don't
-        * want that here, journal replay is supposed to treat extents like
-        * regular keys:
-        */
-       BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-
-       ret   = bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_node_iter_init(trans, &iter, id, k->k.p,
+                                 BTREE_MAX_DEPTH, level,
+                                 BTREE_ITER_INTENT|
+                                 BTREE_ITER_NOT_EXTENTS);
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -539,15 +546,16 @@ static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
 
 static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter; /* now a local object, set up by bch2_trans_iter_init() below */
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p,
-                                  BTREE_ITER_CACHED|
-                                  BTREE_ITER_CACHED_NOFILL|
-                                  BTREE_ITER_INTENT);
-       ret = bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p,
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_INTENT);
+       ret   = bch2_btree_iter_traverse(&iter) ?: /* a ?: b - the update is only attempted if traverse returned 0 */
+               bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_exit(trans, &iter); /* reached on success and on error alike */
        return ret;
 }
 
@@ -725,7 +733,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
                ca->usage_base->buckets_ec              = le64_to_cpu(u->buckets_ec);
                ca->usage_base->buckets_unavailable     = le64_to_cpu(u->buckets_unavailable);
 
-               for (i = 0; i < nr_types; i++) {
+               for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
                        ca->usage_base->d[i].buckets    = le64_to_cpu(u->d[i].buckets);
                        ca->usage_base->d[i].sectors    = le64_to_cpu(u->d[i].sectors);
                        ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented);
@@ -954,6 +962,81 @@ fsck_err:
        return ret;
 }
 
+/*
+ * Insert the root snapshot and root subvolume keys:
+ *
+ *  - the root snapshot node, at offset U32_MAX in BTREE_ID_snapshots, with
+ *    its subvol field set to BCACHEFS_ROOT_SUBVOL
+ *  - the root subvolume, at offset BCACHEFS_ROOT_SUBVOL in
+ *    BTREE_ID_subvolumes, pointing at snapshot U32_MAX and BCACHEFS_ROOT_INO
+ *
+ * Returns 0 on success, or the error from the first bch2_btree_insert() that
+ * fails (the subvolume is not inserted if the snapshot insert failed).
+ */
+static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
+{
+       struct bkey_i_snapshot  root_snapshot;
+       struct bkey_i_subvolume root_volume;
+       int ret;
+
+       bkey_snapshot_init(&root_snapshot.k_i);
+       root_snapshot.k.p.offset = U32_MAX;
+       root_snapshot.v.flags   = 0;
+       root_snapshot.v.parent  = 0;
+       /*
+        * On-disk field: convert to little-endian, as is done for
+        * root_volume.v.snapshot below. A raw assignment would store the
+        * wrong value on big-endian hosts.
+        */
+       root_snapshot.v.subvol  = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+       root_snapshot.v.pad     = 0;
+       SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+       ret = bch2_btree_insert(c, BTREE_ID_snapshots,
+                               &root_snapshot.k_i,
+                               NULL, NULL, 0);
+       if (ret)
+               return ret;
+
+       bkey_subvolume_init(&root_volume.k_i);
+       root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+       root_volume.v.flags     = 0;
+       root_volume.v.snapshot  = cpu_to_le32(U32_MAX);
+       root_volume.v.inode     = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+       return bch2_btree_insert(c, BTREE_ID_subvolumes,
+                                &root_volume.k_i,
+                                NULL, NULL, 0);
+}
+
+/*
+ * Upgrade step for filesystems older than the snapshot metadata version:
+ * look up the root inode at POS(0, BCACHEFS_ROOT_INO), set its bi_subvol to
+ * BCACHEFS_ROOT_SUBVOL and queue the repacked inode as an update in @trans.
+ *
+ * Returns 0 on success; -ENOENT if the key at that position is not an inode;
+ * otherwise the error from the lookup, the unpack, the transaction memory
+ * allocation or bch2_trans_update(). The iterator is released on every path.
+ */
+static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct bch_inode_unpacked inode;
+       struct bkey_inode_buf *packed;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+                            POS(0, BCACHEFS_ROOT_INO), 0);
+       k = bch2_btree_iter_peek_slot(&iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (k.k->type != KEY_TYPE_inode) {
+               bch_err(c, "root inode not found");
+               ret = -ENOENT;
+               goto err;
+       }
+
+       ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode);
+       if (ret) {
+               /* bad on-disk data: fail the upgrade instead of BUG()ing */
+               bch_err(c, "error unpacking root inode");
+               goto err;
+       }
+
+       inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+       /* allocated from the transaction, so there is nothing to free here */
+       packed = bch2_trans_kmalloc(trans, sizeof(*packed));
+       ret = PTR_ERR_OR_ZERO(packed);
+       if (ret)
+               goto err;
+
+       bch2_inode_pack(c, packed, &inode);
+       ret = bch2_trans_update(trans, &iter, &packed->inode.k_i, 0);
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
 int bch2_fs_recovery(struct bch_fs *c)
 {
        const char *err = "cannot allocate memory";
@@ -1010,6 +1093,12 @@ int bch2_fs_recovery(struct bch_fs *c)
                c->opts.version_upgrade = true;
                c->opts.fsck            = true;
                c->opts.fix_errors      = FSCK_OPT_YES;
+       } else if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
+               bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
+               c->opts.version_upgrade = true;
+       } else if (c->sb.version < bcachefs_metadata_version_snapshot) {
+               bch_info(c, "filesystem version is prior to snapshot field - upgrading");
+               c->opts.version_upgrade = true;
        }
 
        ret = bch2_blacklist_table_initialize(c);
@@ -1074,6 +1163,8 @@ use_clean:
                drop_alloc_keys(&c->journal_keys);
        }
 
+       zero_out_btree_mem_ptr(&c->journal_keys);
+
        ret = journal_replay_early(c, clean, &c->journal_entries);
        if (ret)
                goto err;
@@ -1176,6 +1267,29 @@ use_clean:
                bch_verbose(c, "alloc write done");
        }
 
+       if (c->sb.version < bcachefs_metadata_version_snapshot) {
+               err = "error creating root snapshot node";
+               ret = bch2_fs_initialize_subvolumes(c);
+               if (ret)
+                       goto err;
+       }
+
+       bch_verbose(c, "reading snapshots table");
+       err = "error reading snapshots table";
+       ret = bch2_fs_snapshots_start(c);
+       if (ret)
+               goto err;
+       bch_verbose(c, "reading snapshots done");
+
+       if (c->sb.version < bcachefs_metadata_version_snapshot) {
+               /* set bi_subvol on root inode */
+               err = "error upgrade root inode for subvolumes";
+               ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
+                                   bch2_fs_upgrade_for_subvolumes(&trans));
+               if (ret)
+                       goto err;
+       }
+
        if (c->opts.fsck) {
                bch_info(c, "starting fsck");
                err = "error in fsck";
@@ -1202,7 +1316,9 @@ use_clean:
 
        if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
            !(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
-               struct bch_move_stats stats = { 0 };
+               struct bch_move_stats stats;
+
+               bch_move_stats_init(&stats, "recovery");
 
                bch_info(c, "scanning for old btree nodes");
                ret = bch2_fs_read_write(c);
@@ -1334,9 +1450,22 @@ int bch2_fs_initialize(struct bch_fs *c)
                }
        }
 
+       err = "error creating root snapshot node";
+       ret = bch2_fs_initialize_subvolumes(c);
+       if (ret)
+               goto err;
+
+       bch_verbose(c, "reading snapshots table");
+       err = "error reading snapshots table";
+       ret = bch2_fs_snapshots_start(c);
+       if (ret)
+               goto err;
+       bch_verbose(c, "reading snapshots done");
+
        bch2_inode_init(c, &root_inode, 0, 0,
                        S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
-       root_inode.bi_inum = BCACHEFS_ROOT_INO;
+       root_inode.bi_inum      = BCACHEFS_ROOT_INO;
+       root_inode.bi_subvol    = BCACHEFS_ROOT_SUBVOL;
        bch2_inode_pack(c, &packed_inode, &root_inode);
        packed_inode.inode.k.p.snapshot = U32_MAX;
 
@@ -1351,11 +1480,12 @@ int bch2_fs_initialize(struct bch_fs *c)
 
        err = "error creating lost+found";
        ret = bch2_trans_do(c, NULL, NULL, 0,
-               bch2_create_trans(&trans, BCACHEFS_ROOT_INO,
+               bch2_create_trans(&trans,
+                                 BCACHEFS_ROOT_SUBVOL_INUM,
                                  &root_inode, &lostfound_inode,
                                  &lostfound,
                                  0, 0, S_IFDIR|0700, 0,
-                                 NULL, NULL));
+                                 NULL, NULL, (subvol_inum) { 0 }, 0));
        if (ret) {
                bch_err(c, "error creating lost+found");
                goto err;