#include "quota.h"
#include "recovery.h"
#include "replicas.h"
+#include "subvolume.h"
#include "super-io.h"
#include <linux/sort.h>
keys->nr = dst;
}
+/*
+ * Btree node pointers have a field to stash a pointer to the in memory btree
+ * node; we need to zero out this field when reading in btree nodes, or when
+ * reading in keys from the journal.
+ *
+ * Walks every entry of @keys (keys->d[0] up to, but not including,
+ * keys->d[keys->nr]) and clears v.mem_ptr on each key whose type is
+ * KEY_TYPE_btree_ptr_v2. Keys of any other type are left untouched.
+ */
+static void zero_out_btree_mem_ptr(struct journal_keys *keys)
+{
+ struct journal_key *i;
+
+ for (i = keys->d; i < keys->d + keys->nr; i++)
+ if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
+}
+
/* iterate over keys read from the journal: */
static int __journal_key_cmp(enum btree_id l_btree_id,
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_buf_reassemble(&tmp, c, k);
- bch2_btree_node_prefetch(c, NULL, tmp.k,
+ bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
b->c.btree_id, b->c.level - 1);
bch2_btree_and_journal_iter_advance(&iter);
enum btree_id id, unsigned level,
struct bkey_i *k)
{
- struct btree_iter *iter;
+ struct btree_iter iter;
int ret;
- iter = bch2_trans_get_node_iter(trans, id, k->k.p,
- BTREE_MAX_DEPTH, level,
- BTREE_ITER_INTENT);
-
- /*
- * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run
- * extent_handle_overwrites() and extent_update_to_keys() - but we don't
- * want that here, journal replay is supposed to treat extents like
- * regular keys:
- */
- BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-
- ret = bch2_btree_iter_traverse(iter) ?:
- bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_node_iter_init(trans, &iter, id, k->k.p,
+ BTREE_MAX_DEPTH, level,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_NOT_EXTENTS);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
{
- struct btree_iter *iter;
+ struct btree_iter iter; /* on-stack iterator: initialised by bch2_trans_iter_init() and handed to bch2_trans_iter_exit() before returning */
int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p,
- BTREE_ITER_CACHED|
- BTREE_ITER_CACHED_NOFILL|
- BTREE_ITER_INTENT);
- ret = bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?: /* GNU "?:": a non-zero traverse result becomes ret and the bch2_trans_update() call is skipped */
+ bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+ bch2_trans_iter_exit(trans, &iter); /* reached on both the success and the error path */
return ret;
}
ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
ca->usage_base->buckets_unavailable = le64_to_cpu(u->buckets_unavailable);
- for (i = 0; i < nr_types; i++) {
+ for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented);
return ret;
}
+/*
+ * Create the root snapshot and root subvolume keys.
+ *
+ * Inserts a snapshot key at offset U32_MAX into BTREE_ID_snapshots whose
+ * subvol field names BCACHEFS_ROOT_SUBVOL, then a subvolume key at offset
+ * BCACHEFS_ROOT_SUBVOL into BTREE_ID_subvolumes that refers back to snapshot
+ * U32_MAX and to the root inode (BCACHEFS_ROOT_INO).
+ *
+ * Returns 0 on success, or the error from the first bch2_btree_insert() that
+ * fails; the subvolume key is not inserted when the snapshot insert fails.
+ */
+static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
+{
+	struct bkey_i_snapshot root_snapshot;
+	struct bkey_i_subvolume root_volume;
+	int ret;
+
+	bkey_snapshot_init(&root_snapshot.k_i);
+	root_snapshot.k.p.offset = U32_MAX;
+	root_snapshot.v.flags	= 0;
+	root_snapshot.v.parent	= 0;
+	/*
+	 * Convert to little endian like the subvolume fields below, so the
+	 * stored value is the same on big endian hosts.
+	 * NOTE(review): assumes bch_snapshot.subvol is declared __le32 -
+	 * confirm against the on-disk format header.
+	 */
+	root_snapshot.v.subvol	= cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+	root_snapshot.v.pad	= 0;
+	SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+	ret = bch2_btree_insert(c, BTREE_ID_snapshots,
+				&root_snapshot.k_i,
+				NULL, NULL, 0);
+	if (ret)
+		return ret;
+
+	bkey_subvolume_init(&root_volume.k_i);
+	root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+	root_volume.v.flags	= 0;
+	root_volume.v.snapshot	= cpu_to_le32(U32_MAX);
+	root_volume.v.inode	= cpu_to_le64(BCACHEFS_ROOT_INO);
+
+	/* Last step: its result is the function's result. */
+	return bch2_btree_insert(c, BTREE_ID_subvolumes,
+				 &root_volume.k_i,
+				 NULL, NULL, 0);
+}
+
+/*
+ * Set bi_subvol on the root inode to BCACHEFS_ROOT_SUBVOL.
+ *
+ * Looks up the key at POS(0, BCACHEFS_ROOT_INO) in BTREE_ID_inodes, unpacks
+ * it, sets bi_subvol, repacks it into a buffer obtained from
+ * bch2_trans_kmalloc() and passes the result to bch2_trans_update() through
+ * the same iterator.
+ *
+ * Returns 0 on success, -ENOENT if the slot does not hold a KEY_TYPE_inode
+ * key, or the error reported by the lookup, unpack, allocation or update.
+ * The iterator is released on every path.
+ */
+static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct bch_inode_unpacked inode;
+	struct bkey_inode_buf *packed;
+	int ret;
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+			     POS(0, BCACHEFS_ROOT_INO), 0);
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
+	if (ret)
+		goto err;
+
+	if (k.k->type != KEY_TYPE_inode) {
+		bch_err(c, "root inode not found");
+		ret = -ENOENT;
+		goto err;
+	}
+
+	/*
+	 * An inode that fails to unpack is reported to the caller instead of
+	 * taking the whole machine down with BUG_ON(); the err path below
+	 * already does all the cleanup that is needed.
+	 */
+	ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode);
+	if (ret) {
+		bch_err(c, "error unpacking root inode");
+		goto err;
+	}
+
+	inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+	packed = bch2_trans_kmalloc(trans, sizeof(*packed));
+	ret = PTR_ERR_OR_ZERO(packed);
+	if (ret)
+		goto err;
+
+	bch2_inode_pack(c, packed, &inode);
+	ret = bch2_trans_update(trans, &iter, &packed->inode.k_i, 0);
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
int bch2_fs_recovery(struct bch_fs *c)
{
const char *err = "cannot allocate memory";
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
+ } else if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
+ bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
+ c->opts.version_upgrade = true;
+ } else if (c->sb.version < bcachefs_metadata_version_snapshot) {
+ bch_info(c, "filesystem version is prior to snapshot field - upgrading");
+ c->opts.version_upgrade = true;
}
ret = bch2_blacklist_table_initialize(c);
drop_alloc_keys(&c->journal_keys);
}
+ zero_out_btree_mem_ptr(&c->journal_keys);
+
ret = journal_replay_early(c, clean, &c->journal_entries);
if (ret)
goto err;
bch_verbose(c, "alloc write done");
}
+ if (c->sb.version < bcachefs_metadata_version_snapshot) {
+ err = "error creating root snapshot node";
+ ret = bch2_fs_initialize_subvolumes(c);
+ if (ret)
+ goto err;
+ }
+
+ bch_verbose(c, "reading snapshots table");
+ err = "error reading snapshots table";
+ ret = bch2_fs_snapshots_start(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "reading snapshots done");
+
+ if (c->sb.version < bcachefs_metadata_version_snapshot) {
+ /* set bi_subvol on root inode */
+ err = "error upgrade root inode for subvolumes";
+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
+ bch2_fs_upgrade_for_subvolumes(&trans));
+ if (ret)
+ goto err;
+ }
+
if (c->opts.fsck) {
bch_info(c, "starting fsck");
err = "error in fsck";
if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
- struct bch_move_stats stats = { 0 };
+ struct bch_move_stats stats;
+
+ bch_move_stats_init(&stats, "recovery");
bch_info(c, "scanning for old btree nodes");
ret = bch2_fs_read_write(c);
}
}
+ err = "error creating root snapshot node";
+ ret = bch2_fs_initialize_subvolumes(c);
+ if (ret)
+ goto err;
+
+ bch_verbose(c, "reading snapshots table");
+ err = "error reading snapshots table";
+ ret = bch2_fs_snapshots_start(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "reading snapshots done");
+
bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
- root_inode.bi_inum = BCACHEFS_ROOT_INO;
+ root_inode.bi_inum = BCACHEFS_ROOT_INO;
+ root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
bch2_inode_pack(c, &packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating lost+found";
ret = bch2_trans_do(c, NULL, NULL, 0,
- bch2_create_trans(&trans, BCACHEFS_ROOT_INO,
+ bch2_create_trans(&trans,
+ BCACHEFS_ROOT_SUBVOL_INUM,
&root_inode, &lostfound_inode,
&lostfound,
0, 0, S_IFDIR|0700, 0,
- NULL, NULL));
+ NULL, NULL, (subvol_inum) { 0 }, 0));
if (ret) {
bch_err(c, "error creating lost+found");
goto err;