X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsubvolume.c;h=7c67c28d3ef88ff32d1805257faf37ebc79f0d2d;hb=b5fd066153c40a70a29caa1ea7987723ab687763;hp=24244bc3d2fbff6703dcf5b4e27d513ebb7082dc;hpb=ddac1641ee1e2686c2211a8d671ea723634dfc89;p=bcachefs-tools-debian diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 24244bc..7c67c28 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -6,893 +6,269 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "snapshot.h" #include "subvolume.h" -/* Snapshot tree: */ +#include -void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) -{ - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); - - prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u", - BCH_SNAPSHOT_SUBVOL(s.v), - BCH_SNAPSHOT_DELETED(s.v), - le32_to_cpu(s.v->parent), - le32_to_cpu(s.v->children[0]), - le32_to_cpu(s.v->children[1]), - le32_to_cpu(s.v->subvol)); -} - -int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, - int rw, struct printbuf *err) -{ - struct bkey_s_c_snapshot s; - u32 i, id; - - if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0 || - bkey_cmp(k.k->p, POS(0, 1)) < 0) { - prt_printf(err, "bad pos"); - return -EINVAL; - } - - if (bkey_val_bytes(k.k) != sizeof(struct bch_snapshot)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_snapshot)); - return -EINVAL; - } - - s = bkey_s_c_to_snapshot(k); - - id = le32_to_cpu(s.v->parent); - if (id && id <= k.k->p.offset) { - prt_printf(err, "bad parent node (%u <= %llu)", - id, k.k->p.offset); - return -EINVAL; - } - - if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { - prt_printf(err, "children not normalized"); - return -EINVAL; - } - - if (s.v->children[0] && - s.v->children[0] == s.v->children[1]) { - prt_printf(err, "duplicate child nodes"); - return -EINVAL; - } - - for (i = 0; i < 2; i++) { - id = 
le32_to_cpu(s.v->children[i]); - - if (id >= k.k->p.offset) { - prt_printf(err, "bad child node (%u >= %llu)", - id, k.k->p.offset); - return -EINVAL; - } - } - - return 0; -} - -int bch2_mark_snapshot(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct snapshot_t *t; - - t = genradix_ptr_alloc(&c->snapshots, - U32_MAX - new.k->p.offset, - GFP_KERNEL); - if (!t) - return -ENOMEM; - - if (new.k->type == KEY_TYPE_snapshot) { - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - - t->parent = le32_to_cpu(s.v->parent); - t->children[0] = le32_to_cpu(s.v->children[0]); - t->children[1] = le32_to_cpu(s.v->children[1]); - t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; - } else { - t->parent = 0; - t->children[0] = 0; - t->children[1] = 0; - t->subvol = 0; - } - - return 0; -} - -static int snapshot_lookup(struct btree_trans *trans, u32 id, - struct bch_snapshot *s) -{ - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k) ?: k.k->type == KEY_TYPE_snapshot ? 
0 : -ENOENT; - - if (!ret) - *s = *bkey_s_c_to_snapshot(k).v; - - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static int snapshot_live(struct btree_trans *trans, u32 id) -{ - struct bch_snapshot v; - int ret; - - if (!id) - return 0; - - ret = snapshot_lookup(trans, id, &v); - if (ret == -ENOENT) - bch_err(trans->c, "snapshot node %u not found", id); - if (ret) - return ret; - - return !BCH_SNAPSHOT_DELETED(&v); -} - -static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - unsigned i, nr_live = 0, live_idx = 0; - struct bkey_s_c_snapshot snap; - u32 id = k.k->p.offset, child[2]; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - snap = bkey_s_c_to_snapshot(k); - - child[0] = le32_to_cpu(snap.v->children[0]); - child[1] = le32_to_cpu(snap.v->children[1]); - - for (i = 0; i < 2; i++) { - int ret = snapshot_live(trans, child[i]); - if (ret < 0) - return ret; - - if (ret) - live_idx = i; - nr_live += ret; - } - - snapshot_t(c, id)->equiv = nr_live == 1 - ? 
snapshot_t(c, child[live_idx])->equiv - : id; - return 0; -} - -/* fsck: */ -static int check_snapshot(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bkey_s_c_snapshot s; - struct bch_subvolume subvol; - struct bch_snapshot v; - struct printbuf buf = PRINTBUF; - bool should_have_subvol; - u32 i, id; - int ret = 0; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - s = bkey_s_c_to_snapshot(k); - id = le32_to_cpu(s.v->parent); - if (id) { - ret = snapshot_lookup(trans, id, &v); - if (ret == -ENOENT) - bch_err(c, "snapshot with nonexistent parent:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); - if (ret) - goto err; - - if (le32_to_cpu(v.children[0]) != s.k->p.offset && - le32_to_cpu(v.children[1]) != s.k->p.offset) { - bch_err(c, "snapshot parent %u missing pointer to child %llu", - id, s.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - for (i = 0; i < 2 && s.v->children[i]; i++) { - id = le32_to_cpu(s.v->children[i]); - - ret = snapshot_lookup(trans, id, &v); - if (ret == -ENOENT) - bch_err(c, "snapshot node %llu has nonexistent child %u", - s.k->p.offset, id); - if (ret) - goto err; - - if (le32_to_cpu(v.parent) != s.k->p.offset) { - bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", - id, le32_to_cpu(v.parent), s.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) && - !BCH_SNAPSHOT_DELETED(s.v); - - if (should_have_subvol) { - id = le32_to_cpu(s.v->subvol); - ret = bch2_subvolume_get(trans, id, 0, false, &subvol); - if (ret == -ENOENT) - bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); - if (ret) - goto err; - - if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) { - bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", - s.k->p.offset); - ret = -EINVAL; - goto err; - } - } else { - if 
(fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u)); - - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - bkey_reassemble(&u->k_i, s.s_c); - u->v.subvol = 0; - ret = bch2_trans_update(trans, iter, &u->k_i, 0); - if (ret) - goto err; - } - } - - if (BCH_SNAPSHOT_DELETED(s.v)) - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); -err: -fsck_err: - printbuf_exit(&buf); - return ret; -} - -int bch2_fs_check_snapshots(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - bch2_trans_init(&trans, c, 0, 0); - - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_snapshot(&trans, &iter, k)); - - if (ret) - bch_err(c, "error %i checking snapshots", ret); - - bch2_trans_exit(&trans); - return ret; -} +static int bch2_subvolume_delete(struct btree_trans *, u32); static int check_subvol(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { + struct bch_fs *c = trans->c; struct bkey_s_c_subvolume subvol; struct bch_snapshot snapshot; unsigned snapid; - int ret; + int ret = 0; if (k.k->type != KEY_TYPE_subvolume) return 0; subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - ret = snapshot_lookup(trans, snapid, &snapshot); + ret = bch2_snapshot_lookup(trans, snapid, &snapshot); - if (ret == -ENOENT) - bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u", + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "subvolume %llu points to nonexistent snapshot %u", k.k->p.offset, snapid); if (ret) return ret; if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); - if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(trans->c, "error 
deleting subvolume %llu: %s", - iter->pos.offset, bch2_err_str(ret)); - if (ret) - return ret; + bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); + return ret ?: -BCH_ERR_transaction_restart_nested; } - return 0; -} - -int bch2_fs_check_subvols(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - bch2_trans_init(&trans, c, 0, 0); - - ret = for_each_btree_key_commit(&trans, iter, - BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_subvol(&trans, &iter, k)); - - bch2_trans_exit(&trans); - - return ret; -} + if (!BCH_SUBVOLUME_SNAP(subvol.v)) { + u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); + u32 snapshot_tree; + struct bch_snapshot_tree st; -void bch2_fs_snapshots_exit(struct bch_fs *c) -{ - genradix_free(&c->snapshots); -} + rcu_read_lock(); + snapshot_tree = snapshot_t(c, snapshot_root)->tree; + rcu_read_unlock(); -int bch2_fs_snapshots_start(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; + ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(&trans, k)); - - bch2_trans_exit(&trans); - - if (ret) - bch_err(c, "error starting snapshots: %s", bch2_err_str(ret)); - return ret; -} + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, + "%s: snapshot tree %u not found", __func__, snapshot_tree); -/* - * Mark a snapshot as deleted, for future cleanup: - */ -static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) -{ - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_snapshot *s; - int ret = 0; + if (ret) + return ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_INTENT); 
- k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; + if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + c, subvol_not_master_and_not_snapshot, + "subvolume %llu is not set as snapshot but is not master subvolume", + k.k->p.offset)) { + struct bkey_i_subvolume *s = + bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); + ret = PTR_ERR_OR_ZERO(s); + if (ret) + return ret; - if (k.k->type != KEY_TYPE_snapshot) { - bch2_fs_inconsistent(trans->c, "missing snapshot %u", id); - ret = -ENOENT; - goto err; + SET_BCH_SUBVOLUME_SNAP(&s->v, true); + } } - /* already deleted? */ - if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)) - goto err; - - s = bch2_trans_kmalloc(trans, sizeof(*s)); - ret = PTR_ERR_OR_ZERO(s); - if (ret) - goto err; - - bkey_reassemble(&s->k_i, k); - SET_BCH_SNAPSHOT_DELETED(&s->v, true); - SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); - s->v.subvol = 0; - - ret = bch2_trans_update(trans, &iter, &s->k_i, 0); - if (ret) - goto err; -err: - bch2_trans_iter_exit(trans, &iter); +fsck_err: return ret; } -static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) +int bch2_check_subvols(struct bch_fs *c) { - struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; - struct bkey_s_c k; - struct bkey_s_c_snapshot s; - struct bkey_i_snapshot *parent; - u32 parent_id; - unsigned i; - int ret = 0; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_snapshot) { - bch2_fs_inconsistent(trans->c, "missing snapshot %u", id); - ret = -ENOENT; - goto err; - } - - s = bkey_s_c_to_snapshot(k); - - BUG_ON(!BCH_SNAPSHOT_DELETED(s.v)); - parent_id = le32_to_cpu(s.v->parent); - - if (parent_id) { - bch2_trans_iter_init(trans, &p_iter, BTREE_ID_snapshots, - POS(0, parent_id), - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&p_iter); - ret = 
bkey_err(k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_snapshot) { - bch2_fs_inconsistent(trans->c, "missing snapshot %u", parent_id); - ret = -ENOENT; - goto err; - } - - parent = bch2_trans_kmalloc(trans, sizeof(*parent)); - ret = PTR_ERR_OR_ZERO(parent); - if (ret) - goto err; - - bkey_reassemble(&parent->k_i, k); - - for (i = 0; i < 2; i++) - if (le32_to_cpu(parent->v.children[i]) == id) - break; - - if (i == 2) - bch_err(trans->c, "snapshot %u missing child pointer to %u", - parent_id, id); - else - parent->v.children[i] = 0; - - if (le32_to_cpu(parent->v.children[0]) < - le32_to_cpu(parent->v.children[1])) - swap(parent->v.children[0], - parent->v.children[1]); - - ret = bch2_trans_update(trans, &p_iter, &parent->k_i, 0); - if (ret) - goto err; - } - - ret = bch2_btree_delete_at(trans, &iter, 0); -err: - bch2_trans_iter_exit(trans, &p_iter); - bch2_trans_iter_exit(trans, &iter); + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_subvol(trans, &iter, k))); + bch_err_fn(c, ret); return ret; } -int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, - u32 *new_snapids, - u32 *snapshot_subvols, - unsigned nr_snapids) +/* Subvolumes: */ + +int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, + enum bkey_invalid_flags flags, struct printbuf *err) { - struct btree_iter iter; - struct bkey_i_snapshot *n; - struct bkey_s_c k; - unsigned i; int ret = 0; - bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, - POS_MIN, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - for (i = 0; i < nr_snapids; i++) { - k = bch2_btree_iter_prev_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - if (!k.k || !k.k->p.offset) { - ret = -ENOSPC; - goto err; - } - - n = bch2_trans_kmalloc(trans, sizeof(*n)); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - goto err; - - 
bkey_snapshot_init(&n->k_i); - n->k.p = iter.pos; - n->v.flags = 0; - n->v.parent = cpu_to_le32(parent); - n->v.subvol = cpu_to_le32(snapshot_subvols[i]); - n->v.pad = 0; - SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - - ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?: - bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); - if (ret) - goto err; - - new_snapids[i] = iter.pos.offset; - } - - if (parent) { - bch2_btree_iter_set_pos(&iter, POS(0, parent)); - k = bch2_btree_iter_peek(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_snapshot) { - bch_err(trans->c, "snapshot %u not found", parent); - ret = -ENOENT; - goto err; - } - - n = bch2_trans_kmalloc(trans, sizeof(*n)); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - goto err; - - bkey_reassemble(&n->k_i, k); - - if (n->v.children[0] || n->v.children[1]) { - bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); - ret = -EINVAL; - goto err; - } - - n->v.children[0] = cpu_to_le32(new_snapids[0]); - n->v.children[1] = cpu_to_le32(new_snapids[1]); - n->v.subvol = 0; - SET_BCH_SNAPSHOT_SUBVOL(&n->v, false); - ret = bch2_trans_update(trans, &iter, &n->k_i, 0); - if (ret) - goto err; - } -err: - bch2_trans_iter_exit(trans, &iter); + bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || + bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, + subvol_pos_bad, + "invalid pos"); +fsck_err: return ret; } -static int snapshot_delete_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - snapshot_id_list *deleted, - snapshot_id_list *equiv_seen, - struct bpos *last_pos) -{ - struct bch_fs *c = trans->c; - u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; - - if (bkey_cmp(k.k->p, *last_pos)) - equiv_seen->nr = 0; - *last_pos = k.k->p; - - if (snapshot_list_has_id(deleted, k.k->p.snapshot) || - snapshot_list_has_id(equiv_seen, equiv)) { - return bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - } else { - return 
snapshot_list_add(c, equiv_seen, equiv); - } -} - -static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) +void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - struct bkey_s_c_snapshot snap; - u32 children[2]; - int ret; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v) || - BCH_SNAPSHOT_SUBVOL(snap.v)) - return 0; - - children[0] = le32_to_cpu(snap.v->children[0]); - children[1] = le32_to_cpu(snap.v->children[1]); + struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - ret = snapshot_live(trans, children[0]) ?: - snapshot_live(trans, children[1]); - if (ret < 0) - return ret; + prt_printf(out, "root %llu snapshot id %u", + le64_to_cpu(s.v->inode), + le32_to_cpu(s.v->snapshot)); - if (!ret) - return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); - return 0; + if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent)) + prt_printf(out, " parent %u", le32_to_cpu(s.v->parent)); } -int bch2_delete_dead_snapshots(struct bch_fs *c) +static __always_inline int +bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, + bool inconsistent_if_not_found, + int iter_flags, + struct bch_subvolume *s) { - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_snapshot snap; - snapshot_id_list deleted = { 0 }; - u32 i, id; - int ret = 0; - - if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) - return 0; - - if (!test_bit(BCH_FS_STARTED, &c->flags)) { - ret = bch2_fs_read_write_early(c); - if (ret) { - bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret)); - return ret; - } - } - - bch2_trans_init(&trans, c, 0, 0); - - /* - * For every snapshot node: If we have no live children and it's not - * pointed to by a subvolume, delete it: - */ - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, - 
POS_MIN, 0, k, - NULL, NULL, 0, - bch2_delete_redundant_snapshot(&trans, &iter, k)); - if (ret) { - bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); - goto err; - } - - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - bch2_snapshot_set_equiv(&trans, k)); - if (ret) { - bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret)); - goto err; - } - - for_each_btree_key(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { - if (k.k->type != KEY_TYPE_snapshot) - continue; - - snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v)) { - ret = snapshot_list_add(c, &deleted, k.k->p.offset); - if (ret) - break; - } - } - bch2_trans_iter_exit(&trans, &iter); - - if (ret) { - bch_err(c, "error walking snapshots: %s", bch2_err_str(ret)); - goto err; - } - - for (id = 0; id < BTREE_ID_NR; id++) { - struct bpos last_pos = POS_MIN; - snapshot_id_list equiv_seen = { 0 }; - - if (!btree_type_has_snapshots(id)) - continue; - - ret = for_each_btree_key_commit(&trans, iter, - id, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_NOFAIL, - snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)); - - darray_exit(&equiv_seen); - - if (ret) { - bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret)); - goto err; - } - } - - for (i = 0; i < deleted.nr; i++) { - ret = commit_do(&trans, NULL, NULL, 0, - bch2_snapshot_node_delete(&trans, deleted.data[i])); - if (ret) { - bch_err(c, "error deleting snapshot %u: %s", - deleted.data[i], bch2_err_str(ret)); - goto err; - } - } - - clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); -err: - darray_exit(&deleted); - bch2_trans_exit(&trans); + int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), + iter_flags, subvolume, s); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && + inconsistent_if_not_found, + trans->c, "missing subvolume %u", subvol); return ret; } -static void 
bch2_delete_dead_snapshots_work(struct work_struct *work) -{ - struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); - - bch2_delete_dead_snapshots(c); - percpu_ref_put(&c->writes); -} - -void bch2_delete_dead_snapshots_async(struct bch_fs *c) +int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, + bool inconsistent_if_not_found, + int iter_flags, + struct bch_subvolume *s) { - if (!percpu_ref_tryget_live(&c->writes)) - return; - - if (!queue_work(system_long_wq, &c->snapshot_delete_work)) - percpu_ref_put(&c->writes); + return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); } -static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, - struct btree_trans_commit_hook *h) +int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) { - struct bch_fs *c = trans->c; - - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - - if (!test_bit(BCH_FS_FSCK_DONE, &c->flags)) - return 0; + struct bch_subvolume s; + int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); + if (ret) + return ret; - bch2_delete_dead_snapshots_async(c); + if (BCH_SUBVOLUME_RO(&s)) + return -EROFS; return 0; } -/* Subvolumes: */ - -int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, - int rw, struct printbuf *err) +int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) { - if (bkey_cmp(k.k->p, SUBVOL_POS_MIN) < 0 || - bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) { - prt_printf(err, "invalid pos"); - return -EINVAL; - } - - if (bkey_val_bytes(k.k) != sizeof(struct bch_subvolume)) { - prt_printf(err, "incorrect value size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_subvolume)); - return -EINVAL; - } - - return 0; + return bch2_trans_do(c, NULL, NULL, 0, + bch2_subvol_is_ro_trans(trans, subvol)); } -void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, + struct bch_subvolume 
*subvol) { - struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); + struct bch_snapshot snap; - prt_printf(out, "root %llu snapshot id %u", - le64_to_cpu(s.v->inode), - le32_to_cpu(s.v->snapshot)); + return bch2_snapshot_lookup(trans, snapshot, &snap) ?: + bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); } -int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, - bool inconsistent_if_not_found, - int iter_flags, - struct bch_subvolume *s) +int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid) { struct btree_iter iter; - struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol), - iter_flags); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT; - - if (ret == -ENOENT && inconsistent_if_not_found) - bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol); - if (!ret) - *s = *bkey_s_c_to_subvolume(k).v; + subvol = bch2_bkey_get_iter_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES, + subvolume); + ret = bkey_err(subvol); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing subvolume %u", subvolid); + if (likely(!ret)) + *snapid = le32_to_cpu(subvol.v->snapshot); bch2_trans_iter_exit(trans, &iter); return ret; } -int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, - struct bch_subvolume *subvol) +static int bch2_subvolume_reparent(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + u32 old_parent, u32 new_parent) { - struct bch_snapshot snap; + struct bkey_i_subvolume *s; + int ret; - return snapshot_lookup(trans, snapshot, &snap) ?: - bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); + if (k.k->type != KEY_TYPE_subvolume) + return 0; + + if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) && + 
le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent) + return 0; + + s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); + ret = PTR_ERR_OR_ZERO(s); + if (ret) + return ret; + + s->v.parent = cpu_to_le32(new_parent); + return 0; } -int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, - u32 *snapid) +/* + * Separate from the snapshot tree in the snapshots btree, we record the tree + * structure of how snapshot subvolumes were created - the parent subvolume of + * each snapshot subvolume. + * + * When a subvolume is deleted, we scan for child subvolumes and reparent them, + * to avoid dangling references: + */ +static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) { struct bch_subvolume s; - int ret; - ret = bch2_subvolume_get(trans, subvol, true, - BTREE_ITER_CACHED| - BTREE_ITER_WITH_UPDATES, - &s); - - *snapid = le32_to_cpu(s.snapshot); - return ret; + return lockrestart_do(trans, + bch2_subvolume_get(trans, subvolid_to_delete, true, + BTREE_ITER_CACHED, &s)) ?: + for_each_btree_key_commit(trans, iter, + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_subvolume_reparent(trans, &iter, k, + subvolid_to_delete, le32_to_cpu(s.parent))); } /* * Delete subvolume, mark snapshot ID as deleted, queue up snapshot * deletion/cleanup: */ -int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) +static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; - struct bkey_s_c k; struct bkey_s_c_subvolume subvol; - struct btree_trans_commit_hook *h; u32 snapid; int ret = 0; - bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, - POS(0, subvolid), - BTREE_ITER_CACHED| - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); + subvol = bch2_bkey_get_iter_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_CACHED|BTREE_ITER_INTENT, + subvolume); + ret = 
bkey_err(subvol); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing subvolume %u", subvolid); if (ret) - goto err; - - if (k.k->type != KEY_TYPE_subvolume) { - bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); - ret = -EIO; - goto err; - } + return ret; - subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - ret = bch2_btree_delete_at(trans, &iter, 0); - if (ret) - goto err; - - ret = bch2_snapshot_node_set_deleted(trans, snapid); - if (ret) - goto err; - - h = bch2_trans_kmalloc(trans, sizeof(*h)); - ret = PTR_ERR_OR_ZERO(h); - if (ret) - goto err; - - h->fn = bch2_delete_dead_snapshots_hook; - bch2_trans_commit_hook(trans, h); -err: + ret = bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_snapshot_node_set_deleted(trans, snapid); bch2_trans_iter_exit(trans, &iter); return ret; } -void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) +static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) +{ + return bch2_subvolumes_reparent(trans, subvolid) ?: + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_subvolume_delete(trans, subvolid)); +} + +static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_wait_for_pagecache_and_delete_work); @@ -912,18 +288,16 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) bch2_evict_subvolume_inodes(c, &s); for (id = s.data; id < s.data + s.nr; id++) { - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_subvolume_delete(&trans, *id)); - if (ret) { - bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret)); + ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); + bch_err_msg(c, ret, "deleting subvolume %u", *id); + if (ret) break; - } } darray_exit(&s); } - percpu_ref_put(&c->writes); + bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); } struct 
subvolume_unlink_hook { @@ -931,7 +305,7 @@ struct subvolume_unlink_hook { u32 subvol; }; -int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, +static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, struct btree_trans_commit_hook *_h) { struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); @@ -946,58 +320,41 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, if (ret) return ret; - if (unlikely(!percpu_ref_tryget_live(&c->writes))) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache)) return -EROFS; - if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) - percpu_ref_put(&c->writes); + if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) + bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); return 0; } int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; - struct bkey_s_c k; struct bkey_i_subvolume *n; struct subvolume_unlink_hook *h; int ret = 0; - bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, - POS(0, subvolid), - BTREE_ITER_CACHED| - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_subvolume) { - bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); - ret = -EIO; - goto err; - } - - n = bch2_trans_kmalloc(trans, sizeof(*n)); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - goto err; - - bkey_reassemble(&n->k_i, k); - SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); - - ret = bch2_trans_update(trans, &iter, &n->k_i, 0); - if (ret) - goto err; - h = bch2_trans_kmalloc(trans, sizeof(*h)); ret = PTR_ERR_OR_ZERO(h); if (ret) - goto err; + return ret; h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; h->subvol = subvolid; bch2_trans_commit_hook(trans, &h->h); -err: + + n = bch2_bkey_get_mut_typed(trans, &iter, + 
BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_CACHED, subvolume); + ret = PTR_ERR_OR_ZERO(n); + if (unlikely(ret)) { + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing subvolume %u", subvolid); + return ret; + } + + SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1012,54 +369,32 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; struct bkey_i_subvolume *new_subvol = NULL; struct bkey_i_subvolume *src_subvol = NULL; - struct bkey_s_c k; u32 parent = 0, new_nodes[2], snapshot_subvols[2]; int ret = 0; - for_each_btree_key(trans, dst_iter, BTREE_ID_subvolumes, SUBVOL_POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) - break; - - /* - * bch2_subvolume_delete() doesn't flush the btree key cache - - * ideally it would but that's tricky - */ - if (bkey_deleted(k.k) && - !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos)) - goto found_slot; - } + ret = bch2_bkey_get_empty_slot(trans, &dst_iter, + BTREE_ID_subvolumes, POS(0, U32_MAX)); + if (ret == -BCH_ERR_ENOSPC_btree_slot) + ret = -BCH_ERR_ENOSPC_subvolume_create; + if (ret) + return ret; - if (!ret) - ret = -ENOSPC; - goto err; -found_slot: snapshot_subvols[0] = dst_iter.pos.offset; snapshot_subvols[1] = src_subvolid; if (src_subvolid) { /* Creating a snapshot: */ - src_subvol = bch2_trans_kmalloc(trans, sizeof(*src_subvol)); - ret = PTR_ERR_OR_ZERO(src_subvol); - if (ret) - goto err; - - bch2_trans_iter_init(trans, &src_iter, BTREE_ID_subvolumes, - POS(0, src_subvolid), - BTREE_ITER_CACHED| - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&src_iter); - ret = bkey_err(k); - if (ret) - goto err; - if (k.k->type != KEY_TYPE_subvolume) { - bch_err(c, "subvolume %u not found", src_subvolid); - ret = -ENOENT; + src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter, + BTREE_ID_subvolumes, POS(0, 
src_subvolid), + BTREE_ITER_CACHED, subvolume); + ret = PTR_ERR_OR_ZERO(src_subvol); + if (unlikely(ret)) { + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, + "subvolume %u not found", src_subvolid); goto err; } - bkey_reassemble(&src_subvol->k_i, k); parent = le32_to_cpu(src_subvol->v.snapshot); } @@ -1076,21 +411,20 @@ found_slot: goto err; } - new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); + new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume); ret = PTR_ERR_OR_ZERO(new_subvol); if (ret) goto err; - bkey_subvolume_init(&new_subvol->k_i); new_subvol->v.flags = 0; new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]); new_subvol->v.inode = cpu_to_le64(inode); + new_subvol->v.parent = cpu_to_le32(src_subvolid); + new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c)); + new_subvol->v.otime.hi = 0; + SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); - new_subvol->k.p = dst_iter.pos; - ret = bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0); - if (ret) - goto err; *new_subvolid = new_subvol->k.p.offset; *new_snapshotid = new_nodes[0];