1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
11 #include <linux/random.h>
13 static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * One skiplist hop toward @ancestor: snapshot IDs grow toward the root, so we
 * take the deepest skip pointer that does not overshoot (skip[i] <= ancestor),
 * falling through from skip[2] to skip[0].
 * NOTE(review): the return statements for each branch are elided in this
 * excerpt — presumably each returns the corresponding skip/parent id.
 */
15 static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
17 	const struct snapshot_t *s = __snapshot_t(t, id);
19 	if (s->skip[2] <= ancestor)
21 	if (s->skip[1] <= ancestor)
23 	if (s->skip[0] <= ancestor)
/*
 * Test whether @ancestor is an ancestor of @id (or equal to it) using the
 * in-memory snapshot table under RCU.  The EBUG_ON documents that this fast
 * path is only valid after the check_snapshots recovery pass has populated /
 * validated the skiplists; before that, use bch2_snapshot_is_ancestor_early().
 */
28 bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
30 	struct snapshot_table *t;
32 	EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
35 	t = rcu_dereference(c->snapshots);
37 	while (id && id < ancestor)
38 		id = get_ancestor_below(t, id, ancestor);
41 	return id == ancestor;
/*
 * Same ancestry test as bch2_snapshot_is_ancestor(), but walks the plain
 * parent pointers one step at a time — safe to use early in recovery, before
 * the skiplists have been checked.
 */
44 static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
46 	struct snapshot_table *t;
49 	t = rcu_dereference(c->snapshots);
51 	while (id && id < ancestor)
52 		id = __snapshot_t(t, id)->parent;
55 	return id == ancestor;
/*
 * Depth of a new snapshot node created under @parent: parent's depth + 1,
 * or 0 for a tree root (parent == 0).
 */
58 static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
63 	depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
/*
 * Slow path of snapshot_t_mut(): grow the RCU-protected in-memory snapshot
 * table so it can hold @id.  Snapshot IDs are allocated from the top of the
 * keyspace, hence the index inversion idx = U32_MAX - id.  The old contents
 * are copied into the new (zeroed) table before the pointer is published.
 * NOTE(review): the allocation-failure return and the kvfree/RCU reclaim of
 * the old table are elided in this excerpt.
 */
69 static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
71 	size_t idx = U32_MAX - id;
73 	struct snapshot_table *new, *old;
75 	new_size = max(16UL, roundup_pow_of_two(idx + 1));
77 	new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
84 		rcu_dereference_protected(c->snapshots, true)->s,
85 		sizeof(new->s[0]) * c->snapshot_table_size);
87 	rcu_assign_pointer(c->snapshots, new);
88 	c->snapshot_table_size = new_size;
92 	return &rcu_dereference_protected(c->snapshots, true)->s[idx];
/*
 * Mutable lookup of the in-memory entry for snapshot @id; caller must hold
 * snapshot_table_lock.  Fast path when the table is already large enough,
 * otherwise falls back to __snapshot_t_mut() to grow it.
 */
95 static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
97 	size_t idx = U32_MAX - id;
99 	lockdep_assert_held(&c->snapshot_table_lock);
101 	if (likely(idx < c->snapshot_table_size))
102 		return &rcu_dereference_protected(c->snapshots, true)->s[idx];
104 	return __snapshot_t_mut(c, id);
/* Pretty-print a KEY_TYPE_snapshot_tree value (master subvol + root snapshot). */
109 void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
112 	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
114 	prt_printf(out, "subvol %u root snapshot %u",
115 		   le32_to_cpu(t.v->master_subvol),
116 		   le32_to_cpu(t.v->root_snapshot));
/*
 * .key_invalid hook for KEY_TYPE_snapshot_tree: position must lie in
 * (POS(0, 1) .. POS(0, U32_MAX)) since tree IDs are 32-bit and nonzero.
 */
119 int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
120 			       enum bkey_invalid_flags flags,
121 			       struct printbuf *err)
123 	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
124 	    bkey_lt(k.k->p, POS(0, 1))) {
125 		prt_printf(err, "bad pos");
126 		return -BCH_ERR_invalid_bkey;
/*
 * Look up snapshot tree @id in the snapshot_trees btree, copying the value
 * into @s; maps a generic ENOENT to the more specific
 * -BCH_ERR_ENOENT_snapshot_tree for callers.
 */
132 int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
133 			      struct bch_snapshot_tree *s)
135 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
136 					  BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
138 	if (bch2_err_matches(ret, ENOENT))
139 		ret = -BCH_ERR_ENOENT_snapshot_tree;
/*
 * Allocate a new (empty) snapshot_tree key: find a free slot scanning down
 * from U32_MAX, then allocate the bkey in the transaction.  Returns the new
 * key or an ERR_PTR; ENOSPC on the slot search is narrowed to
 * -BCH_ERR_ENOSPC_snapshot_tree.
 */
143 static struct bkey_i_snapshot_tree *
144 __snapshot_tree_create(struct btree_trans *trans)
146 	struct btree_iter iter;
147 	int ret = bch2_bkey_get_empty_slot(trans, &iter,
148 					BTREE_ID_snapshot_trees, POS(0, U32_MAX));
149 	struct bkey_i_snapshot_tree *s_t;
151 	if (ret == -BCH_ERR_ENOSPC_btree_slot)
152 		ret = -BCH_ERR_ENOSPC_snapshot_tree;
156 	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
157 	ret = PTR_ERR_OR_ZERO(s_t);
158 	bch2_trans_iter_exit(trans, &iter);
159 	return ret ? ERR_PTR(ret) : s_t;
/*
 * Create a new snapshot tree rooted at snapshot @root_id with master
 * subvolume @subvol_id; returns the allocated tree ID via @tree_id.
 */
162 static int snapshot_tree_create(struct btree_trans *trans,
163 				u32 root_id, u32 subvol_id, u32 *tree_id)
165 	struct bkey_i_snapshot_tree *n_tree =
166 		__snapshot_tree_create(trans);
169 		return PTR_ERR(n_tree);
171 	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
172 	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
173 	*tree_id = n_tree->k.p.offset;
177 /* Snapshot nodes: */
/* Pretty-print a KEY_TYPE_snapshot value: flags, parent, children, subvol, tree. */
179 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
182 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
184 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
185 	       BCH_SNAPSHOT_SUBVOL(s.v),
186 	       BCH_SNAPSHOT_DELETED(s.v),
187 	       le32_to_cpu(s.v->parent),
188 	       le32_to_cpu(s.v->children[0]),
189 	       le32_to_cpu(s.v->children[1]),
190 	       le32_to_cpu(s.v->subvol),
191 	       le32_to_cpu(s.v->tree));
/*
 * .key_invalid hook for KEY_TYPE_snapshot.  Structural invariants checked:
 *  - position within (POS(0, 1) .. POS(0, U32_MAX));
 *  - parent id, if set, is strictly greater than our own id (parents are
 *    allocated first, from the top of the keyspace);
 *  - children[] normalized descending, no duplicates, and both < our id;
 *  - if the key is new enough to carry a skiplist: skip[] normalized
 *    ascending, and each entry nonzero iff the node has a parent.
 */
194 int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
195 			  enum bkey_invalid_flags flags,
196 			  struct printbuf *err)
198 	struct bkey_s_c_snapshot s;
201 	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
202 	    bkey_lt(k.k->p, POS(0, 1))) {
203 		prt_printf(err, "bad pos");
204 		return -BCH_ERR_invalid_bkey;
207 	s = bkey_s_c_to_snapshot(k);
209 	id = le32_to_cpu(s.v->parent);
210 	if (id && id <= k.k->p.offset) {
211 		prt_printf(err, "bad parent node (%u <= %llu)",
213 		return -BCH_ERR_invalid_bkey;
	/* children must be stored in descending order */
216 	if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
217 		prt_printf(err, "children not normalized");
218 		return -BCH_ERR_invalid_bkey;
221 	if (s.v->children[0] &&
222 	    s.v->children[0] == s.v->children[1]) {
223 		prt_printf(err, "duplicate child nodes");
224 		return -BCH_ERR_invalid_bkey;
227 	for (i = 0; i < 2; i++) {
228 		id = le32_to_cpu(s.v->children[i]);
230 		if (id >= k.k->p.offset) {
231 			prt_printf(err, "bad child node (%u >= %llu)",
233 			return -BCH_ERR_invalid_bkey;
	/* older on-disk versions lack the skip[] field — only check if present */
237 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
238 		if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
239 		    le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
240 			prt_printf(err, "skiplist not normalized");
241 			return -BCH_ERR_invalid_bkey;
244 		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
245 			id = le32_to_cpu(s.v->skip[i]);
247 			if (!id != !s.v->parent ||
249 				id <= k.k->p.offset)) {
250 				prt_printf(err, "bad skiplist node %u)", id);
251 				return -BCH_ERR_invalid_bkey;
/*
 * Trigger run on snapshot-key updates: mirror the on-disk snapshot node into
 * the in-memory snapshot table (parent, skiplist, depth, children, subvol,
 * tree) under snapshot_table_lock, and kick off deferred cleanup by setting
 * BCH_FS_HAVE_DELETED_SNAPSHOTS when a node is marked deleted.
 * NOTE(review): the non-KEY_TYPE_snapshot (deletion) branch is elided in this
 * excerpt.
 */
259 int bch2_mark_snapshot(struct btree_trans *trans,
260 		       enum btree_id btree, unsigned level,
261 		       struct bkey_s_c old, struct bkey_s_c new,
264 	struct bch_fs *c = trans->c;
265 	struct snapshot_t *t;
268 	mutex_lock(&c->snapshot_table_lock);
270 	t = snapshot_t_mut(c, new.k->p.offset);
	/* table growth failed: report OOM */
272 		ret = -BCH_ERR_ENOMEM_mark_snapshot;
276 	if (new.k->type == KEY_TYPE_snapshot) {
277 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
279 		t->parent	= le32_to_cpu(s.v->parent);
280 		t->skip[0]	= le32_to_cpu(s.v->skip[0]);
281 		t->skip[1]	= le32_to_cpu(s.v->skip[1]);
282 		t->skip[2]	= le32_to_cpu(s.v->skip[2]);
283 		t->depth	= le32_to_cpu(s.v->depth);
284 		t->children[0]	= le32_to_cpu(s.v->children[0]);
285 		t->children[1]	= le32_to_cpu(s.v->children[1]);
286 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
287 		t->tree		= le32_to_cpu(s.v->tree);
289 		if (BCH_SNAPSHOT_DELETED(s.v))
290 			set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
299 	mutex_unlock(&c->snapshot_table_lock);
/* Read snapshot node @id from the snapshots btree into @s. */
303 static int snapshot_lookup(struct btree_trans *trans, u32 id,
304 			   struct bch_snapshot *s)
306 	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
307 				       BTREE_ITER_WITH_UPDATES, snapshot, s);
/*
 * Returns 1 if snapshot @id exists and is not flagged deleted, 0 if it is
 * deleted, or a negative error; logs when the node is missing entirely.
 * NOTE(review): the id == 0 early return and error propagation lines are
 * elided in this excerpt.
 */
310 static int snapshot_live(struct btree_trans *trans, u32 id)
312 	struct bch_snapshot v;
318 	ret = snapshot_lookup(trans, id, &v);
319 	if (bch2_err_matches(ret, ENOENT))
320 		bch_err(trans->c, "snapshot node %u not found", id);
324 	return !BCH_SNAPSHOT_DELETED(&v);
/*
 * Compute the equivalence class for snapshot node @k: an interior node with
 * exactly one live child is redundant and shares its child's equiv id;
 * otherwise the node is its own class.  The result is stored in the
 * in-memory table under snapshot_table_lock.
 */
327 static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
329 	struct bch_fs *c = trans->c;
330 	unsigned i, nr_live = 0, live_idx = 0;
331 	struct bkey_s_c_snapshot snap;
332 	u32 id = k.k->p.offset, child[2];
334 	if (k.k->type != KEY_TYPE_snapshot)
337 	snap = bkey_s_c_to_snapshot(k);
339 	child[0] = le32_to_cpu(snap.v->children[0]);
340 	child[1] = le32_to_cpu(snap.v->children[1]);
342 	for (i = 0; i < 2; i++) {
343 		int ret = snapshot_live(trans, child[i]);
353 	mutex_lock(&c->snapshot_table_lock);
355 	snapshot_t_mut(c, id)->equiv = nr_live == 1
356 		? snapshot_t_mut(c, child[live_idx])->equiv
359 	mutex_unlock(&c->snapshot_table_lock);
/* In-memory accessors for a snapshot node's child pointers (0 = left, 1 = right). */
366 static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
368 	return snapshot_t(c, id)->children[child];
371 static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
373 	return bch2_snapshot_child(c, id, 0);
376 static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
378 	return bch2_snapshot_child(c, id, 1);
/*
 * Depth-first successor within a snapshot tree: descend to the left child if
 * any, otherwise climb toward the root looking for an unvisited right
 * sibling.  Used to iterate every node of a tree from its root.
 */
381 static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
385 	n = bch2_snapshot_left_child(c, id);
389 	while ((parent = bch2_snapshot_parent(c, id))) {
390 		n = bch2_snapshot_right_child(c, parent);
/*
 * Walk every node in the tree rooted at @snapshot_root and return the lowest
 * (i.e. oldest — subvolume IDs are allocated in increasing order) subvolume
 * id referenced by any node; used when picking a new master subvolume.
 */
399 static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
401 	u32 id = snapshot_root;
405 		s = snapshot_t(c, id)->subvol;
407 		if (s && (!subvol || s < subvol))
410 		id = bch2_snapshot_tree_next(c, id);
/*
 * Find the master subvolume for the tree rooted at @snapshot_root: scan all
 * subvolumes for one inside this tree that is not a snapshot subvolume.  If
 * none is found, promote the oldest subvolume in the tree — clearing its
 * SNAP flag — and return its id via @subvol_id.
 */
416 static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
417 					    u32 snapshot_root, u32 *subvol_id)
419 	struct bch_fs *c = trans->c;
420 	struct btree_iter iter;
422 	struct bkey_s_c_subvolume s;
426 	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
428 		if (k.k->type != KEY_TYPE_subvolume)
431 		s = bkey_s_c_to_subvolume(k);
432 		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
434 		if (!BCH_SUBVOLUME_SNAP(s.v)) {
435 			*subvol_id = s.k->p.offset;
441 	bch2_trans_iter_exit(trans, &iter);
443 	if (!ret && !found) {
444 		struct bkey_i_subvolume *s;
446 		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
448 		s = bch2_bkey_get_mut_typed(trans, &iter,
449 					    BTREE_ID_subvolumes, POS(0, *subvol_id),
451 		ret = PTR_ERR_OR_ZERO(s);
		/* the promoted subvolume becomes the master: it is no longer a snapshot */
455 		SET_BCH_SUBVOLUME_SNAP(&s->v, false);
/*
 * fsck one snapshot_tree key:
 *  - delete it if its root_snapshot is missing, is not actually a tree root,
 *    or does not point back at this tree;
 *  - repair master_subvol if it is missing, lies outside this tree, or
 *    refers to a snapshot subvolume, by re-deriving the master via
 *    bch2_snapshot_tree_master_subvol().
 */
461 static int check_snapshot_tree(struct btree_trans *trans,
462 			       struct btree_iter *iter,
465 	struct bch_fs *c = trans->c;
466 	struct bkey_s_c_snapshot_tree st;
467 	struct bch_snapshot s;
468 	struct bch_subvolume subvol;
469 	struct printbuf buf = PRINTBUF;
473 	if (k.k->type != KEY_TYPE_snapshot_tree)
476 	st = bkey_s_c_to_snapshot_tree(k);
477 	root_id = le32_to_cpu(st.v->root_snapshot);
479 	ret = snapshot_lookup(trans, root_id, &s);
480 	if (ret && !bch2_err_matches(ret, ENOENT))
483 	if (fsck_err_on(ret ||
484 			root_id != bch2_snapshot_root(c, root_id) ||
485 			st.k->p.offset != le32_to_cpu(s.tree),
487 			"snapshot tree points to missing/incorrect snapshot:\n  %s",
488 			(bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
489 		ret = bch2_btree_delete_at(trans, iter, 0);
493 	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
495 	if (ret && !bch2_err_matches(ret, ENOENT))
498 	if (fsck_err_on(ret, c,
499 			"snapshot tree points to missing subvolume:\n  %s",
500 			(printbuf_reset(&buf),
501 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
502 	    fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
503 						le32_to_cpu(subvol.snapshot),
505 			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
506 			(printbuf_reset(&buf),
507 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
508 	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
509 			"snapshot tree points to snapshot subvolume:\n  %s",
510 			(printbuf_reset(&buf),
511 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
512 		struct bkey_i_snapshot_tree *u;
515 		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
519 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
520 		ret = PTR_ERR_OR_ZERO(u);
524 		u->v.master_subvol = cpu_to_le32(subvol_id);
525 		st = snapshot_tree_i_to_s_c(u);
534 * For each snapshot_tree, make sure it points to the root of a snapshot tree
535 * and that snapshot entry points back to it, or delete it.
537 * And, make sure it points to a subvolume within that snapshot tree, or correct
538 * it to point to the oldest subvolume within that snapshot tree.
/*
 * Recovery pass: run check_snapshot_tree() over every snapshot_tree key,
 * committing each repair; logs on failure.
 */
540 int bch2_check_snapshot_trees(struct bch_fs *c)
542 	struct btree_iter iter;
546 	ret = bch2_trans_run(c,
547 		for_each_btree_key_commit(&trans, iter,
548 			BTREE_ID_snapshot_trees, POS_MIN,
549 			BTREE_ITER_PREFETCH, k,
550 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
551 		check_snapshot_tree(&trans, &iter, k)));
554 		bch_err(c, "error %i checking snapshot trees", ret);
559 * Look up snapshot tree for @tree_id and find root,
560 * make sure @snap_id is a descendent:
/*
 * Returns nonzero iff tree @tree_id exists and @snap_id descends from its
 * root; a missing tree is reported as "not good" (0), not as an error.
 */
562 static int snapshot_tree_ptr_good(struct btree_trans *trans,
563 				  u32 snap_id, u32 tree_id)
565 	struct bch_snapshot_tree s_t;
566 	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
568 	if (bch2_err_matches(ret, ENOENT))
573 	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
/*
 * Pick a random ancestor of @id for a skiplist entry: jump up a uniformly
 * random number of levels in [0, depth).  Randomized skip pointers give the
 * skiplist its expected-logarithmic traversal cost.
 */
576 static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
578 	const struct snapshot_t *s;
584 	s = snapshot_t(c, id);
586 	id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
/*
 * Validate a node's skiplist: each of the three skip entries must be present
 * exactly when the node has a parent, must refer to an existing snapshot,
 * and that snapshot must belong to the same tree.  Returns truthy when good.
 */
592 static int snapshot_rand_ancestor_good(struct btree_trans *trans,
593 				       struct bch_snapshot s)
595 	struct bch_snapshot a;
599 	for (i = 0; i < 3; i++) {
600 		if (!s.parent != !s.skip[i])
606 		ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
607 		if (bch2_err_matches(ret, ENOENT))
612 		if (a.tree != s.tree)
620 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
621 * its snapshot_tree pointer is correct (allocate new one if necessary), then
622 * update this node's pointer to root node's pointer:
/*
 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
 * its snapshot_tree pointer is correct (allocate new one if necessary), then
 * update this node's pointer to root node's pointer.
 */
624 static int snapshot_tree_ptr_repair(struct btree_trans *trans,
625 				    struct btree_iter *iter,
627 				    struct bch_snapshot *s)
629 	struct bch_fs *c = trans->c;
630 	struct btree_iter root_iter;
631 	struct bch_snapshot_tree s_t;
632 	struct bkey_s_c_snapshot root;
633 	struct bkey_i_snapshot *u;
634 	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
637 	root = bch2_bkey_get_iter_typed(trans, &root_iter,
638 			       BTREE_ID_snapshots, POS(0, root_id),
639 			       BTREE_ITER_WITH_UPDATES, snapshot);
640 	ret = bkey_err(root);
644 	tree_id = le32_to_cpu(root.v->tree);
646 	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
647 	if (ret && !bch2_err_matches(ret, ENOENT))
	/* root's tree pointer is itself bad: create a fresh tree for it */
650 	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
651 		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
652 		ret = PTR_ERR_OR_ZERO(u) ?:
653 			snapshot_tree_create(trans, root_id,
654 				bch2_snapshot_tree_oldest_subvol(c, root_id),
659 		u->v.tree = cpu_to_le32(tree_id);
660 		if (k.k->p.offset == root_id)
	/* non-root node: copy the (now correct) tree id from the root */
664 	if (k.k->p.offset != root_id) {
665 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
666 		ret = PTR_ERR_OR_ZERO(u);
670 		u->v.tree = cpu_to_le32(tree_id);
674 	bch2_trans_iter_exit(trans, &root_iter);
/*
 * fsck one snapshot node.  Checks, in order:
 *  - parent exists and lists this node as a child;
 *  - each child exists and points back at this node as its parent;
 *  - subvol pointer matches the SUBVOL/DELETED flags (a live subvol node
 *    must be referenced by the subvolume it points at, and vice versa);
 *  - tree pointer is valid (repaired via snapshot_tree_ptr_repair());
 *  - depth equals parent's depth + 1 (hence the reverse iteration in
 *    bch2_check_snapshots());
 *  - skiplist entries are consistent, regenerated randomly if not.
 */
678 static int check_snapshot(struct btree_trans *trans,
679 			  struct btree_iter *iter,
682 	struct bch_fs *c = trans->c;
683 	struct bch_snapshot s;
684 	struct bch_subvolume subvol;
685 	struct bch_snapshot v;
686 	struct bkey_i_snapshot *u;
687 	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
689 	struct printbuf buf = PRINTBUF;
690 	bool should_have_subvol;
694 	if (k.k->type != KEY_TYPE_snapshot)
	/* copy into a zeroed struct: older keys may lack trailing fields */
697 	memset(&s, 0, sizeof(s));
698 	memcpy(&s, k.v, bkey_val_bytes(k.k));
700 	id = le32_to_cpu(s.parent);
702 		ret = snapshot_lookup(trans, id, &v);
703 		if (bch2_err_matches(ret, ENOENT))
704 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
705 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
709 		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
710 		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
711 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
718 	for (i = 0; i < 2 && s.children[i]; i++) {
719 		id = le32_to_cpu(s.children[i]);
721 		ret = snapshot_lookup(trans, id, &v);
722 		if (bch2_err_matches(ret, ENOENT))
723 			bch_err(c, "snapshot node %llu has nonexistent child %u",
728 		if (le32_to_cpu(v.parent) != k.k->p.offset) {
729 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
730 				id, le32_to_cpu(v.parent), k.k->p.offset);
736 	should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
737 		!BCH_SNAPSHOT_DELETED(&s);
739 	if (should_have_subvol) {
740 		id = le32_to_cpu(s.subvol);
741 		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
742 		if (bch2_err_matches(ret, ENOENT))
743 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
744 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
748 		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
749 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
755 		if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n  %s",
756 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
757 			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
758 			ret = PTR_ERR_OR_ZERO(u);
767 	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
771 	if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n  %s",
772 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
773 		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
779 	real_depth = bch2_snapshot_depth(c, parent_id);
781 	if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c,
782 			"snapshot with incorrect depth fields, should be %u:\n  %s",
784 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
785 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
786 		ret = PTR_ERR_OR_ZERO(u);
790 		u->v.depth = cpu_to_le32(real_depth);
794 	ret = snapshot_rand_ancestor_good(trans, s);
798 	if (fsck_err_on(!ret, c, "snapshot with bad rand_ancestor field:\n  %s",
799 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
800 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
801 		ret = PTR_ERR_OR_ZERO(u);
		/* regenerate skiplist with fresh random ancestors, kept sorted */
805 		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
806 			u->v.skip[i] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent_id));
808 		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int);
/*
 * Recovery pass: run check_snapshot() over every snapshot node, committing
 * each repair.
 */
818 int bch2_check_snapshots(struct bch_fs *c)
820 	struct btree_iter iter;
	/*
825 	 * We iterate backwards as checking/fixing the depth field requires that
826 	 * the parent's depth already be correct:
	 */
828 	ret = bch2_trans_run(c,
829 		for_each_btree_key_reverse_commit(&trans, iter,
830 			BTREE_ID_snapshots, POS_MAX,
831 			BTREE_ITER_PREFETCH, k,
832 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
833 		check_snapshot(&trans, &iter, k)));
/*
 * fsck one subvolume key:
 *  - its snapshot pointer must resolve to an existing snapshot node;
 *  - an UNLINKED subvolume is deleted on the spot (followed by a nested
 *    transaction restart, since deletion touches other keys);
 *  - a non-SNAP subvolume must be its tree's master subvolume; if it is not,
 *    it is a snapshot after all, so its SNAP flag is set.
 */
839 static int check_subvol(struct btree_trans *trans,
840 			struct btree_iter *iter,
843 	struct bch_fs *c = trans->c;
844 	struct bkey_s_c_subvolume subvol;
845 	struct bch_snapshot snapshot;
849 	if (k.k->type != KEY_TYPE_subvolume)
852 	subvol = bkey_s_c_to_subvolume(k);
853 	snapid = le32_to_cpu(subvol.v->snapshot);
854 	ret = snapshot_lookup(trans, snapid, &snapshot);
856 	if (bch2_err_matches(ret, ENOENT))
857 		bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
858 			k.k->p.offset, snapid);
862 	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
865 		ret = bch2_subvolume_delete(trans, iter->pos.offset);
867 			bch_err(c, "error deleting subvolume %llu: %s",
868 				iter->pos.offset, bch2_err_str(ret));
869 		return ret ?: -BCH_ERR_transaction_restart_nested;
872 	if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
873 		u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
875 		struct bch_snapshot_tree st;
878 		snapshot_tree = snapshot_t(c, snapshot_root)->tree;
881 		ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
883 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
884 				"%s: snapshot tree %u not found", __func__, snapshot_tree);
889 		if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
890 				"subvolume %llu is not set as snapshot but is not master subvolume",
892 			struct bkey_i_subvolume *s =
893 				bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
894 			ret = PTR_ERR_OR_ZERO(s);
898 			SET_BCH_SUBVOLUME_SNAP(&s->v, true);
/* Recovery pass: run check_subvol() over every subvolume key. */
906 int bch2_check_subvols(struct bch_fs *c)
908 	struct btree_iter iter;
912 	ret = bch2_trans_run(c,
913 		for_each_btree_key_commit(&trans, iter,
914 			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
915 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
916 		check_subvol(&trans, &iter, k)));
/*
 * Filesystem-shutdown teardown of snapshot state.
 * NOTE(review): body not visible in this excerpt — presumably frees the
 * in-memory snapshot table; confirm against the full file.
 */
922 void bch2_fs_snapshots_exit(struct bch_fs *c)
/*
 * Startup: populate the in-memory snapshot table from the snapshots btree
 * (via the bch2_mark_snapshot trigger) and compute equivalence classes.
 */
927 int bch2_snapshots_read(struct bch_fs *c)
929 	struct btree_iter iter;
933 	ret = bch2_trans_run(c,
934 		for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
936 			bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
937 			bch2_snapshot_set_equiv(&trans, k)));
944 * Mark a snapshot as deleted, for future cleanup:
/*
 * Mark snapshot @id deleted (and no longer a subvol node); the node itself
 * is reaped later by bch2_delete_dead_snapshots().  Idempotent: an already
 * deleted node is left alone.  A missing node is a filesystem inconsistency.
 */
946 static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
948 	struct btree_iter iter;
949 	struct bkey_i_snapshot *s;
952 	s = bch2_bkey_get_mut_typed(trans, &iter,
953 				    BTREE_ID_snapshots, POS(0, id),
955 	ret = PTR_ERR_OR_ZERO(s);
957 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
958 					trans->c, "missing snapshot %u", id);
	/* already deleted? */
963 	if (BCH_SNAPSHOT_DELETED(&s->v))
966 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
967 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
970 	bch2_trans_iter_exit(trans, &iter);
/*
 * Physically remove a snapshot node that has already been flagged deleted
 * (BUG_ON otherwise): unlink it from its parent's children[] (keeping that
 * array normalized descending), or — if it was the tree root — repoint the
 * snapshot_tree entry at its single remaining child, deleting the tree
 * entry entirely when no child remains.  Finally delete the node's key.
 */
974 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
976 	struct bch_fs *c = trans->c;
977 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
978 	struct btree_iter tree_iter = (struct btree_iter) { NULL };
979 	struct bkey_s_c_snapshot s;
984 	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
985 				     BTREE_ITER_INTENT, snapshot);
987 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
988 				"missing snapshot %u", id);
993 	BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
994 	parent_id = le32_to_cpu(s.v->parent);
997 		struct bkey_i_snapshot *parent;
999 		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
1000 				     BTREE_ID_snapshots, POS(0, parent_id),
1002 		ret = PTR_ERR_OR_ZERO(parent);
1003 		if (unlikely(ret)) {
1004 			bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1005 						"missing snapshot %u", parent_id);
		/* find which child slot points at us */
1009 		for (i = 0; i < 2; i++)
1010 			if (le32_to_cpu(parent->v.children[i]) == id)
1014 			bch_err(c, "snapshot %u missing child pointer to %u",
1017 		parent->v.children[i] = 0;
		/* keep children[] normalized: children[0] >= children[1] */
1019 		if (le32_to_cpu(parent->v.children[0]) <
1020 		    le32_to_cpu(parent->v.children[1]))
1021 			swap(parent->v.children[0],
1022 			     parent->v.children[1]);
		/*
1025 		 * We're deleting the root of a snapshot tree: update the
1026 		 * snapshot_tree entry to point to the new root, or delete it if
1027 		 * this is the last snapshot ID in this tree:
		 */
1029 		struct bkey_i_snapshot_tree *s_t;
1031 		BUG_ON(s.v->children[1]);
1033 		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
1034 				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
1036 		ret = PTR_ERR_OR_ZERO(s_t);
1040 		if (s.v->children[0]) {
1041 			s_t->v.root_snapshot = s.v->children[0];
1043 			s_t->k.type = KEY_TYPE_deleted;
1044 			set_bkey_val_u64s(&s_t->k, 0);
1048 	ret = bch2_btree_delete_at(trans, &iter, 0);
1050 	bch2_trans_iter_exit(trans, &tree_iter);
1051 	bch2_trans_iter_exit(trans, &p_iter);
1052 	bch2_trans_iter_exit(trans, &iter);
/*
 * Allocate @nr_snapids new snapshot nodes under @parent within tree @tree:
 * walk backwards from the top of the snapshots keyspace looking for empty
 * slots (offset 0 means the keyspace is exhausted -> ENOSPC), initialize
 * each node (parent, subvol, tree, depth, randomized sorted skiplist,
 * SUBVOL flag), and mirror it into the in-memory table via
 * bch2_mark_snapshot().  The new IDs are returned in @new_snapids.
 */
1056 static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
1058 			  u32 *snapshot_subvols,
1059 			  unsigned nr_snapids)
1061 	struct bch_fs *c = trans->c;
1062 	struct btree_iter iter;
1063 	struct bkey_i_snapshot *n;
1066 	u32 depth = bch2_snapshot_depth(c, parent);
1069 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
1070 			     POS_MIN, BTREE_ITER_INTENT);
1071 	k = bch2_btree_iter_peek(&iter);
1076 	for (i = 0; i < nr_snapids; i++) {
1077 		k = bch2_btree_iter_prev_slot(&iter);
1082 		if (!k.k || !k.k->p.offset) {
1083 			ret = -BCH_ERR_ENOSPC_snapshot_create;
1087 		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
1088 		ret = PTR_ERR_OR_ZERO(n);
1093 		n->v.parent	= cpu_to_le32(parent);
1094 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
1095 		n->v.tree	= cpu_to_le32(tree);
1096 		n->v.depth	= cpu_to_le32(depth);
1098 		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
1099 			n->v.skip[j] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent));
1101 		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int);
1102 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
1104 		ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
1105 					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
1109 		new_snapids[i]	= iter.pos.offset;
1112 	bch2_trans_iter_exit(trans, &iter);
1117 * Create new snapshot IDs as children of an existing snapshot ID:
/*
 * Create new snapshot IDs as children of an existing snapshot ID.  The
 * parent must currently be a leaf (no children); after creation the parent
 * becomes an interior node and hands its subvol off to the children.
 */
1119 static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
1121 			      u32 *snapshot_subvols,
1122 			      unsigned nr_snapids)
1124 	struct btree_iter iter;
1125 	struct bkey_i_snapshot *n_parent;
1128 	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
1129 			BTREE_ID_snapshots, POS(0, parent),
1131 	ret = PTR_ERR_OR_ZERO(n_parent);
1132 	if (unlikely(ret)) {
1133 		if (bch2_err_matches(ret, ENOENT))
1134 			bch_err(trans->c, "snapshot %u not found", parent);
1138 	if (n_parent->v.children[0] || n_parent->v.children[1]) {
1139 		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
1144 	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
1145 			     new_snapids, snapshot_subvols, nr_snapids);
	/* parent is now an interior node: record children, drop its subvol */
1149 	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
1150 	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
1151 	n_parent->v.subvol = 0;
1152 	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
1154 	bch2_trans_iter_exit(trans, &iter);
1159 * Create a snapshot node that is the root of a new tree:
/*
 * Create a snapshot node that is the root of a new tree: allocate the tree
 * entry, create the root node under parent 0, then wire the tree's master
 * subvol and root snapshot to the new IDs.
 */
1161 static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
1163 			      u32 *snapshot_subvols,
1164 			      unsigned nr_snapids)
1166 	struct bkey_i_snapshot_tree *n_tree;
1169 	n_tree = __snapshot_tree_create(trans);
1170 	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
1171 		create_snapids(trans, 0, n_tree->k.p.offset,
1172 			     new_snapids, snapshot_subvols, nr_snapids);
1176 	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
1177 	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
/*
 * Public entry point: with no parent, create one node as a new tree root;
 * with a parent, create exactly two children (the BUG_ONs encode that
 * contract).
 */
1181 int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
1183 			      u32 *snapshot_subvols,
1184 			      unsigned nr_snapids)
1186 	BUG_ON((parent == 0) != (nr_snapids == 1));
1187 	BUG_ON((parent != 0) != (nr_snapids == 2));
1190 		? bch2_snapshot_node_create_children(trans, parent,
1191 				new_snapids, snapshot_subvols, nr_snapids)
1192 		: bch2_snapshot_node_create_tree(trans,
1193 				new_snapids, snapshot_subvols, nr_snapids);
/*
 * Per-key worker for dead-snapshot reaping: for a key at some position, if
 * its snapshot is on the @deleted list, or another key at this position has
 * already been seen in the same equivalence class (@equiv_seen, reset when
 * the position changes via @last_pos), delete it; otherwise record its
 * equivalence class as seen.
 */
1197 static int snapshot_delete_key(struct btree_trans *trans,
1198 			       struct btree_iter *iter,
1200 			       snapshot_id_list *deleted,
1201 			       snapshot_id_list *equiv_seen,
1202 			       struct bpos *last_pos)
1204 	struct bch_fs *c = trans->c;
1205 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
1207 	if (!bkey_eq(k.k->p, *last_pos))
1211 	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
1212 	    snapshot_list_has_id(equiv_seen, equiv)) {
1213 		return bch2_btree_delete_at(trans, iter,
1214 					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1216 		return snapshot_list_add(c, equiv_seen, equiv);
/*
 * Mark a snapshot node deleted if it is redundant: not already deleted, not
 * pointed to by a subvolume, and with at most one live child (interior nodes
 * with a single live child carry no information of their own).
 */
1220 static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
1223 	struct bkey_s_c_snapshot snap;
1227 	if (k.k->type != KEY_TYPE_snapshot)
1230 	snap = bkey_s_c_to_snapshot(k);
1231 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
1232 	    BCH_SNAPSHOT_SUBVOL(snap.v))
1235 	children[0] = le32_to_cpu(snap.v->children[0]);
1236 	children[1] = le32_to_cpu(snap.v->children[1]);
1238 	ret =   snapshot_live(trans, children[0]) ?:
1239 		snapshot_live(trans, children[1]);
1244 	return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
/*
 * Reap snapshots flagged deleted.  Phases:
 *  1. mark redundant nodes deleted (bch2_delete_redundant_snapshot());
 *  2. recompute equivalence classes;
 *  3. collect the ids of all deleted nodes;
 *  4. for every snapshot-capable btree, drop keys belonging to deleted
 *     snapshots or duplicated within an equivalence class;
 *  5. delete the snapshot nodes themselves and clear the
 *     BCH_FS_HAVE_DELETED_SNAPSHOTS flag.
 * No-op unless that flag is set; goes read-write early if needed.
 */
1248 int bch2_delete_dead_snapshots(struct bch_fs *c)
1250 	struct btree_trans trans;
1251 	struct btree_iter iter;
1253 	struct bkey_s_c_snapshot snap;
1254 	snapshot_id_list deleted = { 0 };
1258 	if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
1261 	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
1262 		ret = bch2_fs_read_write_early(c);
1264 			bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
1269 	bch2_trans_init(&trans, c, 0, 0);
	/*
1272 	 * For every snapshot node: If we have no live children and it's not
1273 	 * pointed to by a subvolume, delete it:
	 */
1275 	ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
1278 			bch2_delete_redundant_snapshot(&trans, &iter, k));
1280 		bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
1284 	for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
1286 		bch2_snapshot_set_equiv(&trans, k));
1288 		bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
1292 	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
1293 			   POS_MIN, 0, k, ret) {
1294 		if (k.k->type != KEY_TYPE_snapshot)
1297 		snap = bkey_s_c_to_snapshot(k);
1298 		if (BCH_SNAPSHOT_DELETED(snap.v)) {
1299 			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
1304 	bch2_trans_iter_exit(&trans, &iter);
1307 		bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
1311 	for (id = 0; id < BTREE_ID_NR; id++) {
1312 		struct bpos last_pos = POS_MIN;
1313 		snapshot_id_list equiv_seen = { 0 };
1315 		if (!btree_type_has_snapshots(id))
1318 		ret = for_each_btree_key_commit(&trans, iter,
1320 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1321 				NULL, NULL, BTREE_INSERT_NOFAIL,
1322 			snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
1324 		darray_exit(&equiv_seen);
1327 			bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
1332 	for (i = 0; i < deleted.nr; i++) {
1333 		ret = commit_do(&trans, NULL, NULL, 0,
1334 			bch2_snapshot_node_delete(&trans, deleted.data[i]));
1336 			bch_err(c, "error deleting snapshot %u: %s",
1337 				deleted.data[i], bch2_err_str(ret));
1342 	clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
1344 	darray_exit(&deleted);
1345 	bch2_trans_exit(&trans);
/*
 * Workqueue body for asynchronous snapshot reaping; drops the write ref
 * taken by bch2_delete_dead_snapshots_async() when done.
 */
1351 static void bch2_delete_dead_snapshots_work(struct work_struct *work)
1353 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
1355 	bch2_delete_dead_snapshots(c);
1356 	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Queue dead-snapshot reaping on the write-ref workqueue.  The write ref is
 * taken here and released by the work item — or immediately, if the work was
 * already queued (queue_work() returned false).
 */
1359 void bch2_delete_dead_snapshots_async(struct bch_fs *c)
1361 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
1362 	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
1363 		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Transaction commit hook: note that deleted snapshots now exist and, unless
 * we're still in recovery before the delete_dead_snapshots pass, kick off
 * asynchronous reaping.
 */
1366 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
1367 					   struct btree_trans_commit_hook *h)
1369 	struct bch_fs *c = trans->c;
1371 	set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
1373 	if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots)
1376 	bch2_delete_dead_snapshots_async(c);
/* .key_invalid hook for KEY_TYPE_subvolume: position must lie within [SUBVOL_POS_MIN, SUBVOL_POS_MAX]. */
1382 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
1383 			   unsigned flags, struct printbuf *err)
1385 	if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
1386 	    bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
1387 		prt_printf(err, "invalid pos");
1388 		return -BCH_ERR_invalid_bkey;
/*
 * Pretty-print a KEY_TYPE_subvolume value; the parent field is only printed
 * when the on-disk key is new enough to contain it.
 */
1394 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
1397 	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
1399 	prt_printf(out, "root %llu snapshot id %u",
1400 		   le64_to_cpu(s.v->inode),
1401 		   le32_to_cpu(s.v->snapshot));
1403 	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
1404 		prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
/*
 * Core subvolume lookup: read subvolume @subvol into @s.  When
 * @inconsistent_if_not_found is set, a missing subvolume is flagged as a
 * filesystem inconsistency rather than an ordinary ENOENT.
 */
1407 static __always_inline int
1408 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
1409 			   bool inconsistent_if_not_found,
1411 			   struct bch_subvolume *s)
1413 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
1414 					  iter_flags, subvolume, s);
1415 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
1416 				inconsistent_if_not_found,
1417 				trans->c, "missing subvolume %u", subvol);
/* Out-of-line wrapper around bch2_subvolume_get_inlined() for external callers. */
1421 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
1422 		       bool inconsistent_if_not_found,
1424 		       struct bch_subvolume *s)
1426 	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
/*
 * Resolve @snapshot to the subvolume it belongs to: look up the snapshot
 * node, then fetch its subvol (missing subvol => inconsistency).
 */
1429 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
1430 			     struct bch_subvolume *subvol)
1432 	struct bch_snapshot snap;
1434 	return  snapshot_lookup(trans, snapshot, &snap) ?:
1435 		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
/*
 * Fetch the snapshot id of subvolume @subvol into @snapid; a missing
 * subvolume is both an ENOENT-class error and a reported inconsistency.
 */
1438 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
1441 	struct btree_iter iter;
1445 	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol),
1447 			       BTREE_ITER_WITH_UPDATES);
1448 	ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -BCH_ERR_ENOENT_subvolume;
1451 		*snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
1452 	else if (bch2_err_matches(ret, ENOENT))
1453 		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
1454 	bch2_trans_iter_exit(trans, &iter);
/*
 * Per-key worker for reparenting: if this subvolume's parent field equals
 * @old_parent (and the key is new enough to have a parent field), rewrite it
 * to @new_parent.
 */
1458 static int bch2_subvolume_reparent(struct btree_trans *trans,
1459 				   struct btree_iter *iter,
1461 				   u32 old_parent, u32 new_parent)
1463 	struct bkey_i_subvolume *s;
1466 	if (k.k->type != KEY_TYPE_subvolume)
1469 	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
1470 	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
1473 	s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
1474 	ret = PTR_ERR_OR_ZERO(s);
1478 	s->v.parent = cpu_to_le32(new_parent);
1483 * Scan for subvolumes with parent @subvolid_to_delete, reparent:
/*
 * Before deleting a subvolume, walk the whole subvolumes btree and repoint
 * any child subvolume's parent field at the doomed subvolume's own parent,
 * so the parent chain stays intact.  The initial lookup of the doomed
 * subvolume retries on transaction restart (lockrestart_do) and treats
 * ENOENT as inconsistency.
 */
1485 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
1487 struct btree_iter iter;
1489 struct bch_subvolume s;
1491 return lockrestart_do(trans,
1492 bch2_subvolume_get(trans, subvolid_to_delete, true,
1493 BTREE_ITER_CACHED, &s)) ?:
1494 for_each_btree_key_commit(trans, iter,
1495 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
1496 NULL, NULL, BTREE_INSERT_NOFAIL,
1497 bch2_subvolume_reparent(trans, &iter, k,
1498 subvolid_to_delete, le32_to_cpu(s.parent)));
1502 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
/*
 * Transactional core of subvolume deletion: delete the subvolume key, mark
 * its snapshot node deleted, and register a commit hook that kicks off dead
 * snapshot cleanup once the transaction commits.  Runs inside commit_do()
 * from bch2_subvolume_delete(), so it may be re-executed on restart.
 */
1505 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
1507 struct btree_iter iter;
1508 struct bkey_s_c_subvolume subvol;
1509 struct btree_trans_commit_hook *h;
1513 subvol = bch2_bkey_get_iter_typed(trans, &iter,
1514 BTREE_ID_subvolumes, POS(0, subvolid),
1515 BTREE_ITER_CACHED|BTREE_ITER_INTENT,
1517 ret = bkey_err(subvol);
1518 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
1519 "missing subvolume %u", subvolid);
/* remember the snapshot id before the key is deleted */
1523 snapid = le32_to_cpu(subvol.v->snapshot);
1525 ret = bch2_btree_delete_at(trans, &iter, 0);
1529 ret = bch2_snapshot_node_set_deleted(trans, snapid);
/* hook memory is transaction-scoped; freed with the transaction */
1533 h = bch2_trans_kmalloc(trans, sizeof(*h));
1534 ret = PTR_ERR_OR_ZERO(h);
1538 h->fn = bch2_delete_dead_snapshots_hook;
1539 bch2_trans_commit_hook(trans, h);
1541 bch2_trans_iter_exit(trans, &iter);
/*
 * Delete subvolume @subvolid: first reparent its children, then run the
 * actual deletion in its own commit loop (BTREE_INSERT_NOFAIL: deletion must
 * not fail for lack of space).
 */
1545 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
1547 return bch2_subvolumes_reparent(trans, subvolid) ?:
1548 commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
1549 __bch2_subvolume_delete(trans, subvolid));
/*
 * Workqueue function: take the current list of unlinked subvolumes (swapped
 * out under snapshots_unlinked_lock so new unlinks can keep accumulating),
 * evict their cached inodes/pagecache, then delete each subvolume.  Drops
 * the write ref taken when the work was queued.
 */
1552 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
1554 struct bch_fs *c = container_of(work, struct bch_fs,
1555 snapshot_wait_for_pagecache_and_delete_work);
/* detach the pending list; darray_init leaves c->snapshots_unlinked empty */
1561 mutex_lock(&c->snapshots_unlinked_lock);
1562 s = c->snapshots_unlinked;
1563 darray_init(&c->snapshots_unlinked);
1564 mutex_unlock(&c->snapshots_unlinked_lock);
1569 bch2_evict_subvolume_inodes(c, &s);
1571 for (id = s.data; id < s.data + s.nr; id++) {
1572 ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id));
/* log and continue — one failed deletion shouldn't block the rest */
1574 bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
1582 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Commit hook payload for subvolume unlink: embeds the generic commit hook
 * plus (in elided lines) the id of the subvolume being unlinked, read back
 * via container_of() in the hook function below.
 */
1585 struct subvolume_unlink_hook {
1586 struct btree_trans_commit_hook h;
/*
 * Commit hook run after a subvolume is marked unlinked: add it to
 * c->snapshots_unlinked (deduplicating under the lock) and queue the
 * pagecache-wait-and-delete worker.  The write ref pins the fs against
 * shutdown while the work is pending; if queue_work() reports the work was
 * already queued, the extra ref is dropped immediately.
 */
1590 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
1591 struct btree_trans_commit_hook *_h)
1593 struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
1594 struct bch_fs *c = trans->c;
1597 mutex_lock(&c->snapshots_unlinked_lock);
1598 if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
1599 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
1600 mutex_unlock(&c->snapshots_unlinked_lock);
1605 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
1608 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
1609 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Mark subvolume @subvolid as unlinked (UNLINKED flag set on the key) and
 * register a commit hook so that, once the transaction commits, the
 * pagecache-eviction-and-delete worker picks it up.  The hook is registered
 * before the key update; presumably hooks only fire on successful commit so
 * ordering here is safe — NOTE(review): confirm against commit-hook semantics.
 */
1613 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
1615 struct btree_iter iter;
1616 struct bkey_i_subvolume *n;
1617 struct subvolume_unlink_hook *h;
1620 h = bch2_trans_kmalloc(trans, sizeof(*h));
1621 ret = PTR_ERR_OR_ZERO(h);
1625 h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook;
1626 h->subvol = subvolid;
1627 bch2_trans_commit_hook(trans, &h->h);
1629 n = bch2_bkey_get_mut_typed(trans, &iter,
1630 BTREE_ID_subvolumes, POS(0, subvolid),
1631 BTREE_ITER_CACHED, subvolume);
1632 ret = PTR_ERR_OR_ZERO(n);
1633 if (unlikely(ret)) {
1634 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
1635 "missing subvolume %u", subvolid);
1639 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
1640 bch2_trans_iter_exit(trans, &iter);
/*
 * Create a new subvolume rooted at @inode.
 *
 * Two modes, distinguished by src_subvolid:
 *  - src_subvolid == 0: brand new subvolume with a fresh snapshot node.
 *  - src_subvolid != 0: snapshot of an existing subvolume — two snapshot
 *    nodes are created (one for the new subvolume, one the source moves to),
 *    and the source subvolume is rewritten to point at its new node.
 *
 * On success *new_subvolid and *new_snapshotid are filled in.  The key slot
 * is allocated downward from U32_MAX; slot exhaustion maps to
 * -BCH_ERR_ENOSPC_subvolume_create.
 */
1644 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
1647 u32 *new_snapshotid,
1650 struct bch_fs *c = trans->c;
1651 struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
1652 struct bkey_i_subvolume *new_subvol = NULL;
1653 struct bkey_i_subvolume *src_subvol = NULL;
1654 u32 parent = 0, new_nodes[2], snapshot_subvols[2];
1657 ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
1658 BTREE_ID_subvolumes, POS(0, U32_MAX));
1659 if (ret == -BCH_ERR_ENOSPC_btree_slot)
1660 ret = -BCH_ERR_ENOSPC_subvolume_create;
1664 snapshot_subvols[0] = dst_iter.pos.offset;
1665 snapshot_subvols[1] = src_subvolid;
1668 /* Creating a snapshot: */
1670 src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
1671 BTREE_ID_subvolumes, POS(0, src_subvolid),
1672 BTREE_ITER_CACHED, subvolume);
1673 ret = PTR_ERR_OR_ZERO(src_subvol);
1674 if (unlikely(ret)) {
1675 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1676 "subvolume %u not found", src_subvolid);
/* new snapshot nodes become children of the source's current node */
1680 parent = le32_to_cpu(src_subvol->v.snapshot);
1683 ret = bch2_snapshot_node_create(trans, parent, new_nodes,
1685 src_subvolid ? 2 : 1);
/* move the source subvolume onto its new snapshot node */
1690 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
1691 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
1696 new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
1697 ret = PTR_ERR_OR_ZERO(new_subvol);
1701 new_subvol->v.flags = 0;
1702 new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
1703 new_subvol->v.inode = cpu_to_le64(inode);
1704 new_subvol->v.parent = cpu_to_le32(src_subvolid);
1705 new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
1706 new_subvol->v.otime.hi = 0;
1708 SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
/* SNAP flag records that this subvolume is a snapshot of another */
1709 SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
1711 *new_subvolid = new_subvol->k.p.offset;
1712 *new_snapshotid = new_nodes[0];
1714 bch2_trans_iter_exit(trans, &src_iter);
1715 bch2_trans_iter_exit(trans, &dst_iter);
1719 int bch2_fs_subvolumes_init(struct bch_fs *c)
1721 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
1722 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
1723 bch2_subvolume_wait_for_pagecache_and_delete);
1724 mutex_init(&c->snapshots_unlinked_lock);