1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
11 #include <linux/random.h>
13 static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * One skiplist hop toward @ancestor: test the skip[] pointers from the
 * highest (skip[2]) down and take the deepest one that does not pass
 * @ancestor.  NOTE(review): this listing elides lines — the return
 * statements after each test (and the final parent fallback) are not
 * visible here.
 */
15 static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
17 const struct snapshot_t *s = __snapshot_t(t, id);
19 if (s->skip[2] <= ancestor)
21 if (s->skip[1] <= ancestor)
23 if (s->skip[0] <= ancestor)
/*
 * Is @ancestor an ancestor of @id in the snapshot tree?
 *
 * Walks @id upward via the skiplist until it is within IS_ANCESTOR_BITMAP
 * of @ancestor, then answers with a single is_ancestor bitmap test.
 * Must only be used after check_snapshots has run (EBUG_ON below);
 * earlier, use bch2_snapshot_is_ancestor_early().
 * NOTE(review): the rcu_read_lock()/unlock() pair around the table access
 * is elided from this listing — presumed present, verify in full source.
 */
28 bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
30 struct snapshot_table *t;
33 EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
36 t = rcu_dereference(c->snapshots);
38 while (id && id < ancestor - IS_ANCESTOR_BITMAP)
39 id = get_ancestor_below(t, id, ancestor);
/* Within bitmap range: bit (ancestor - id - 1) says whether ancestor is an ancestor of id. */
41 ret = id && id < ancestor
42 ? test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor)
/*
 * Slow ancestor check for use before the skiplist/is_ancestor bitmaps are
 * known-good (i.e. before check_snapshots): walk parent pointers one at a
 * time until we reach or pass @ancestor.
 */
49 static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
51 struct snapshot_table *t;
54 t = rcu_dereference(c->snapshots);
56 while (id && id < ancestor)
57 id = __snapshot_t(t, id)->parent;
60 return id == ancestor;
/*
 * Depth a child of @parent should have: parent's depth + 1, or 0 for a
 * tree root (parent == 0).
 */
63 static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
68 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
/*
 * Slow path of snapshot_t_mut(): grow c->snapshots so that index
 * (U32_MAX - id) fits, copying the old table's entries into the new one
 * and publishing it with rcu_assign_pointer().
 * NOTE(review): allocation-failure handling and kvfree_rcu() of the old
 * table are elided from this listing.
 */
74 static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
/* Snapshot IDs are allocated downward from U32_MAX, so index from the top. */
76 size_t idx = U32_MAX - id;
78 struct snapshot_table *new, *old;
80 new_size = max(16UL, roundup_pow_of_two(idx + 1));
82 new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
89 rcu_dereference_protected(c->snapshots, true)->s,
90 sizeof(new->s[0]) * c->snapshot_table_size);
92 rcu_assign_pointer(c->snapshots, new);
93 c->snapshot_table_size = new_size;
97 return &rcu_dereference_protected(c->snapshots, true)->s[idx];
/*
 * Mutable lookup of the in-memory snapshot_t for @id; grows the table via
 * __snapshot_t_mut() if @id is out of range.  Caller must hold
 * c->snapshot_table_lock (asserted below).
 */
100 static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
102 size_t idx = U32_MAX - id;
104 lockdep_assert_held(&c->snapshot_table_lock);
106 if (likely(idx < c->snapshot_table_size))
107 return &rcu_dereference_protected(c->snapshots, true)->s[idx];
109 return __snapshot_t_mut(c, id);
/* Pretty-print a KEY_TYPE_snapshot_tree value (master subvol + root snapshot). */
114 void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
117 struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
119 prt_printf(out, "subvol %u root snapshot %u",
120 le32_to_cpu(t.v->master_subvol),
121 le32_to_cpu(t.v->root_snapshot));
/*
 * .key_invalid hook for KEY_TYPE_snapshot_tree: position must lie in
 * [POS(0, 1), POS(0, U32_MAX)].  Returns 0 if valid (trailing return
 * elided from this listing).
 */
124 int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
125 enum bkey_invalid_flags flags,
126 struct printbuf *err)
128 if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
129 bkey_lt(k.k->p, POS(0, 1))) {
130 prt_printf(err, "bad pos");
131 return -BCH_ERR_invalid_bkey;
/*
 * Look up snapshot_tree @id and copy its value into @s; translates a
 * generic ENOENT into the specific ENOENT_snapshot_tree error code.
 */
137 int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
138 struct bch_snapshot_tree *s)
140 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
141 BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
143 if (bch2_err_matches(ret, ENOENT))
144 ret = -BCH_ERR_ENOENT_snapshot_tree;
/*
 * Allocate an empty slot in the snapshot_trees btree (searching down from
 * U32_MAX) and return a new snapshot_tree key at that position, as a
 * pending transaction update.  Returns an ERR_PTR on failure.
 */
148 static struct bkey_i_snapshot_tree *
149 __snapshot_tree_create(struct btree_trans *trans)
151 struct btree_iter iter;
152 int ret = bch2_bkey_get_empty_slot(trans, &iter,
153 BTREE_ID_snapshot_trees, POS(0, U32_MAX));
154 struct bkey_i_snapshot_tree *s_t;
/* Map the generic "btree full" error to the snapshot-tree-specific one. */
156 if (ret == -BCH_ERR_ENOSPC_btree_slot)
157 ret = -BCH_ERR_ENOSPC_snapshot_tree;
161 s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
162 ret = PTR_ERR_OR_ZERO(s_t);
163 bch2_trans_iter_exit(trans, &iter);
164 return ret ? ERR_PTR(ret) : s_t;
/*
 * Create a new snapshot_tree entry pointing at root snapshot @root_id and
 * master subvolume @subvol_id; the allocated tree ID is returned via
 * @tree_id.
 */
167 static int snapshot_tree_create(struct btree_trans *trans,
168 u32 root_id, u32 subvol_id, u32 *tree_id)
170 struct bkey_i_snapshot_tree *n_tree =
171 __snapshot_tree_create(trans);
174 return PTR_ERR(n_tree);
176 n_tree->v.master_subvol = cpu_to_le32(subvol_id);
177 n_tree->v.root_snapshot = cpu_to_le32(root_id);
178 *tree_id = n_tree->k.p.offset;
182 /* Snapshot nodes: */
/*
 * Pretty-print a KEY_TYPE_snapshot value.  The depth/skiplist fields were
 * added later, so only print them if the value is big enough to contain
 * them (older on-disk keys may be shorter).
 */
184 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
187 struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
189 prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
190 BCH_SNAPSHOT_SUBVOL(s.v),
191 BCH_SNAPSHOT_DELETED(s.v),
192 le32_to_cpu(s.v->parent),
193 le32_to_cpu(s.v->children[0]),
194 le32_to_cpu(s.v->children[1]),
195 le32_to_cpu(s.v->subvol),
196 le32_to_cpu(s.v->tree),
198 if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
199 prt_printf(out, " depth %u skiplist %u %u %u",
200 le32_to_cpu(s.v->depth),
201 le32_to_cpu(s.v->skip[0]),
202 le32_to_cpu(s.v->skip[1]),
203 le32_to_cpu(s.v->skip[2]));
/*
 * .key_invalid hook for KEY_TYPE_snapshot.  Validates:
 *  - position within [POS(0, 1), POS(0, U32_MAX)]
 *  - parent id, if set, is numerically greater than this node
 *    (snapshot IDs are allocated top-down, so parents have larger IDs)
 *  - children are normalized (children[0] >= children[1]), distinct,
 *    and numerically smaller than this node
 *  - if the value carries a skiplist: skip[] is sorted ascending, and
 *    each entry's presence matches the parent's and is > this node's id.
 * NOTE(review): this listing elides lines, including part of the skiplist
 * entry condition at original line 259-261.
 */
206 int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
207 enum bkey_invalid_flags flags,
208 struct printbuf *err)
210 struct bkey_s_c_snapshot s;
213 if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
214 bkey_lt(k.k->p, POS(0, 1))) {
215 prt_printf(err, "bad pos");
216 return -BCH_ERR_invalid_bkey;
219 s = bkey_s_c_to_snapshot(k);
221 id = le32_to_cpu(s.v->parent);
222 if (id && id <= k.k->p.offset) {
223 prt_printf(err, "bad parent node (%u <= %llu)",
225 return -BCH_ERR_invalid_bkey;
228 if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
229 prt_printf(err, "children not normalized");
230 return -BCH_ERR_invalid_bkey;
233 if (s.v->children[0] &&
234 s.v->children[0] == s.v->children[1]) {
235 prt_printf(err, "duplicate child nodes");
236 return -BCH_ERR_invalid_bkey;
239 for (i = 0; i < 2; i++) {
240 id = le32_to_cpu(s.v->children[i]);
242 if (id >= k.k->p.offset) {
243 prt_printf(err, "bad child node (%u >= %llu)",
245 return -BCH_ERR_invalid_bkey;
249 if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
250 if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
251 le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
252 prt_printf(err, "skiplist not normalized");
253 return -BCH_ERR_invalid_bkey;
256 for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
257 id = le32_to_cpu(s.v->skip[i]);
259 if (!id != !s.v->parent ||
261 id <= k.k->p.offset)) {
262 prt_printf(err, "bad skiplist node %u)", id);
263 return -BCH_ERR_invalid_bkey;
/*
 * Trigger run on every snapshot-key update: mirror the on-disk
 * bch_snapshot value into the in-memory snapshot_t table, (re)build this
 * node's is_ancestor bitmap by walking parents, and note deleted nodes so
 * the delete_dead_snapshots pass gets scheduled.  Deleting the key
 * (non-snapshot @new) zeroes the in-memory entry.
 * NOTE(review): lines are elided in this listing, including the
 * pre-skiplist compatibility path around original line 305 and the error
 * label before the unlock.
 */
271 int bch2_mark_snapshot(struct btree_trans *trans,
272 enum btree_id btree, unsigned level,
273 struct bkey_s_c old, struct bkey_s_c new,
276 struct bch_fs *c = trans->c;
277 struct snapshot_t *t;
278 u32 id = new.k->p.offset;
281 mutex_lock(&c->snapshot_table_lock);
283 t = snapshot_t_mut(c, id);
285 ret = -BCH_ERR_ENOMEM_mark_snapshot;
289 if (new.k->type == KEY_TYPE_snapshot) {
290 struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
293 t->parent = le32_to_cpu(s.v->parent);
294 t->children[0] = le32_to_cpu(s.v->children[0]);
295 t->children[1] = le32_to_cpu(s.v->children[1]);
296 t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
297 t->tree = le32_to_cpu(s.v->tree);
/* depth/skip only exist in newer, longer values. */
299 if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
300 t->depth = le32_to_cpu(s.v->depth);
301 t->skip[0] = le32_to_cpu(s.v->skip[0]);
302 t->skip[1] = le32_to_cpu(s.v->skip[1]);
303 t->skip[2] = le32_to_cpu(s.v->skip[2]);
/* Record every ancestor within bitmap range in is_ancestor. */
311 while ((parent = bch2_snapshot_parent_early(c, parent)) &&
312 parent - id - 1 < IS_ANCESTOR_BITMAP)
313 __set_bit(parent - id - 1, t->is_ancestor);
315 if (BCH_SNAPSHOT_DELETED(s.v)) {
316 set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
317 c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots);
320 memset(t, 0, sizeof(*t));
323 mutex_unlock(&c->snapshot_table_lock);
/* Look up snapshot @id in the snapshots btree and copy its value into @s. */
327 static int snapshot_lookup(struct btree_trans *trans, u32 id,
328 struct bch_snapshot *s)
330 return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
331 BTREE_ITER_WITH_UPDATES, snapshot, s);
/*
 * Is snapshot @id live (present and not flagged DELETED)?
 * Returns 1/0, or a negative error.  A missing node is reported as a
 * filesystem error.  NOTE(review): the id==0 early return and the
 * error-propagation lines are elided from this listing.
 */
334 static int snapshot_live(struct btree_trans *trans, u32 id)
336 struct bch_snapshot v;
342 ret = snapshot_lookup(trans, id, &v);
343 if (bch2_err_matches(ret, ENOENT))
344 bch_err(trans->c, "snapshot node %u not found", id);
348 return !BCH_SNAPSHOT_DELETED(&v);
/*
 * Compute the equivalence class of snapshot @k: interior nodes with
 * exactly one live child are equivalent to that child (their keys can be
 * merged); otherwise a node is its own class.  Stores the result in the
 * in-memory table under snapshot_table_lock.
 * NOTE(review): the loop body recording live_idx/nr_live is elided.
 */
351 static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
353 struct bch_fs *c = trans->c;
354 unsigned i, nr_live = 0, live_idx = 0;
355 struct bkey_s_c_snapshot snap;
356 u32 id = k.k->p.offset, child[2];
358 if (k.k->type != KEY_TYPE_snapshot)
361 snap = bkey_s_c_to_snapshot(k);
363 child[0] = le32_to_cpu(snap.v->children[0]);
364 child[1] = le32_to_cpu(snap.v->children[1]);
366 for (i = 0; i < 2; i++) {
367 int ret = snapshot_live(trans, child[i]);
377 mutex_lock(&c->snapshot_table_lock);
/* With one live child, inherit its equivalence class; else (elided) use own id. */
379 snapshot_t_mut(c, id)->equiv = nr_live == 1
380 ? snapshot_t_mut(c, child[live_idx])->equiv
383 mutex_unlock(&c->snapshot_table_lock);
/* Child @child (0 or 1) of snapshot @id, from the in-memory table. */
390 static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
392 return snapshot_t(c, id)->children[child];
/* Left (first) child of snapshot @id. */
395 static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
397 return bch2_snapshot_child(c, id, 0);
/* Right (second) child of snapshot @id. */
400 static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
402 return bch2_snapshot_child(c, id, 1);
/*
 * Pre-order successor of @id within its snapshot tree: descend to the
 * left child if any, otherwise climb until an ancestor has an unvisited
 * right child.  Returns 0 at end of traversal (return paths elided in
 * this listing).
 */
405 static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
409 n = bch2_snapshot_left_child(c, id);
413 while ((parent = bch2_snapshot_parent(c, id))) {
414 n = bch2_snapshot_right_child(c, parent);
/*
 * Walk the whole snapshot tree rooted at @snapshot_root and return the
 * smallest (i.e. oldest — subvolume IDs allocate upward) subvolume id
 * attached to any node.  Loop structure partially elided in this listing.
 */
423 static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
425 u32 id = snapshot_root;
429 s = snapshot_t(c, id)->subvol;
431 if (s && (!subvol || s < subvol))
434 id = bch2_snapshot_tree_next(c, id);
/*
 * Find the master (non-snapshot) subvolume for the tree rooted at
 * @snapshot_root by scanning all subvolumes.  If none is found, promote
 * the oldest subvolume in the tree: clear its SNAP flag and return it.
 * Result via @subvol_id.  NOTE(review): iterator/loop plumbing and the
 * 'found' bookkeeping are partially elided from this listing.
 */
440 static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
441 u32 snapshot_root, u32 *subvol_id)
443 struct bch_fs *c = trans->c;
444 struct btree_iter iter;
446 struct bkey_s_c_subvolume s;
450 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
452 if (k.k->type != KEY_TYPE_subvolume)
455 s = bkey_s_c_to_subvolume(k);
/* Only consider subvolumes whose snapshot lives in this tree. */
456 if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
458 if (!BCH_SUBVOLUME_SNAP(s.v)) {
459 *subvol_id = s.k->p.offset;
465 bch2_trans_iter_exit(trans, &iter);
467 if (!ret && !found) {
468 struct bkey_i_subvolume *s;
470 *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
472 s = bch2_bkey_get_mut_typed(trans, &iter,
473 BTREE_ID_subvolumes, POS(0, *subvol_id),
475 ret = PTR_ERR_OR_ZERO(s);
479 SET_BCH_SUBVOLUME_SNAP(&s->v, false);
/*
 * fsck pass for one snapshot_tree key: verify the root_snapshot it points
 * at exists, is a tree root, and points back at this tree — otherwise
 * delete the entry.  Then verify master_subvol exists, belongs to this
 * tree, and is not a snapshot subvolume — otherwise repair it via
 * bch2_snapshot_tree_master_subvol().
 * NOTE(review): fsck_err labels, ret plumbing and printbuf teardown are
 * elided from this listing.
 */
485 static int check_snapshot_tree(struct btree_trans *trans,
486 struct btree_iter *iter,
489 struct bch_fs *c = trans->c;
490 struct bkey_s_c_snapshot_tree st;
491 struct bch_snapshot s;
492 struct bch_subvolume subvol;
493 struct printbuf buf = PRINTBUF;
497 if (k.k->type != KEY_TYPE_snapshot_tree)
500 st = bkey_s_c_to_snapshot_tree(k);
501 root_id = le32_to_cpu(st.v->root_snapshot);
503 ret = snapshot_lookup(trans, root_id, &s);
504 if (ret && !bch2_err_matches(ret, ENOENT))
507 if (fsck_err_on(ret ||
508 root_id != bch2_snapshot_root(c, root_id) ||
509 st.k->p.offset != le32_to_cpu(s.tree),
511 "snapshot tree points to missing/incorrect snapshot:\n %s",
512 (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
513 ret = bch2_btree_delete_at(trans, iter, 0);
517 ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
519 if (ret && !bch2_err_matches(ret, ENOENT))
522 if (fsck_err_on(ret, c,
523 "snapshot tree points to missing subvolume:\n %s",
524 (printbuf_reset(&buf),
525 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
526 fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
527 le32_to_cpu(subvol.snapshot),
529 "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
530 (printbuf_reset(&buf),
531 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
532 fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
533 "snapshot tree points to snapshot subvolume:\n %s",
534 (printbuf_reset(&buf),
535 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
536 struct bkey_i_snapshot_tree *u;
539 ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
543 u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
544 ret = PTR_ERR_OR_ZERO(u);
548 u->v.master_subvol = cpu_to_le32(subvol_id);
549 st = snapshot_tree_i_to_s_c(u);
558 * For each snapshot_tree, make sure it points to the root of a snapshot tree
559 * and that snapshot entry points back to it, or delete it.
561 * And, make sure it points to a subvolume within that snapshot tree, or correct
562 * it to point to the oldest subvolume within that snapshot tree.
/*
 * Recovery pass: run check_snapshot_tree() on every snapshot_tree key,
 * committing each repair as its own transaction.
 */
564 int bch2_check_snapshot_trees(struct bch_fs *c)
566 struct btree_iter iter;
570 ret = bch2_trans_run(c,
571 for_each_btree_key_commit(&trans, iter,
572 BTREE_ID_snapshot_trees, POS_MIN,
573 BTREE_ITER_PREFETCH, k,
574 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
575 check_snapshot_tree(&trans, &iter, k)));
578 bch_err(c, "error %i checking snapshot trees", ret);
583 * Look up snapshot tree for @tree_id and find root,
584 * make sure @snap_id is a descendent:
/*
 * Does @snap_id's tree pointer look sane?  Look up snapshot_tree
 * @tree_id and check @snap_id descends from its root.  Returns 1/0, or a
 * negative error (non-ENOENT error propagation elided in this listing).
 */
586 static int snapshot_tree_ptr_good(struct btree_trans *trans,
587 u32 snap_id, u32 tree_id)
589 struct bch_snapshot_tree s_t;
590 int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
592 if (bch2_err_matches(ret, ENOENT))
597 return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
/*
 * Pick a random ancestor of @id to use as a skiplist entry: jump a
 * uniformly random number of parents in [0, depth).  Randomized levels
 * give the skiplist its expected-logarithmic walk.  (Guards for id==0 /
 * depth==0 are elided from this listing — verify in full source.)
 */
600 static u32 snapshot_skiplist_get(struct bch_fs *c, u32 id)
602 const struct snapshot_t *s;
608 s = snapshot_t(c, id);
610 id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
/*
 * Validate the skiplist of @s: each skip entry must be present iff the
 * node has a parent, must refer to an existing snapshot, and that
 * snapshot must be in the same tree.  Returns 1 (good) / 0 (bad) or a
 * negative error; several return paths are elided in this listing.
 */
616 static int snapshot_skiplist_good(struct btree_trans *trans, struct bch_snapshot s)
618 struct bch_snapshot a;
622 for (i = 0; i < 3; i++) {
623 if (!s.parent != !s.skip[i])
629 ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
630 if (bch2_err_matches(ret, ENOENT))
/* Skip entry must point inside this node's own tree. */
635 if (a.tree != s.tree)
643 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
644 * its snapshot_tree pointer is correct (allocate new one if necessary), then
645 * update this node's pointer to root node's pointer:
/*
 * Called when a snapshot's tree pointer was found incorrect: look up the
 * root of @k's snapshot tree, ensure the root's snapshot_tree entry is
 * correct (creating a new snapshot_tree if needed), then point this
 * node's tree field at the root's tree.
 * NOTE(review): error-handling gotos and some braces are elided from
 * this listing.
 */
647 static int snapshot_tree_ptr_repair(struct btree_trans *trans,
648 struct btree_iter *iter,
650 struct bch_snapshot *s)
652 struct bch_fs *c = trans->c;
653 struct btree_iter root_iter;
654 struct bch_snapshot_tree s_t;
655 struct bkey_s_c_snapshot root;
656 struct bkey_i_snapshot *u;
657 u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
660 root = bch2_bkey_get_iter_typed(trans, &root_iter,
661 BTREE_ID_snapshots, POS(0, root_id),
662 BTREE_ITER_WITH_UPDATES, snapshot);
663 ret = bkey_err(root);
667 tree_id = le32_to_cpu(root.v->tree);
669 ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
670 if (ret && !bch2_err_matches(ret, ENOENT))
/* Root's tree entry missing or stale: create a fresh snapshot_tree for the root. */
673 if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
674 u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
675 ret = PTR_ERR_OR_ZERO(u) ?:
676 snapshot_tree_create(trans, root_id,
677 bch2_snapshot_tree_oldest_subvol(c, root_id),
682 u->v.tree = cpu_to_le32(tree_id);
683 if (k.k->p.offset == root_id)
687 if (k.k->p.offset != root_id) {
688 u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
689 ret = PTR_ERR_OR_ZERO(u);
693 u->v.tree = cpu_to_le32(tree_id);
697 bch2_trans_iter_exit(trans, &root_iter);
/*
 * fsck pass for one snapshot key.  Checks, in order:
 *  - parent exists and lists this node as a child
 *  - each child exists and points back at this node as parent
 *  - the subvol field matches the SUBVOL/DELETED flags (clearing a stale
 *    subvol pointer when the node should not have one)
 *  - the tree pointer is good, repairing via snapshot_tree_ptr_repair()
 *  - depth matches parent depth + 1, and the skiplist validates;
 *    both are silently rewritten on filesystems from before the
 *    snapshot_skiplists version, and prompt fsck_err otherwise.
 * NOTE(review): many error/exit lines are elided from this listing.
 */
701 static int check_snapshot(struct btree_trans *trans,
702 struct btree_iter *iter,
705 struct bch_fs *c = trans->c;
706 struct bch_snapshot s;
707 struct bch_subvolume subvol;
708 struct bch_snapshot v;
709 struct bkey_i_snapshot *u;
710 u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
712 struct printbuf buf = PRINTBUF;
713 bool should_have_subvol;
717 if (k.k->type != KEY_TYPE_snapshot)
/* Copy into a zero-padded bch_snapshot so missing (older, shorter) fields read as 0. */
720 memset(&s, 0, sizeof(s));
721 memcpy(&s, k.v, bkey_val_bytes(k.k));
723 id = le32_to_cpu(s.parent);
725 ret = snapshot_lookup(trans, id, &v);
726 if (bch2_err_matches(ret, ENOENT))
727 bch_err(c, "snapshot with nonexistent parent:\n %s",
728 (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
732 if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
733 le32_to_cpu(v.children[1]) != k.k->p.offset) {
734 bch_err(c, "snapshot parent %u missing pointer to child %llu",
741 for (i = 0; i < 2 && s.children[i]; i++) {
742 id = le32_to_cpu(s.children[i]);
744 ret = snapshot_lookup(trans, id, &v);
745 if (bch2_err_matches(ret, ENOENT))
746 bch_err(c, "snapshot node %llu has nonexistent child %u",
751 if (le32_to_cpu(v.parent) != k.k->p.offset) {
752 bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
753 id, le32_to_cpu(v.parent), k.k->p.offset);
759 should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
760 !BCH_SNAPSHOT_DELETED(&s);
762 if (should_have_subvol) {
763 id = le32_to_cpu(s.subvol);
764 ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
765 if (bch2_err_matches(ret, ENOENT))
766 bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
767 (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
771 if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
772 bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
778 if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s",
779 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
780 u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
781 ret = PTR_ERR_OR_ZERO(u);
790 ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
794 if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s",
795 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
796 ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
802 real_depth = bch2_snapshot_depth(c, parent_id);
804 if (le32_to_cpu(s.depth) != real_depth &&
805 (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
806 fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s",
807 real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
808 u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
809 ret = PTR_ERR_OR_ZERO(u);
813 u->v.depth = cpu_to_le32(real_depth);
817 ret = snapshot_skiplist_good(trans, s);
822 (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
823 fsck_err(c, "snapshot with bad skiplist field:\n %s",
824 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
825 u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
826 ret = PTR_ERR_OR_ZERO(u);
830 for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
831 u->v.skip[i] = cpu_to_le32(snapshot_skiplist_get(c, parent_id));
/* Keep skip[] sorted ascending, as bch2_snapshot_invalid() requires. */
833 bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int);
/*
 * Recovery pass: run check_snapshot() on every snapshot key, iterating
 * in reverse so each node's (higher-numbered) parent is checked first —
 * the depth repair relies on the parent's depth already being correct.
 */
843 int bch2_check_snapshots(struct bch_fs *c)
845 struct btree_iter iter;
850 * We iterate backwards as checking/fixing the depth field requires that
851 * the parent's depth already be correct:
853 ret = bch2_trans_run(c,
854 for_each_btree_key_reverse_commit(&trans, iter,
855 BTREE_ID_snapshots, POS_MAX,
856 BTREE_ITER_PREFETCH, k,
857 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
858 check_snapshot(&trans, &iter, k)));
/*
 * fsck pass for one subvolume key: verify its snapshot exists, delete
 * subvolumes flagged UNLINKED, and for non-snapshot subvolumes make sure
 * the owning snapshot_tree's master_subvol points back here — if not,
 * flag this subvolume as a snapshot subvolume.
 * NOTE(review): several error/exit lines are elided from this listing.
 */
864 static int check_subvol(struct btree_trans *trans,
865 struct btree_iter *iter,
868 struct bch_fs *c = trans->c;
869 struct bkey_s_c_subvolume subvol;
870 struct bch_snapshot snapshot;
874 if (k.k->type != KEY_TYPE_subvolume)
877 subvol = bkey_s_c_to_subvolume(k);
878 snapid = le32_to_cpu(subvol.v->snapshot);
879 ret = snapshot_lookup(trans, snapid, &snapshot);
881 if (bch2_err_matches(ret, ENOENT))
882 bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
883 k.k->p.offset, snapid);
887 if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
890 ret = bch2_subvolume_delete(trans, iter->pos.offset);
892 bch_err(c, "error deleting subvolume %llu: %s",
893 iter->pos.offset, bch2_err_str(ret));
/* Deleting invalidates the iterator: force a nested restart even on success. */
894 return ret ?: -BCH_ERR_transaction_restart_nested;
897 if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
898 u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
900 struct bch_snapshot_tree st;
903 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
906 ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
908 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
909 "%s: snapshot tree %u not found", __func__, snapshot_tree);
914 if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
915 "subvolume %llu is not set as snapshot but is not master subvolume",
917 struct bkey_i_subvolume *s =
918 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
919 ret = PTR_ERR_OR_ZERO(s);
923 SET_BCH_SUBVOLUME_SNAP(&s->v, true);
/* Recovery pass: run check_subvol() on every subvolume key. */
931 int bch2_check_subvols(struct bch_fs *c)
933 struct btree_iter iter;
937 ret = bch2_trans_run(c,
938 for_each_btree_key_commit(&trans, iter,
939 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
940 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
941 check_subvol(&trans, &iter, k)));
/*
 * Filesystem-shutdown teardown of snapshot state.  Body not visible in
 * this listing — presumably frees c->snapshots; verify in full source.
 */
947 void bch2_fs_snapshots_exit(struct bch_fs *c)
/*
 * Startup: walk the snapshots btree, populating the in-memory snapshot
 * table (via the same bch2_mark_snapshot trigger used at runtime) and
 * computing equivalence classes.
 */
952 int bch2_snapshots_read(struct bch_fs *c)
954 struct btree_iter iter;
958 ret = bch2_trans_run(c,
959 for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
961 bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
962 bch2_snapshot_set_equiv(&trans, k)));
969 * Mark a snapshot as deleted, for future cleanup:
/*
 * Mark snapshot @id deleted (and no longer a subvolume holder) so the
 * delete_dead_snapshots pass will reap it later.  No-op if already
 * flagged deleted.
 */
971 static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
973 struct btree_iter iter;
974 struct bkey_i_snapshot *s;
977 s = bch2_bkey_get_mut_typed(trans, &iter,
978 BTREE_ID_snapshots, POS(0, id),
980 ret = PTR_ERR_OR_ZERO(s);
982 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
983 trans->c, "missing snapshot %u", id);
987 /* already deleted? */
988 if (BCH_SNAPSHOT_DELETED(&s->v))
991 SET_BCH_SNAPSHOT_DELETED(&s->v, true);
992 SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
995 bch2_trans_iter_exit(trans, &iter);
/*
 * Physically remove snapshot node @id (which must already be flagged
 * DELETED).  If it has a parent, unlink it from the parent's children[]
 * (re-normalizing so children[0] >= children[1]); if it is a tree root,
 * repoint the snapshot_tree entry at the surviving child or delete the
 * tree entry when this was the last node.  Finally delete the key itself.
 * NOTE(review): error gotos and some branch structure are elided from
 * this listing.
 */
999 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
1001 struct bch_fs *c = trans->c;
1002 struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
1003 struct btree_iter tree_iter = (struct btree_iter) { NULL };
1004 struct bkey_s_c_snapshot s;
1009 s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
1010 BTREE_ITER_INTENT, snapshot);
1012 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1013 "missing snapshot %u", id);
1018 BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
1019 parent_id = le32_to_cpu(s.v->parent);
1022 struct bkey_i_snapshot *parent;
1024 parent = bch2_bkey_get_mut_typed(trans, &p_iter,
1025 BTREE_ID_snapshots, POS(0, parent_id),
1027 ret = PTR_ERR_OR_ZERO(parent);
1028 if (unlikely(ret)) {
1029 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1030 "missing snapshot %u", parent_id);
1034 for (i = 0; i < 2; i++)
1035 if (le32_to_cpu(parent->v.children[i]) == id)
1039 bch_err(c, "snapshot %u missing child pointer to %u",
1042 parent->v.children[i] = 0;
/* Keep children normalized: children[0] >= children[1]. */
1044 if (le32_to_cpu(parent->v.children[0]) <
1045 le32_to_cpu(parent->v.children[1]))
1046 swap(parent->v.children[0],
1047 parent->v.children[1]);
1050 * We're deleting the root of a snapshot tree: update the
1051 * snapshot_tree entry to point to the new root, or delete it if
1052 * this is the last snapshot ID in this tree:
1054 struct bkey_i_snapshot_tree *s_t;
1056 BUG_ON(s.v->children[1]);
1058 s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
1059 BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
1061 ret = PTR_ERR_OR_ZERO(s_t);
1065 if (s.v->children[0]) {
1066 s_t->v.root_snapshot = s.v->children[0];
1068 s_t->k.type = KEY_TYPE_deleted;
1069 set_bkey_val_u64s(&s_t->k, 0);
1073 ret = bch2_btree_delete_at(trans, &iter, 0);
1075 bch2_trans_iter_exit(trans, &tree_iter);
1076 bch2_trans_iter_exit(trans, &p_iter);
1077 bch2_trans_iter_exit(trans, &iter);
/*
 * Allocate @nr_snapids new snapshot nodes as children of @parent (0 for
 * tree roots) in tree @tree.  IDs are taken from the top of the keyspace
 * downward (peek from POS_MIN with a backwards slot walk); each new node
 * gets the computed depth, a freshly randomized sorted skiplist, and its
 * subvolume from @snapshot_subvols.  New IDs are returned in
 * @new_snapids.  Marks each node in the in-memory table immediately via
 * bch2_mark_snapshot().
 * NOTE(review): error gotos and bkey_err checks are elided from this
 * listing.
 */
1081 static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
1083 u32 *snapshot_subvols,
1084 unsigned nr_snapids)
1086 struct bch_fs *c = trans->c;
1087 struct btree_iter iter;
1088 struct bkey_i_snapshot *n;
1091 u32 depth = bch2_snapshot_depth(c, parent);
1094 bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
1095 POS_MIN, BTREE_ITER_INTENT);
1096 k = bch2_btree_iter_peek(&iter);
1101 for (i = 0; i < nr_snapids; i++) {
1102 k = bch2_btree_iter_prev_slot(&iter);
/* Ran off the front of the keyspace: no snapshot IDs left. */
1107 if (!k.k || !k.k->p.offset) {
1108 ret = -BCH_ERR_ENOSPC_snapshot_create;
1112 n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
1113 ret = PTR_ERR_OR_ZERO(n);
1118 n->v.parent = cpu_to_le32(parent);
1119 n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
1120 n->v.tree = cpu_to_le32(tree);
1121 n->v.depth = cpu_to_le32(depth);
1123 for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
1124 n->v.skip[j] = cpu_to_le32(snapshot_skiplist_get(c, parent));
1126 bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int);
1127 SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
1129 ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
1130 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
1134 new_snapids[i] = iter.pos.offset;
1137 bch2_trans_iter_exit(trans, &iter);
1142 * Create new snapshot IDs as children of an existing snapshot ID:
/*
 * Turn leaf snapshot @parent into an interior node by creating its child
 * snapshot IDs.  The parent must currently have no children; after
 * creation it stops being a subvolume holder (children take that role).
 * NOTE(review): error paths/gotos are elided from this listing.
 */
1144 static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
1146 u32 *snapshot_subvols,
1147 unsigned nr_snapids)
1149 struct btree_iter iter;
1150 struct bkey_i_snapshot *n_parent;
1153 n_parent = bch2_bkey_get_mut_typed(trans, &iter,
1154 BTREE_ID_snapshots, POS(0, parent),
1156 ret = PTR_ERR_OR_ZERO(n_parent);
1157 if (unlikely(ret)) {
1158 if (bch2_err_matches(ret, ENOENT))
1159 bch_err(trans->c, "snapshot %u not found", parent);
1163 if (n_parent->v.children[0] || n_parent->v.children[1]) {
1164 bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
1169 ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
1170 new_snapids, snapshot_subvols, nr_snapids);
1174 n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
1175 n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
1176 n_parent->v.subvol = 0;
1177 SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
1179 bch2_trans_iter_exit(trans, &iter);
1184 * Create a snapshot node that is the root of a new tree:
/*
 * Create a brand-new snapshot tree: allocate the snapshot_tree entry,
 * create the root snapshot node(s) inside it, then wire the tree's
 * master_subvol and root_snapshot to the first new subvolume/snapshot.
 */
1186 static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
1188 u32 *snapshot_subvols,
1189 unsigned nr_snapids)
1191 struct bkey_i_snapshot_tree *n_tree;
1194 n_tree = __snapshot_tree_create(trans);
1195 ret = PTR_ERR_OR_ZERO(n_tree) ?:
1196 create_snapids(trans, 0, n_tree->k.p.offset,
1197 new_snapids, snapshot_subvols, nr_snapids);
1201 n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]);
1202 n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]);
/*
 * Public entry point for snapshot creation: parent == 0 means create a
 * new tree with a single root (nr_snapids == 1); parent != 0 means split
 * an existing leaf into two children (nr_snapids == 2).  Any other
 * combination is a caller bug.
 */
1206 int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
1208 u32 *snapshot_subvols,
1209 unsigned nr_snapids)
1211 BUG_ON((parent == 0) != (nr_snapids == 1));
1212 BUG_ON((parent != 0) != (nr_snapids == 2));
1215 ? bch2_snapshot_node_create_children(trans, parent,
1216 new_snapids, snapshot_subvols, nr_snapids)
1217 : bch2_snapshot_node_create_tree(trans,
1218 new_snapids, snapshot_subvols, nr_snapids);
/*
 * Per-key worker for dead-snapshot reaping in a data btree: delete keys
 * whose snapshot is on the @deleted list, and keys whose equivalence
 * class was already seen at this position (@equiv_seen, reset per bkey
 * position tracked via @last_pos — the reset lines are elided from this
 * listing); otherwise record the class as seen.
 */
1222 static int snapshot_delete_key(struct btree_trans *trans,
1223 struct btree_iter *iter,
1225 snapshot_id_list *deleted,
1226 snapshot_id_list *equiv_seen,
1227 struct bpos *last_pos)
1229 struct bch_fs *c = trans->c;
1230 u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
1232 if (!bkey_eq(k.k->p, *last_pos))
1236 if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
1237 snapshot_list_has_id(equiv_seen, equiv)) {
1238 return bch2_btree_delete_at(trans, iter,
1239 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1241 return snapshot_list_add(c, equiv_seen, equiv);
/*
 * If snapshot @k is an interior node that is not a subvolume holder and
 * has at least one dead child, flag it deleted so its keys can be merged
 * into the surviving child.  (The liveness test on the two children's
 * combined result is partially elided from this listing.)
 */
1245 static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
1248 struct bkey_s_c_snapshot snap;
1252 if (k.k->type != KEY_TYPE_snapshot)
1255 snap = bkey_s_c_to_snapshot(k);
1256 if (BCH_SNAPSHOT_DELETED(snap.v) ||
1257 BCH_SNAPSHOT_SUBVOL(snap.v))
1260 children[0] = le32_to_cpu(snap.v->children[0]);
1261 children[1] = le32_to_cpu(snap.v->children[1]);
1263 ret = snapshot_live(trans, children[0]) ?:
1264 snapshot_live(trans, children[1]);
1269 return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
/*
 * Reap all snapshots flagged deleted:
 *  1. flag redundant interior nodes deleted (bch2_delete_redundant_snapshot)
 *  2. recompute equivalence classes
 *  3. collect the deleted-id list
 *  4. for every snapshotted btree, drop keys in deleted/duplicate-class
 *     snapshots (snapshot_delete_key)
 *  5. delete the snapshot nodes themselves, then clear the
 *     HAVE_DELETED_SNAPSHOTS flag.
 * Goes read-write early if the fs hasn't started yet.
 * NOTE(review): error gotos are elided from this listing; also note the
 * typo "deleleting" in the runtime error string below (left as-is here —
 * changing it is a code change, flagged for a future fix).
 */
1273 int bch2_delete_dead_snapshots(struct bch_fs *c)
1275 struct btree_trans trans;
1276 struct btree_iter iter;
1278 struct bkey_s_c_snapshot snap;
1279 snapshot_id_list deleted = { 0 };
1283 if (!test_bit(BCH_FS_STARTED, &c->flags)) {
1284 ret = bch2_fs_read_write_early(c);
1286 bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
1291 bch2_trans_init(&trans, c, 0, 0);
1294 * For every snapshot node: If we have no live children and it's not
1295 * pointed to by a subvolume, delete it:
1297 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
1300 bch2_delete_redundant_snapshot(&trans, &iter, k));
1302 bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
1306 for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
1308 bch2_snapshot_set_equiv(&trans, k));
1310 bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
1314 for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
1315 POS_MIN, 0, k, ret) {
1316 if (k.k->type != KEY_TYPE_snapshot)
1319 snap = bkey_s_c_to_snapshot(k);
1320 if (BCH_SNAPSHOT_DELETED(snap.v)) {
1321 ret = snapshot_list_add(c, &deleted, k.k->p.offset);
1326 bch2_trans_iter_exit(&trans, &iter);
1329 bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
1333 for (id = 0; id < BTREE_ID_NR; id++) {
1334 struct bpos last_pos = POS_MIN;
1335 snapshot_id_list equiv_seen = { 0 };
1337 if (!btree_type_has_snapshots(id))
1340 ret = for_each_btree_key_commit(&trans, iter,
1342 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1343 NULL, NULL, BTREE_INSERT_NOFAIL,
1344 snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
1346 darray_exit(&equiv_seen);
1349 bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
1354 for (i = 0; i < deleted.nr; i++) {
1355 ret = commit_do(&trans, NULL, NULL, 0,
1356 bch2_snapshot_node_delete(&trans, deleted.data[i]));
1358 bch_err(c, "error deleting snapshot %u: %s",
1359 deleted.data[i], bch2_err_str(ret));
1364 clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
1366 darray_exit(&deleted);
1367 bch2_trans_exit(&trans);
/*
 * Workqueue body for async snapshot reaping; drops the write ref taken
 * by bch2_delete_dead_snapshots_async() when done.
 */
1373 static void bch2_delete_dead_snapshots_work(struct work_struct *work)
1375 struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
1377 if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
1378 bch2_delete_dead_snapshots(c);
1379 bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Queue dead-snapshot reaping on the write-ref workqueue.  The ref is
 * released here only if the work was already queued (queue_work returned
 * false); otherwise the worker releases it.
 */
1382 void bch2_delete_dead_snapshots_async(struct bch_fs *c)
1384 if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
1385 !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
1386 bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Transaction-commit hook: records that dead snapshots now exist and
 * kicks off async reaping — unless recovery hasn't reached the
 * delete_dead_snapshots pass yet, in which case that pass will handle it.
 */
1389 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
1390 struct btree_trans_commit_hook *h)
1392 struct bch_fs *c = trans->c;
1394 set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
1396 if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots)
1399 bch2_delete_dead_snapshots_async(c);
/*
 * .key_invalid hook for KEY_TYPE_subvolume: position must lie within
 * [SUBVOL_POS_MIN, SUBVOL_POS_MAX].  (Trailing success return elided
 * from this listing.)
 */
1405 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
1406 unsigned flags, struct printbuf *err)
1408 if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
1409 bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
1410 prt_printf(err, "invalid pos");
1411 return -BCH_ERR_invalid_bkey;
/*
 * Pretty-print a KEY_TYPE_subvolume value; the parent field is newer, so
 * only printed when the value is long enough to contain it.
 */
1417 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
1420 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
1422 prt_printf(out, "root %llu snapshot id %u",
1423 le64_to_cpu(s.v->inode),
1424 le32_to_cpu(s.v->snapshot));
1426 if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
1427 prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
/*
 * Look up subvolume @subvol and copy its value to @s.  When
 * @inconsistent_if_not_found, a missing subvolume is flagged as
 * filesystem inconsistency (callers that require it to exist).
 */
1430 static __always_inline int
1431 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
1432 bool inconsistent_if_not_found,
1434 struct bch_subvolume *s)
1436 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
1437 iter_flags, subvolume, s);
1438 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
1439 inconsistent_if_not_found,
1440 trans->c, "missing subvolume %u", subvol);
/* Out-of-line wrapper around bch2_subvolume_get_inlined(). */
1444 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
1445 bool inconsistent_if_not_found,
1447 struct bch_subvolume *s)
1449 return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
/*
 * Resolve the subvolume owning snapshot @snapshot: look up the snapshot,
 * then fetch its subvol (treating a missing subvolume as inconsistency).
 */
1452 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
1453 struct bch_subvolume *subvol)
1455 struct bch_snapshot snap;
1457 return snapshot_lookup(trans, snapshot, &snap) ?:
1458 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
/*
 * Resolve subvolume ID -> snapshot ID: look up @subvol in the subvolumes
 * btree (including uncommitted updates in this transaction, via
 * BTREE_ITER_WITH_UPDATES) and store its snapshot ID in *snapid.
 *
 * A missing subvolume is both returned as -BCH_ERR_ENOENT_subvolume and
 * flagged as filesystem inconsistency.
 */
1461 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
1464 struct btree_iter iter;
1468 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol),
1470 BTREE_ITER_WITH_UPDATES);
1471 ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -BCH_ERR_ENOENT_subvolume;
1474 *snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
1475 else if (bch2_err_matches(ret, ENOENT))
1476 bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
1477 bch2_trans_iter_exit(trans, &iter);
/*
 * Per-key helper for bch2_subvolumes_reparent(): if @k is a subvolume
 * whose parent field equals @old_parent, rewrite it to point at
 * @new_parent.  Non-subvolume keys, and subvolumes with a different
 * parent, are skipped.
 */
1481 static int bch2_subvolume_reparent(struct btree_trans *trans,
1482 struct btree_iter *iter,
1484 u32 old_parent, u32 new_parent)
1486 struct bkey_i_subvolume *s;
1489 if (k.k->type != KEY_TYPE_subvolume)
/* old on-disk values may predate the parent field - check value size: */
1492 if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
1493 le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
1496 s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
1497 ret = PTR_ERR_OR_ZERO(s);
1501 s->v.parent = cpu_to_le32(new_parent);
1506 * Scan for subvolumes with parent @subvolid_to_delete, reparent:
/*
 * Before deleting subvolume @subvolid_to_delete, walk the entire
 * subvolumes btree and repoint any child subvolume's parent field at the
 * deleted subvolume's own parent, so the parent chain stays intact.
 * The doomed subvolume is looked up first (inconsistency if missing) to
 * learn its parent.
 */
1508 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
1510 struct btree_iter iter;
1512 struct bch_subvolume s;
1514 return lockrestart_do(trans,
1515 bch2_subvolume_get(trans, subvolid_to_delete, true,
1516 BTREE_ITER_CACHED, &s)) ?:
1517 for_each_btree_key_commit(trans, iter,
1518 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
1519 NULL, NULL, BTREE_INSERT_NOFAIL,
1520 bch2_subvolume_reparent(trans, &iter, k,
1521 subvolid_to_delete, le32_to_cpu(s.parent)));
1525 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
/*
 * Core of subvolume deletion, run inside a single transaction commit:
 *  - look up the subvolume (inconsistency if missing),
 *  - delete its key from the subvolumes btree,
 *  - mark its snapshot node as deleted,
 *  - register a commit hook that queues async dead-snapshot cleanup
 *    once the commit succeeds.
 */
1528 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
1530 struct btree_iter iter;
1531 struct bkey_s_c_subvolume subvol;
1532 struct btree_trans_commit_hook *h;
1536 subvol = bch2_bkey_get_iter_typed(trans, &iter,
1537 BTREE_ID_subvolumes, POS(0, subvolid),
1538 BTREE_ITER_CACHED|BTREE_ITER_INTENT,
1540 ret = bkey_err(subvol);
1541 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
1542 "missing subvolume %u", subvolid);
1546 snapid = le32_to_cpu(subvol.v->snapshot);
1548 ret = bch2_btree_delete_at(trans, &iter, 0);
1552 ret = bch2_snapshot_node_set_deleted(trans, snapid);
/* hook memory is transaction-scoped; freed when the trans is reset: */
1556 h = bch2_trans_kmalloc(trans, sizeof(*h));
1557 ret = PTR_ERR_OR_ZERO(h);
1561 h->fn = bch2_delete_dead_snapshots_hook;
1562 bch2_trans_commit_hook(trans, h);
1564 bch2_trans_iter_exit(trans, &iter);
/*
 * Delete subvolume @subvolid: first reparent any child subvolumes to its
 * parent, then run the actual deletion in its own commit loop
 * (BTREE_INSERT_NOFAIL - deletion must not fail once started).
 */
1568 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
1570 return bch2_subvolumes_reparent(trans, subvolid) ?:
1571 commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
1572 __bch2_subvolume_delete(trans, subvolid));
/*
 * Workqueue function for deferred deletion of unlinked subvolumes:
 * atomically takes over the list of unlinked subvolume IDs, evicts their
 * inodes from the pagecache/inode cache, then deletes each subvolume.
 * Runs holding the snapshot_delete_pagecache write ref taken by the hook
 * that queued us; dropped at the end.
 */
1575 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
1577 struct bch_fs *c = container_of(work, struct bch_fs,
1578 snapshot_wait_for_pagecache_and_delete_work);
/* Steal the current unlinked list so new entries can keep accumulating: */
1584 mutex_lock(&c->snapshots_unlinked_lock);
1585 s = c->snapshots_unlinked;
1586 darray_init(&c->snapshots_unlinked);
1587 mutex_unlock(&c->snapshots_unlinked_lock);
1592 bch2_evict_subvolume_inodes(c, &s);
1594 for (id = s.data; id < s.data + s.nr; id++) {
1595 ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id));
1597 bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
1605 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Commit hook carrying the ID of a subvolume being unlinked; embeds the
 * generic hook so the callback can recover it via container_of().
 */
1608 struct subvolume_unlink_hook {
1609 struct btree_trans_commit_hook h;
/*
 * Commit hook run after a subvolume is marked unlinked: add its ID to
 * c->snapshots_unlinked (if not already present) and queue the
 * pagecache-eviction/deletion worker, pinning the fs writeable with the
 * snapshot_delete_pagecache write ref.  The ref is dropped if the work
 * was already queued; the pending instance will pick up our entry.
 */
1613 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
1614 struct btree_trans_commit_hook *_h)
1616 struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
1617 struct bch_fs *c = trans->c;
1620 mutex_lock(&c->snapshots_unlinked_lock);
1621 if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
1622 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
1623 mutex_unlock(&c->snapshots_unlinked_lock);
1628 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
1631 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
1632 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Mark subvolume @subvolid as unlinked (BCH_SUBVOLUME_UNLINKED) - the
 * actual deletion is deferred until its inodes are evicted from the
 * pagecache: a commit hook registered here queues that work after this
 * transaction commits.  A missing subvolume is flagged as inconsistency.
 */
1636 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
1638 struct btree_iter iter;
1639 struct bkey_i_subvolume *n;
1640 struct subvolume_unlink_hook *h;
/* transaction-scoped allocation - freed with the transaction: */
1643 h = bch2_trans_kmalloc(trans, sizeof(*h));
1644 ret = PTR_ERR_OR_ZERO(h);
1648 h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook;
1649 h->subvol = subvolid;
1650 bch2_trans_commit_hook(trans, &h->h);
1652 n = bch2_bkey_get_mut_typed(trans, &iter,
1653 BTREE_ID_subvolumes, POS(0, subvolid),
1654 BTREE_ITER_CACHED, subvolume);
1655 ret = PTR_ERR_OR_ZERO(n);
1656 if (unlikely(ret)) {
1657 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
1658 "missing subvolume %u", subvolid);
1662 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
1663 bch2_trans_iter_exit(trans, &iter);
/*
 * Create a new subvolume with root inode @inode.
 *
 * If @src_subvolid is nonzero we are snapshotting an existing subvolume:
 * two snapshot nodes are created under the source's current snapshot -
 * one for the new subvolume, one that the source subvolume is moved to.
 * If @src_subvolid is zero, a single fresh snapshot node is created.
 *
 * On success *new_subvolid and *new_snapshotid are set to the new
 * subvolume's ID and snapshot ID.  Subvolume IDs are allocated from an
 * empty slot scanning down from U32_MAX; exhaustion is reported as
 * -BCH_ERR_ENOSPC_subvolume_create.
 */
1667 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
1670 u32 *new_snapshotid,
1673 struct bch_fs *c = trans->c;
1674 struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
1675 struct bkey_i_subvolume *new_subvol = NULL;
1676 struct bkey_i_subvolume *src_subvol = NULL;
1677 u32 parent = 0, new_nodes[2], snapshot_subvols[2];
1680 ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
1681 BTREE_ID_subvolumes, POS(0, U32_MAX));
1682 if (ret == -BCH_ERR_ENOSPC_btree_slot)
1683 ret = -BCH_ERR_ENOSPC_subvolume_create;
1687 snapshot_subvols[0] = dst_iter.pos.offset;
1688 snapshot_subvols[1] = src_subvolid;
1691 /* Creating a snapshot: */
1693 src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
1694 BTREE_ID_subvolumes, POS(0, src_subvolid),
1695 BTREE_ITER_CACHED, subvolume);
1696 ret = PTR_ERR_OR_ZERO(src_subvol);
1697 if (unlikely(ret)) {
1698 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1699 "subvolume %u not found", src_subvolid);
/* new snapshot nodes are children of the source's snapshot: */
1703 parent = le32_to_cpu(src_subvol->v.snapshot);
1706 ret = bch2_snapshot_node_create(trans, parent, new_nodes,
1708 src_subvolid ? 2 : 1);
/* move the source subvolume onto its new snapshot node: */
1713 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
1714 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
1719 new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
1720 ret = PTR_ERR_OR_ZERO(new_subvol);
1724 new_subvol->v.flags = 0;
1725 new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
1726 new_subvol->v.inode = cpu_to_le64(inode);
1727 new_subvol->v.parent = cpu_to_le32(src_subvolid);
1728 new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
1729 new_subvol->v.otime.hi = 0;
1731 SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
1732 SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
1734 *new_subvolid = new_subvol->k.p.offset;
1735 *new_snapshotid = new_nodes[0];
1737 bch2_trans_iter_exit(trans, &src_iter);
1738 bch2_trans_iter_exit(trans, &dst_iter);
1742 int bch2_fs_subvolumes_init(struct bch_fs *c)
1744 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
1745 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
1746 bch2_subvolume_wait_for_pagecache_and_delete);
1747 mutex_init(&c->snapshots_unlinked_lock);