1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
11 #include <linux/random.h>
13 static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * Walk one hop up the skiplist of node @id: return the highest skip
 * pointer that does not overshoot @ancestor, so ancestor queries are
 * O(log n) instead of walking every parent link.
 * NOTE(review): the per-branch return statements are not visible in this
 * excerpt — confirm each branch returns s->skip[n] against full source.
 */
15 static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
17	const struct snapshot_t *s = __snapshot_t(t, id);
19	if (s->skip[2] <= ancestor)
21	if (s->skip[1] <= ancestor)
23	if (s->skip[0] <= ancestor)
/*
 * Test whether @ancestor is an ancestor of @id in the snapshot tree.
 *
 * Uses the skiplist to climb until @id is within IS_ANCESTOR_BITMAP of
 * @ancestor, then answers with a single bit test in the per-node
 * is_ancestor bitmap.  Must not be called before the check_snapshots
 * recovery pass has run (enforced by the EBUG_ON below), because the
 * in-memory table is only trustworthy after that pass.
 */
28 bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
30	struct snapshot_table *t;
33	EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
	/* table is RCU protected; presumably rcu_read_lock() is held around this — confirm */
36	t = rcu_dereference(c->snapshots);
38	while (id && id < ancestor - IS_ANCESTOR_BITMAP)
39		id = get_ancestor_below(t, id, ancestor);
	/* bit (ancestor - id - 1) set iff @ancestor is an ancestor of @id */
41	ret = id && id < ancestor
42		? test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor)
/*
 * Slow-path ancestor check: walk parent pointers one at a time.
 * Safe to use during early recovery, before the skiplist/is_ancestor
 * bitmap data used by __bch2_snapshot_is_ancestor() has been validated.
 */
49 static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
51	struct snapshot_table *t;
54	t = rcu_dereference(c->snapshots);
	/* snapshot ids decrease from root to leaf, so climb while id < ancestor */
56	while (id && id < ancestor)
57		id = __snapshot_t(t, id)->parent;
60	return id == ancestor;
/*
 * Depth of a new node created under @parent: parent's depth + 1,
 * or 0 for a tree root (parent == 0).
 */
63 static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
68	depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
/*
 * Slow path of snapshot_t_mut(): grow the RCU-protected in-memory
 * snapshot table so it can hold an entry for @id, then return a mutable
 * pointer to that entry.  Caller holds snapshot_table_lock (asserted in
 * the fast-path wrapper).
 *
 * Snapshot ids are allocated downward from U32_MAX, hence the index
 * transform below.
 */
74 static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
76	size_t idx = U32_MAX - id;
78	struct snapshot_table *new, *old;
	/* grow in powers of two, minimum 16 entries */
80	new_size = max(16UL, roundup_pow_of_two(idx + 1));
82	new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
86	old = rcu_dereference_protected(c->snapshots, true);
	/* copy existing entries into the new, larger table */
89			   rcu_dereference_protected(c->snapshots, true)->s,
90			   sizeof(new->s[0]) * c->snapshot_table_size);
	/* publish the new table; NOTE(review): freeing of @old not visible in this excerpt */
92	rcu_assign_pointer(c->snapshots, new);
93	c->snapshot_table_size = new_size;
97	return &rcu_dereference_protected(c->snapshots, true)->s[idx];
/*
 * Return a mutable pointer to the in-memory snapshot_t for @id,
 * growing the table via __snapshot_t_mut() if @id is out of range.
 * Caller must hold snapshot_table_lock.
 */
100 static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
102	size_t idx = U32_MAX - id;
104	lockdep_assert_held(&c->snapshot_table_lock);
	/* fast path: entry already fits in the current table */
106	if (likely(idx < c->snapshot_table_size))
107		return &rcu_dereference_protected(c->snapshots, true)->s[idx];
109	return __snapshot_t_mut(c, id);
/*
 * Format a KEY_TYPE_snapshot_tree value (master subvolume and root
 * snapshot id) into @out, for debugfs/error reporting.
 */
114 void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
117	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
119	prt_printf(out, "subvol %u root snapshot %u",
120		   le32_to_cpu(t.v->master_subvol),
121		   le32_to_cpu(t.v->root_snapshot));
/*
 * bkey validation hook for KEY_TYPE_snapshot_tree: the key position
 * must lie in [POS(0, 1), POS(0, U32_MAX)].
 * Returns 0 if valid, -BCH_ERR_invalid_bkey otherwise (error text in @err).
 */
124 int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
125				enum bkey_invalid_flags flags,
126				struct printbuf *err)
128	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
129	    bkey_lt(k.k->p, POS(0, 1))) {
130		prt_printf(err, "bad pos");
131		return -BCH_ERR_invalid_bkey;
/*
 * Look up snapshot_tree entry @id, copying its value into *@s.
 * A generic ENOENT is narrowed to -BCH_ERR_ENOENT_snapshot_tree so
 * callers can distinguish which lookup failed.
 */
137 int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
138			      struct bch_snapshot_tree *s)
140	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
141					  BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
143	if (bch2_err_matches(ret, ENOENT))
144		ret = -BCH_ERR_ENOENT_snapshot_tree;
/*
 * Allocate a new (empty) snapshot_tree key in the first free slot at or
 * below POS(0, U32_MAX).  Returns the allocated key, or an ERR_PTR;
 * ENOSPC on the btree slot is narrowed to -BCH_ERR_ENOSPC_snapshot_tree.
 */
148 static struct bkey_i_snapshot_tree *
149 __snapshot_tree_create(struct btree_trans *trans)
151	struct btree_iter iter;
152	int ret = bch2_bkey_get_empty_slot(trans, &iter,
153			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
154	struct bkey_i_snapshot_tree *s_t;
156	if (ret == -BCH_ERR_ENOSPC_btree_slot)
157		ret = -BCH_ERR_ENOSPC_snapshot_tree;
161	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
162	ret = PTR_ERR_OR_ZERO(s_t);
163	bch2_trans_iter_exit(trans, &iter);
164	return ret ? ERR_PTR(ret) : s_t;
/*
 * Create a new snapshot_tree entry rooted at snapshot @root_id with
 * master subvolume @subvol_id; the id of the new tree is returned via
 * *@tree_id.
 */
167 static int snapshot_tree_create(struct btree_trans *trans,
168				u32 root_id, u32 subvol_id, u32 *tree_id)
170	struct bkey_i_snapshot_tree *n_tree =
171		__snapshot_tree_create(trans);
174		return PTR_ERR(n_tree);
176	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
177	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
178	*tree_id = n_tree->k.p.offset;
182 /* Snapshot nodes: */
/*
 * Format a KEY_TYPE_snapshot value into @out: flags, parent, children,
 * subvol and tree; depth and skiplist only when the on-disk value is
 * large enough to contain those (newer) fields.
 */
184 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
187	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
189	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
190	       BCH_SNAPSHOT_SUBVOL(s.v),
191	       BCH_SNAPSHOT_DELETED(s.v),
192	       le32_to_cpu(s.v->parent),
193	       le32_to_cpu(s.v->children[0]),
194	       le32_to_cpu(s.v->children[1]),
195	       le32_to_cpu(s.v->subvol),
196	       le32_to_cpu(s.v->tree));
	/* depth/skip fields were added later; only present in larger values */
198	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
199		prt_printf(out, " depth %u skiplist %u %u %u",
200			   le32_to_cpu(s.v->depth),
201			   le32_to_cpu(s.v->skip[0]),
202			   le32_to_cpu(s.v->skip[1]),
203			   le32_to_cpu(s.v->skip[2]));
/*
 * bkey validation hook for KEY_TYPE_snapshot.  Checks, in order:
 *  - key position in [POS(0, 1), POS(0, U32_MAX)]
 *  - parent id (if any) strictly greater than this node's id
 *    (ids are allocated downward, so parents always have larger ids)
 *  - children[] normalized descending and not duplicated
 *  - children ids strictly less than this node's id
 *  - skiplist (if present) sorted ascending, each entry nonzero iff
 *    the node has a parent, and each entry greater than this node's id
 * Returns 0 if valid, -BCH_ERR_invalid_bkey with text in @err otherwise.
 */
206 int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
207			  enum bkey_invalid_flags flags,
208			  struct printbuf *err)
210	struct bkey_s_c_snapshot s;
213	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
214	    bkey_lt(k.k->p, POS(0, 1))) {
215		prt_printf(err, "bad pos");
216		return -BCH_ERR_invalid_bkey;
219	s = bkey_s_c_to_snapshot(k);
221	id = le32_to_cpu(s.v->parent);
222	if (id && id <= k.k->p.offset) {
223		prt_printf(err, "bad parent node (%u <= %llu)",
225		return -BCH_ERR_invalid_bkey;
	/* children must be stored in descending order */
228	if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
229		prt_printf(err, "children not normalized");
230		return -BCH_ERR_invalid_bkey;
233	if (s.v->children[0] &&
234	    s.v->children[0] == s.v->children[1]) {
235		prt_printf(err, "duplicate child nodes");
236		return -BCH_ERR_invalid_bkey;
239	for (i = 0; i < 2; i++) {
240		id = le32_to_cpu(s.v->children[i]);
242		if (id >= k.k->p.offset) {
243			prt_printf(err, "bad child node (%u >= %llu)",
245			return -BCH_ERR_invalid_bkey;
	/* skiplist fields only exist in newer, larger values */
249	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
250		if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
251		    le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
252			prt_printf(err, "skiplist not normalized");
253			return -BCH_ERR_invalid_bkey;
256		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
257			id = le32_to_cpu(s.v->skip[i]);
			/* skip entries exist iff node has a parent, and point upward */
259			if (!id != !s.v->parent ||
261				id <= k.k->p.offset)) {
262				prt_printf(err, "bad skiplist node %u)", id);
263				return -BCH_ERR_invalid_bkey;
/*
 * Btree trigger for the snapshots btree: keep the in-memory
 * snapshot table (c->snapshots) in sync with the on-disk key @new.
 *
 * On insert/update, copies parent/children/subvol/tree (and, when
 * present, depth/skiplist) into the table entry, rebuilds the
 * is_ancestor bitmap for nearby ancestors, and schedules the
 * delete_dead_snapshots recovery pass if the node is flagged deleted.
 * On deletion the table entry is zeroed.
 */
271 int bch2_mark_snapshot(struct btree_trans *trans,
272		       enum btree_id btree, unsigned level,
273		       struct bkey_s_c old, struct bkey_s_c new,
276	struct bch_fs *c = trans->c;
277	struct snapshot_t *t;
278	u32 id = new.k->p.offset;
	/* serializes table growth and entry updates */
281	mutex_lock(&c->snapshot_table_lock);
283	t = snapshot_t_mut(c, id);
285		ret = -BCH_ERR_ENOMEM_mark_snapshot;
289	if (new.k->type == KEY_TYPE_snapshot) {
290		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
293		t->parent	= le32_to_cpu(s.v->parent);
294		t->children[0]	= le32_to_cpu(s.v->children[0]);
295		t->children[1]	= le32_to_cpu(s.v->children[1]);
296		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
297		t->tree		= le32_to_cpu(s.v->tree);
		/* depth/skiplist only present in newer on-disk format */
299		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
300			t->depth	= le32_to_cpu(s.v->depth);
301			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
302			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
303			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
		/* record every ancestor within bitmap range for O(1) ancestor tests */
311		while ((parent = bch2_snapshot_parent_early(c, parent)) &&
312		       parent - id - 1 < IS_ANCESTOR_BITMAP)
313			__set_bit(parent - id - 1, t->is_ancestor);
315		if (BCH_SNAPSHOT_DELETED(s.v)) {
316			set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
317			c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots);
		/* key deleted: wipe the in-memory entry */
320		memset(t, 0, sizeof(*t));
323	mutex_unlock(&c->snapshot_table_lock);
/* Look up snapshot key @id and copy its value into *@s. */
327 static int snapshot_lookup(struct btree_trans *trans, u32 id,
328			   struct bch_snapshot *s)
330	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
331				       BTREE_ITER_WITH_UPDATES, snapshot, s);
/*
 * Return whether snapshot @id is "live" (exists and is not flagged
 * deleted): >0 live, 0 not live, negative on lookup error.
 */
334 static int snapshot_live(struct btree_trans *trans, u32 id)
336	struct bch_snapshot v;
342	ret = snapshot_lookup(trans, id, &v);
343	if (bch2_err_matches(ret, ENOENT))
344		bch_err(trans->c, "snapshot node %u not found", id);
348	return !BCH_SNAPSHOT_DELETED(&v);
/*
 * Compute the equivalence class for snapshot node @k: interior nodes
 * with exactly one live child are equivalent to that child (their keys
 * can be collapsed), so such a node inherits the child's equiv id.
 */
351 static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
353	struct bch_fs *c = trans->c;
354	unsigned i, nr_live = 0, live_idx = 0;
355	struct bkey_s_c_snapshot snap;
356	u32 id = k.k->p.offset, child[2];
358	if (k.k->type != KEY_TYPE_snapshot)
361	snap = bkey_s_c_to_snapshot(k);
363	child[0] = le32_to_cpu(snap.v->children[0]);
364	child[1] = le32_to_cpu(snap.v->children[1]);
	/* count live children; remember the index of the (last) live one */
366	for (i = 0; i < 2; i++) {
367		int ret = snapshot_live(trans, child[i]);
377	mutex_lock(&c->snapshot_table_lock);
	/* exactly one live child => collapse into its equivalence class */
379	snapshot_t_mut(c, id)->equiv = nr_live == 1
380		? snapshot_t_mut(c, child[live_idx])->equiv
383	mutex_unlock(&c->snapshot_table_lock);
/* Return child @child (0 = left, 1 = right) of snapshot node @id. */
390 static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
392	return snapshot_t(c, id)->children[child];
395 static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
397	return bch2_snapshot_child(c, id, 0);
400 static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
402	return bch2_snapshot_child(c, id, 1);
/*
 * Advance @id to the next node in a traversal of its snapshot tree:
 * descend to the left child if there is one, otherwise climb to the
 * first ancestor that has an unvisited right child.
 */
405 static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
409	n = bch2_snapshot_left_child(c, id);
413	while ((parent = bch2_snapshot_parent(c, id))) {
414		n = bch2_snapshot_right_child(c, parent);
/*
 * Walk the whole snapshot tree rooted at @snapshot_root and return the
 * lowest-numbered (i.e. oldest) subvolume id found on any node.
 */
423 static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
425	u32 id = snapshot_root;
429		s = snapshot_t(c, id)->subvol;
	/* keep the smallest nonzero subvol id seen so far */
431		if (s && (!subvol || s < subvol))
434		id = bch2_snapshot_tree_next(c, id);
/*
 * Find a master subvolume for the snapshot tree rooted at
 * @snapshot_root: scan all subvolumes for one inside this tree that is
 * not itself a snapshot.  If none exists, promote the tree's oldest
 * subvolume by clearing its SNAP flag.  Result returned via *@subvol_id.
 */
440 static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
441					    u32 snapshot_root, u32 *subvol_id)
443	struct bch_fs *c = trans->c;
444	struct btree_iter iter;
446	struct bkey_s_c_subvolume s;
450	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
452		if (k.k->type != KEY_TYPE_subvolume)
455		s = bkey_s_c_to_subvolume(k);
		/* skip subvolumes that belong to a different snapshot tree */
456		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
458		if (!BCH_SUBVOLUME_SNAP(s.v)) {
459			*subvol_id = s.k->p.offset;
465	bch2_trans_iter_exit(trans, &iter);
	/* no non-snapshot subvolume found: promote the oldest one */
467	if (!ret && !found) {
468		struct bkey_i_subvolume *s;
470		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
472		s = bch2_bkey_get_mut_typed(trans, &iter,
473					    BTREE_ID_subvolumes, POS(0, *subvol_id),
475		ret = PTR_ERR_OR_ZERO(s);
479		SET_BCH_SUBVOLUME_SNAP(&s->v, false);
/*
 * fsck helper for one snapshot_tree key @k:
 *  - if its root snapshot is missing, not actually a tree root, or does
 *    not point back at this tree, delete the tree key;
 *  - if its master subvolume is missing, points outside this tree, or
 *    is a snapshot subvolume, repair by re-resolving the master
 *    subvolume via bch2_snapshot_tree_master_subvol().
 */
485 static int check_snapshot_tree(struct btree_trans *trans,
486			       struct btree_iter *iter,
489	struct bch_fs *c = trans->c;
490	struct bkey_s_c_snapshot_tree st;
491	struct bch_snapshot s;
492	struct bch_subvolume subvol;
493	struct printbuf buf = PRINTBUF;
497	if (k.k->type != KEY_TYPE_snapshot_tree)
500	st = bkey_s_c_to_snapshot_tree(k);
501	root_id = le32_to_cpu(st.v->root_snapshot);
503	ret = snapshot_lookup(trans, root_id, &s);
504	if (ret && !bch2_err_matches(ret, ENOENT))
	/* tree entry points at a missing/incorrect root: delete it */
507	if (fsck_err_on(ret ||
508			root_id != bch2_snapshot_root(c, root_id) ||
509			st.k->p.offset != le32_to_cpu(s.tree),
511			"snapshot tree points to missing/incorrect snapshot:\n  %s",
512			(bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
513		ret = bch2_btree_delete_at(trans, iter, 0);
517	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
519	if (ret && !bch2_err_matches(ret, ENOENT))
	/* bad master subvolume: look one up and rewrite the tree key */
522	if (fsck_err_on(ret, c,
523			"snapshot tree points to missing subvolume:\n  %s",
524			(printbuf_reset(&buf),
525			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
526	    fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
527						le32_to_cpu(subvol.snapshot),
529			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
530			(printbuf_reset(&buf),
531			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
532	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
533			"snapshot tree points to snapshot subvolume:\n  %s",
534			(printbuf_reset(&buf),
535			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
536		struct bkey_i_snapshot_tree *u;
539		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
543		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
544		ret = PTR_ERR_OR_ZERO(u);
548		u->v.master_subvol = cpu_to_le32(subvol_id);
549		st = snapshot_tree_i_to_s_c(u);
558 * For each snapshot_tree, make sure it points to the root of a snapshot tree
559 * and that snapshot entry points back to it, or delete it.
561 * And, make sure it points to a subvolume within that snapshot tree, or correct
562 * it to point to the oldest subvolume within that snapshot tree.
/*
 * fsck pass: run check_snapshot_tree() over every key in the
 * snapshot_trees btree, committing each repair.
 */
564 int bch2_check_snapshot_trees(struct bch_fs *c)
566	struct btree_iter iter;
570	ret = bch2_trans_run(c,
571		for_each_btree_key_commit(&trans, iter,
572			BTREE_ID_snapshot_trees, POS_MIN,
573			BTREE_ITER_PREFETCH, k,
574			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
575		check_snapshot_tree(&trans, &iter, k)));
578		bch_err(c, "error %i checking snapshot trees", ret);
583 * Look up snapshot tree for @tree_id and find root,
584  * make sure @snap_id is a descendant:
/*
 * Check that snapshot node @snap_id's tree pointer @tree_id is sane:
 * the tree must exist and @snap_id must descend from its root.
 * Returns >0 good, 0 bad, negative on error (ENOENT maps to 0/bad).
 */
586 static int snapshot_tree_ptr_good(struct btree_trans *trans,
587				  u32 snap_id, u32 tree_id)
589	struct bch_snapshot_tree s_t;
590	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
592	if (bch2_err_matches(ret, ENOENT))
597	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
/*
 * Pick a skiplist entry for a node whose parent is @id: a randomly
 * chosen ancestor, at a uniformly random height up to @id's depth.
 * Randomized heights give the skiplist its expected O(log n) search.
 */
600 static u32 snapshot_skiplist_get(struct bch_fs *c, u32 id)
602	const struct snapshot_t *s;
608	s = snapshot_t(c, id);
610		id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
/*
 * Validate the three skiplist entries of snapshot @s: each must be
 * nonzero iff the node has a parent, must exist, and must belong to the
 * same snapshot tree.  Returns >0 good, 0 bad, negative on error.
 */
616 static int snapshot_skiplist_good(struct btree_trans *trans, struct bch_snapshot s)
618	struct bch_snapshot a;
622	for (i = 0; i < 3; i++) {
		/* skip entries must be present exactly when a parent exists */
623		if (!s.parent != !s.skip[i])
629		ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
630		if (bch2_err_matches(ret, ENOENT))
635		if (a.tree != s.tree)
643 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
644 * its snapshot_tree pointer is correct (allocate new one if necessary), then
645 * update this node's pointer to root node's pointer:
/*
 * Repair a bad snapshot_tree pointer on snapshot node @k: look up the
 * root of @k's tree, make sure the root's tree pointer is valid
 * (creating a fresh snapshot_tree entry if necessary), then copy the
 * root's tree id into @k.
 */
647 static int snapshot_tree_ptr_repair(struct btree_trans *trans,
648				    struct btree_iter *iter,
650				    struct bch_snapshot *s)
652	struct bch_fs *c = trans->c;
653	struct btree_iter root_iter;
654	struct bch_snapshot_tree s_t;
655	struct bkey_s_c_snapshot root;
656	struct bkey_i_snapshot *u;
657	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
660	root = bch2_bkey_get_iter_typed(trans, &root_iter,
661			       BTREE_ID_snapshots, POS(0, root_id),
662			       BTREE_ITER_WITH_UPDATES, snapshot);
663	ret = bkey_err(root);
667	tree_id = le32_to_cpu(root.v->tree);
669	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
670	if (ret && !bch2_err_matches(ret, ENOENT))
	/* root's own tree pointer is bad: create a new tree entry for it */
673	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
674		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
675		ret = PTR_ERR_OR_ZERO(u) ?:
676			snapshot_tree_create(trans, root_id,
677				bch2_snapshot_tree_oldest_subvol(c, root_id),
682		u->v.tree = cpu_to_le32(tree_id);
683		if (k.k->p.offset == root_id)
	/* @k is not the root itself: fix its tree pointer too */
687	if (k.k->p.offset != root_id) {
688		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
689		ret = PTR_ERR_OR_ZERO(u);
693		u->v.tree = cpu_to_le32(tree_id);
697	bch2_trans_iter_exit(trans, &root_iter);
/* Comparator for sorting little-endian u32s in CPU order (for bubble_sort). */
701 static int cmp_le32(__le32 l, __le32 r)
703	return cmp_int(le32_to_cpu(l), le32_to_cpu(r));
/*
 * fsck helper for one snapshot key @k.  Verifies, logging/repairing as
 * appropriate:
 *  - parent exists and lists this node among its children
 *  - each child exists and points back at this node as parent
 *  - the SUBVOL flag agrees with whether a subvolume points at this node
 *  - the tree pointer is valid (repaired via snapshot_tree_ptr_repair())
 *  - depth matches parent depth + 1 (only enforced as an fsck error on
 *    filesystems upgraded past the snapshot_skiplists version)
 *  - the skiplist entries are valid, regenerating them if not
 */
706 static int check_snapshot(struct btree_trans *trans,
707			  struct btree_iter *iter,
710	struct bch_fs *c = trans->c;
711	struct bch_snapshot s;
712	struct bch_subvolume subvol;
713	struct bch_snapshot v;
714	struct bkey_i_snapshot *u;
715	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
717	struct printbuf buf = PRINTBUF;
718	bool should_have_subvol;
722	if (k.k->type != KEY_TYPE_snapshot)
	/* copy value into a fixed-size struct; zero-fill fields absent on disk */
725	memset(&s, 0, sizeof(s));
726	memcpy(&s, k.v, bkey_val_bytes(k.k));
728	id = le32_to_cpu(s.parent);
730		ret = snapshot_lookup(trans, id, &v);
731		if (bch2_err_matches(ret, ENOENT))
732			bch_err(c, "snapshot with nonexistent parent:\n  %s",
733				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		/* parent must list us as one of its two children */
737		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
738		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
739			bch_err(c, "snapshot parent %u missing pointer to child %llu",
746	for (i = 0; i < 2 && s.children[i]; i++) {
747		id = le32_to_cpu(s.children[i]);
749		ret = snapshot_lookup(trans, id, &v);
750		if (bch2_err_matches(ret, ENOENT))
751			bch_err(c, "snapshot node %llu has nonexistent child %u",
756		if (le32_to_cpu(v.parent) != k.k->p.offset) {
757			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
758				id, le32_to_cpu(v.parent), k.k->p.offset);
	/* a node should reference a subvolume iff flagged SUBVOL and not deleted */
764	should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
765		!BCH_SNAPSHOT_DELETED(&s);
767	if (should_have_subvol) {
768		id = le32_to_cpu(s.subvol);
769		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
770		if (bch2_err_matches(ret, ENOENT))
771			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
772				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
776		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
777			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
783		if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n  %s",
784				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
785			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
786			ret = PTR_ERR_OR_ZERO(u);
795	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
799	if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n  %s",
800			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
801		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
	/* depth check relies on parent depth already being correct (reverse iteration) */
807	real_depth = bch2_snapshot_depth(c, parent_id);
809	if (le32_to_cpu(s.depth) != real_depth &&
810	    (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
811	     fsck_err(c, "snapshot with incorrect depth field, should be %u:\n  %s",
812		      real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
813		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
814		ret = PTR_ERR_OR_ZERO(u);
818		u->v.depth = cpu_to_le32(real_depth);
822	ret = snapshot_skiplist_good(trans, s);
827	    (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
828	     fsck_err(c, "snapshot with bad skiplist field:\n  %s",
829		      (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
830		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
831		ret = PTR_ERR_OR_ZERO(u);
		/* regenerate skiplist with random ancestors, then normalize order */
835		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
836			u->v.skip[i] = cpu_to_le32(snapshot_skiplist_get(c, parent_id));
838		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
/*
 * fsck pass: run check_snapshot() over every snapshot key.
 */
848 int bch2_check_snapshots(struct bch_fs *c)
850	struct btree_iter iter;
855	 * We iterate backwards as checking/fixing the depth field requires that
856	 * the parent's depth already be correct:
858	ret = bch2_trans_run(c,
859		for_each_btree_key_reverse_commit(&trans, iter,
860			BTREE_ID_snapshots, POS_MAX,
861			BTREE_ITER_PREFETCH, k,
862			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
863		check_snapshot(&trans, &iter, k)));
/*
 * fsck helper for one subvolume key @k:
 *  - its snapshot must exist;
 *  - an UNLINKED subvolume is deleted here (followed by a nested
 *    transaction restart since deletion touches other keys);
 *  - a non-SNAP subvolume that is not its tree's master subvolume gets
 *    its SNAP flag set.
 */
869 static int check_subvol(struct btree_trans *trans,
870			struct btree_iter *iter,
873	struct bch_fs *c = trans->c;
874	struct bkey_s_c_subvolume subvol;
875	struct bch_snapshot snapshot;
879	if (k.k->type != KEY_TYPE_subvolume)
882	subvol = bkey_s_c_to_subvolume(k);
883	snapid = le32_to_cpu(subvol.v->snapshot);
884	ret = snapshot_lookup(trans, snapid, &snapshot);
886	if (bch2_err_matches(ret, ENOENT))
887		bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
888			k.k->p.offset, snapid);
892	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
895		ret = bch2_subvolume_delete(trans, iter->pos.offset);
897			bch_err(c, "error deleting subvolume %llu: %s",
898				iter->pos.offset, bch2_err_str(ret));
		/* deletion modified other btrees: force a nested restart */
899		return ret ?: -BCH_ERR_transaction_restart_nested;
902	if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
903		u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
905		struct bch_snapshot_tree st;
908		snapshot_tree = snapshot_t(c, snapshot_root)->tree;
911		ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
913		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
914				"%s: snapshot tree %u not found", __func__, snapshot_tree);
		/* only the master subvolume may be non-SNAP */
919		if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
920				"subvolume %llu is not set as snapshot but is not master subvolume",
922			struct bkey_i_subvolume *s =
923				bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
924			ret = PTR_ERR_OR_ZERO(s);
928			SET_BCH_SUBVOLUME_SNAP(&s->v, true);
/*
 * fsck pass: run check_subvol() over every key in the subvolumes btree.
 */
936 int bch2_check_subvols(struct bch_fs *c)
938	struct btree_iter iter;
942	ret = bch2_trans_run(c,
943		for_each_btree_key_commit(&trans, iter,
944			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
945			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
946		check_subvol(&trans, &iter, k)));
/* Free the in-memory snapshot table on filesystem shutdown. */
952 void bch2_fs_snapshots_exit(struct bch_fs *c)
954	kfree(rcu_dereference_protected(c->snapshots, true));
/*
 * Populate the in-memory snapshot table at mount time: replay
 * bch2_mark_snapshot() for every on-disk snapshot key and compute each
 * node's equivalence class.
 */
957 int bch2_snapshots_read(struct bch_fs *c)
959	struct btree_iter iter;
963	ret = bch2_trans_run(c,
964		for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
966			bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
967			bch2_snapshot_set_equiv(&trans, k)));
974 * Mark a snapshot as deleted, for future cleanup:
/*
 * Flag snapshot @id as deleted (and no longer a subvolume snapshot);
 * actual key/node removal happens later in bch2_delete_dead_snapshots().
 * Missing node is reported as a filesystem inconsistency.
 */
976 static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
978	struct btree_iter iter;
979	struct bkey_i_snapshot *s;
982	s = bch2_bkey_get_mut_typed(trans, &iter,
983				    BTREE_ID_snapshots, POS(0, id),
985	ret = PTR_ERR_OR_ZERO(s);
987		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
988					trans->c, "missing snapshot %u", id);
992	/* already deleted? */
993	if (BCH_SNAPSHOT_DELETED(&s->v))
996	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
997	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
1000	bch2_trans_iter_exit(trans, &iter);
/*
 * Remove snapshot node @id (which must already be flagged DELETED):
 * unlink it from its parent's children[] (keeping children normalized
 * in descending order), or — if it is a tree root — update the
 * snapshot_tree entry to the surviving child or delete the tree entry
 * entirely, then delete the snapshot key itself.
 */
1004 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
1006	struct bch_fs *c = trans->c;
1007	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
1008	struct btree_iter tree_iter = (struct btree_iter) { NULL };
1009	struct bkey_s_c_snapshot s;
1014	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
1015				     BTREE_ITER_INTENT, snapshot);
1017	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1018				"missing snapshot %u", id);
	/* only nodes already marked deleted may be removed */
1023	BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
1024	parent_id = le32_to_cpu(s.v->parent);
1027		struct bkey_i_snapshot *parent;
1029		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
1030				     BTREE_ID_snapshots, POS(0, parent_id),
1032		ret = PTR_ERR_OR_ZERO(parent);
1033		if (unlikely(ret)) {
1034			bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1035						"missing snapshot %u", parent_id);
		/* find which child slot points at us */
1039		for (i = 0; i < 2; i++)
1040			if (le32_to_cpu(parent->v.children[i]) == id)
1044			bch_err(c, "snapshot %u missing child pointer to %u",
1047		parent->v.children[i] = 0;
		/* keep children[] normalized: children[0] >= children[1] */
1049		if (le32_to_cpu(parent->v.children[0]) <
1050		    le32_to_cpu(parent->v.children[1]))
1051			swap(parent->v.children[0],
1052			     parent->v.children[1]);
1055		 * We're deleting the root of a snapshot tree: update the
1056		 * snapshot_tree entry to point to the new root, or delete it if
1057		 * this is the last snapshot ID in this tree:
1059		struct bkey_i_snapshot_tree *s_t;
		/* a deletable root has at most one child */
1061		BUG_ON(s.v->children[1]);
1063		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
1064				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
1066		ret = PTR_ERR_OR_ZERO(s_t);
1070		if (s.v->children[0]) {
1071			s_t->v.root_snapshot = s.v->children[0];
			/* no children left: delete the tree entry in place */
1073			s_t->k.type = KEY_TYPE_deleted;
1074			set_bkey_val_u64s(&s_t->k, 0);
1078	ret = bch2_btree_delete_at(trans, &iter, 0);
1080	bch2_trans_iter_exit(trans, &tree_iter);
1081	bch2_trans_iter_exit(trans, &p_iter);
1082	bch2_trans_iter_exit(trans, &iter);
/*
 * Allocate @nr_snapids new snapshot keys as children of @parent in tree
 * @tree: ids are taken from free slots scanning downward from the
 * highest existing key.  Each new node gets the matching entry of
 * @snapshot_subvols, depth parent+1, and freshly generated skiplist
 * entries; new ids are returned via @new_snapids.
 */
1086 static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
1088			  u32 *snapshot_subvols,
1089			  unsigned nr_snapids)
1091	struct bch_fs *c = trans->c;
1092	struct btree_iter iter;
1093	struct bkey_i_snapshot *n;
1096	u32 depth = bch2_snapshot_depth(c, parent);
1099	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
1100			     POS_MIN, BTREE_ITER_INTENT);
1101	k = bch2_btree_iter_peek(&iter);
1106	for (i = 0; i < nr_snapids; i++) {
		/* step backward to the next free id slot */
1107		k = bch2_btree_iter_prev_slot(&iter);
		/* ran out of ids below the lowest existing key */
1112		if (!k.k || !k.k->p.offset) {
1113			ret = -BCH_ERR_ENOSPC_snapshot_create;
1117		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
1118		ret = PTR_ERR_OR_ZERO(n);
1123		n->v.parent	= cpu_to_le32(parent);
1124		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
1125		n->v.tree	= cpu_to_le32(tree);
1126		n->v.depth	= cpu_to_le32(depth);
1128		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
1129			n->v.skip[j] = cpu_to_le32(snapshot_skiplist_get(c, parent));
		/* skiplist entries must be stored sorted */
1131		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
1132		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
		/* update the in-memory table immediately, before commit */
1134		ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
1135					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
1139		new_snapids[i]	= iter.pos.offset;
1142	bch2_trans_iter_exit(trans, &iter);
1147 * Create new snapshot IDs as children of an existing snapshot ID:
/*
 * Create new snapshot nodes as the two children of existing node
 * @parent, which must currently be a leaf; the parent then becomes an
 * interior node and stops referencing a subvolume directly.
 */
1149 static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
1151			      u32 *snapshot_subvols,
1152			      unsigned nr_snapids)
1154	struct btree_iter iter;
1155	struct bkey_i_snapshot *n_parent;
1158	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
1159			BTREE_ID_snapshots, POS(0, parent),
1161	ret = PTR_ERR_OR_ZERO(n_parent);
1162	if (unlikely(ret)) {
1163		if (bch2_err_matches(ret, ENOENT))
1164			bch_err(trans->c, "snapshot %u not found", parent);
	/* only leaves may gain children */
1168	if (n_parent->v.children[0] || n_parent->v.children[1]) {
1169		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
1174	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
1175			     new_snapids, snapshot_subvols, nr_snapids);
	/* parent becomes interior: record children, drop its subvolume */
1179	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
1180	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
1181	n_parent->v.subvol = 0;
1182	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
1184	bch2_trans_iter_exit(trans, &iter);
1189 * Create a snapshot node that is the root of a new tree:
/*
 * Create a brand-new snapshot tree: allocate a snapshot_tree entry,
 * create its root snapshot node, and wire the tree's master subvolume
 * and root pointers to the new ids.
 */
1191 static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
1193			      u32 *snapshot_subvols,
1194			      unsigned nr_snapids)
1196	struct bkey_i_snapshot_tree *n_tree;
1199	n_tree = __snapshot_tree_create(trans);
1200	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
1201		create_snapids(trans, 0, n_tree->k.p.offset,
1202			     new_snapids, snapshot_subvols, nr_snapids);
1206	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
1207	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
/*
 * Public entry point for snapshot node creation: with a @parent, create
 * exactly two children of it; with no parent, create a single root node
 * in a brand-new tree.  Any other combination is a caller bug.
 */
1211 int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
1213			      u32 *snapshot_subvols,
1214			      unsigned nr_snapids)
1216	BUG_ON((parent == 0) != (nr_snapids == 1));
1217	BUG_ON((parent != 0) != (nr_snapids == 2));
1220		? bch2_snapshot_node_create_children(trans, parent,
1221				new_snapids, snapshot_subvols, nr_snapids)
1222		: bch2_snapshot_node_create_tree(trans,
1223				new_snapids, snapshot_subvols, nr_snapids);
/*
 * Per-key helper when purging dead snapshots: delete key @k if its
 * snapshot id is in @deleted, or if another key at the same position
 * with the same equivalence class was already seen (tracked in
 * @equiv_seen, reset whenever the position changes via @last_pos).
 */
1227 static int snapshot_delete_key(struct btree_trans *trans,
1228			       struct btree_iter *iter,
1230			       snapshot_id_list *deleted,
1231			       snapshot_id_list *equiv_seen,
1232			       struct bpos *last_pos)
1234	struct bch_fs *c = trans->c;
1235	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
	/* new key position: equiv_seen tracking restarts */
1237	if (!bkey_eq(k.k->p, *last_pos))
1241	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
1242	    snapshot_list_has_id(equiv_seen, equiv)) {
1243		return bch2_btree_delete_at(trans, iter,
1244					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1246		return snapshot_list_add(c, equiv_seen, equiv);
/*
 * Mark snapshot @k as deleted if it is redundant: not referenced by a
 * subvolume, not already deleted, and without live children (checked
 * via snapshot_live() on both child slots).
 */
1250 static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
1253	struct bkey_s_c_snapshot snap;
1257	if (k.k->type != KEY_TYPE_snapshot)
1260	snap = bkey_s_c_to_snapshot(k);
1261	if (BCH_SNAPSHOT_DELETED(snap.v) ||
1262	    BCH_SNAPSHOT_SUBVOL(snap.v))
1265	children[0] = le32_to_cpu(snap.v->children[0]);
1266	children[1] = le32_to_cpu(snap.v->children[1]);
1268	ret   = snapshot_live(trans, children[0]) ?:
1269		snapshot_live(trans, children[1]);
1274		return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
/*
 * Purge all snapshots flagged as deleted, in phases:
 *  1. mark redundant snapshots (no live children, no subvolume) deleted;
 *  2. recompute equivalence classes;
 *  3. collect the full list of deleted snapshot ids;
 *  4. for every snapshotted btree, delete keys belonging to deleted or
 *     equivalence-redundant snapshots;
 *  5. delete the snapshot nodes themselves and clear the
 *     BCH_FS_HAVE_DELETED_SNAPSHOTS flag.
 *
 * Fix vs. original: the error message at the read-write-early failure
 * path misspelled "deleting" as "deleleting".
 */
1278 int bch2_delete_dead_snapshots(struct bch_fs *c)
1280	struct btree_trans trans;
1281	struct btree_iter iter;
1283	struct bkey_s_c_snapshot snap;
1284	snapshot_id_list deleted = { 0 };
	/* deletion writes metadata: go read-write if startup hasn't yet */
1288	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
1289		ret = bch2_fs_read_write_early(c);
1291			bch_err(c, "error deleting dead snapshots: error going rw: %s", bch2_err_str(ret));
1296	bch2_trans_init(&trans, c, 0, 0);
1299	 * For every snapshot node: If we have no live children and it's not
1300	 * pointed to by a subvolume, delete it:
1302	ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
1305		bch2_delete_redundant_snapshot(&trans, &iter, k));
1307		bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
	/* re-derive equivalence classes now that more nodes are marked deleted */
1311	for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
1313		bch2_snapshot_set_equiv(&trans, k));
1315		bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
	/* gather the ids of everything flagged deleted */
1319	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
1320			   POS_MIN, 0, k, ret) {
1321		if (k.k->type != KEY_TYPE_snapshot)
1324		snap = bkey_s_c_to_snapshot(k);
1325		if (BCH_SNAPSHOT_DELETED(snap.v)) {
1326			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
1331	bch2_trans_iter_exit(&trans, &iter);
1334		bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
	/* drop keys in deleted snapshots from every snapshotted btree */
1338	for (id = 0; id < BTREE_ID_NR; id++) {
1339		struct bpos last_pos = POS_MIN;
1340		snapshot_id_list equiv_seen = { 0 };
1342		if (!btree_type_has_snapshots(id))
1345		ret = for_each_btree_key_commit(&trans, iter,
1347				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1348				NULL, NULL, BTREE_INSERT_NOFAIL,
1349			snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
1351		darray_exit(&equiv_seen);
1354			bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
	/* finally remove the snapshot nodes themselves */
1359	for (i = 0; i < deleted.nr; i++) {
1360		ret = commit_do(&trans, NULL, NULL, 0,
1361			bch2_snapshot_node_delete(&trans, deleted.data[i]));
1363			bch_err(c, "error deleting snapshot %u: %s",
1364				deleted.data[i], bch2_err_str(ret));
1369	clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
1371	darray_exit(&deleted);
1372	bch2_trans_exit(&trans);
/*
 * Workqueue body for asynchronous dead-snapshot deletion: runs the
 * purge if the flag is set, then drops the write ref taken by
 * bch2_delete_dead_snapshots_async().
 */
1378 static void bch2_delete_dead_snapshots_work(struct work_struct *work)
1380	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
1382	if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
1383		bch2_delete_dead_snapshots(c);
1384	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Queue dead-snapshot deletion on the write-ref workqueue.  The write
 * ref pins the fs writable; it is released here if the work was already
 * queued, otherwise by the work item itself.
 */
1387 void bch2_delete_dead_snapshots_async(struct bch_fs *c)
1389	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
1390	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
1391		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
/*
 * Transaction commit hook: note that deleted snapshots now exist and,
 * unless recovery will run the delete_dead_snapshots pass anyway, kick
 * off asynchronous deletion.
 */
1394 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
1395					   struct btree_trans_commit_hook *h)
1397	struct bch_fs *c = trans->c;
1399	set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
	/* recovery pass will handle it; don't queue work during recovery */
1401	if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots)
1404	bch2_delete_dead_snapshots_async(c);
/*
 * bkey validation hook for KEY_TYPE_subvolume: position must lie within
 * [SUBVOL_POS_MIN, SUBVOL_POS_MAX].
 */
1410 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
1411			   unsigned flags, struct printbuf *err)
1413	if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
1414	    bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
1415		prt_printf(err, "invalid pos");
1416		return -BCH_ERR_invalid_bkey;
/*
 * Format a KEY_TYPE_subvolume value (root inode, snapshot id, and —
 * when the on-disk value is new enough to contain it — parent) into @out.
 */
1422 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
1425	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
1427	prt_printf(out, "root %llu snapshot id %u",
1428		   le64_to_cpu(s.v->inode),
1429		   le32_to_cpu(s.v->snapshot));
	/* parent field added later; only present in larger values */
1431	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
1432		prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
/*
 * Look up subvolume @subvol and copy its value into *@s.  When
 * @inconsistent_if_not_found is set, a missing subvolume is reported as
 * a filesystem inconsistency (the caller believes it must exist).
 */
1435 static __always_inline int
1436 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
1437			   bool inconsistent_if_not_found,
1439			   struct bch_subvolume *s)
1441	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
1442					  iter_flags, subvolume, s);
1443	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
1444				inconsistent_if_not_found,
1445				trans->c, "missing subvolume %u", subvol);
/* Out-of-line wrapper around bch2_subvolume_get_inlined(). */
1449 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
1450		       bool inconsistent_if_not_found,
1452		       struct bch_subvolume *s)
1454	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
1457 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
1458 struct bch_subvolume *subvol)
1460 struct bch_snapshot snap;
1462 return snapshot_lookup(trans, snapshot, &snap) ?:
1463 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
1466 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
1469 struct btree_iter iter;
1473 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol),
1475 BTREE_ITER_WITH_UPDATES);
1476 ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -BCH_ERR_ENOENT_subvolume;
1479 *snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
1480 else if (bch2_err_matches(ret, ENOENT))
1481 bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
1482 bch2_trans_iter_exit(trans, &iter);
1486 static int bch2_subvolume_reparent(struct btree_trans *trans,
1487 struct btree_iter *iter,
1489 u32 old_parent, u32 new_parent)
1491 struct bkey_i_subvolume *s;
1494 if (k.k->type != KEY_TYPE_subvolume)
1497 if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
1498 le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
1501 s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
1502 ret = PTR_ERR_OR_ZERO(s);
1506 s->v.parent = cpu_to_le32(new_parent);
/*
 * Scan for subvolumes with parent @subvolid_to_delete, reparent:
 *
 * Before a subvolume is deleted, any of its children are re-pointed at its
 * own parent so the subvolume tree stays connected.  Each reparent is its
 * own btree commit (for_each_btree_key_commit).
 */
static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_subvolume s;

	/*
	 * First fetch the doomed subvolume (retrying on transaction restart)
	 * to learn its parent, then walk every subvolume key and reparent
	 * matches.  NOTE(review): elided lines reconstructed — verify the
	 * iterator flags against the original file.
	 */
	return lockrestart_do(trans,
			bch2_subvolume_get(trans, subvolid_to_delete, true,
					   BTREE_ITER_CACHED, &s)) ?:
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
				NULL, NULL, BTREE_INSERT_NOFAIL,
			bch2_subvolume_reparent(trans, &iter, k,
					subvolid_to_delete, le32_to_cpu(s.parent)));
}
/*
 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
 * deletion/cleanup:
 *
 * Runs inside a single transaction commit (see bch2_subvolume_delete()).
 * The commit hook fires only if the commit succeeds, so dead-snapshot
 * cleanup is never scheduled for a delete that didn't happen.
 */
static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
	struct btree_iter iter;
	struct bkey_s_c_subvolume subvol;
	struct btree_trans_commit_hook *h;
	u32 snapid;
	int ret = 0;

	subvol = bch2_bkey_get_iter_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, subvolid),
				BTREE_ITER_CACHED|BTREE_ITER_INTENT,
				subvolume);
	ret = bkey_err(subvol);
	/* A delete for a nonexistent subvolume means the fs is inconsistent: */
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
				"missing subvolume %u", subvolid);
	if (ret)
		goto err;

	snapid = le32_to_cpu(subvol.v->snapshot);

	ret = bch2_btree_delete_at(trans, &iter, 0);
	if (ret)
		goto err;

	/* Mark the subvolume's snapshot node deleted; actual GC happens later: */
	ret = bch2_snapshot_node_set_deleted(trans, snapid);
	if (ret)
		goto err;

	/* Hook memory is transaction-owned; freed with the transaction. */
	h = bch2_trans_kmalloc(trans, sizeof(*h));
	ret = PTR_ERR_OR_ZERO(h);
	if (ret)
		goto err;

	h->fn = bch2_delete_dead_snapshots_hook;
	bch2_trans_commit_hook(trans, h);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
1573 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
1575 return bch2_subvolumes_reparent(trans, subvolid) ?:
1576 commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
1577 __bch2_subvolume_delete(trans, subvolid));
/*
 * Work function: for each subvolume queued on c->snapshots_unlinked, wait
 * for pagecache references to drain (by evicting the subvolume's inodes),
 * then delete the subvolume.
 *
 * NOTE(review): the outer retry loop and exit path were elided in this
 * extraction and are reconstructed here — confirm against the original file.
 */
static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs,
				snapshot_wait_for_pagecache_and_delete_work);
	snapshot_id_list s;
	u32 *id;
	int ret = 0;

	while (!ret) {
		/* Atomically steal the current list; new unlinks start a fresh one. */
		mutex_lock(&c->snapshots_unlinked_lock);
		s = c->snapshots_unlinked;
		darray_init(&c->snapshots_unlinked);
		mutex_unlock(&c->snapshots_unlinked_lock);

		if (!s.nr)
			break;

		bch2_evict_subvolume_inodes(c, &s);

		for (id = s.data; id < s.data + s.nr; id++) {
			ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id));
			if (ret) {
				bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
				break;
			}
		}

		darray_exit(&s);
	}

	/* Drop the ref taken when this work was queued: */
	bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
}
/*
 * Per-commit hook context for subvolume unlink: carries the subvolume id
 * from the unlink transaction to the commit hook that queues it for
 * pagecache-drain + deletion.
 */
struct subvolume_unlink_hook {
	struct btree_trans_commit_hook	h;	/* must be first: container_of() in the hook fn */
	u32				subvol;	/* subvolume being unlinked */
};
/*
 * Commit hook for subvolume unlink: add the subvolume to the
 * snapshots_unlinked list and schedule the pagecache-drain/delete worker.
 *
 * Runs after a successful commit, so only actually-unlinked subvolumes are
 * queued for deletion.
 */
static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
						struct btree_trans_commit_hook *_h)
{
	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
	struct bch_fs *c = trans->c;
	int ret = 0;

	mutex_lock(&c->snapshots_unlinked_lock);
	/* Avoid queuing the same subvolume twice: */
	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
	mutex_unlock(&c->snapshots_unlinked_lock);

	if (ret)
		return ret;

	/* Write ref pins the fs read-write until the worker finishes: */
	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
		return -EROFS;

	if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
	return 0;
}
/*
 * Unlink subvolume @subvolid: set its UNLINKED flag and register a commit
 * hook that queues it for pagecache eviction and deletion.
 *
 * The hook is registered before the key update so that both are part of the
 * same transaction; the hook only fires if the commit succeeds.
 */
int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
{
	struct btree_iter iter;
	struct bkey_i_subvolume *n;
	struct subvolume_unlink_hook *h;
	int ret = 0;

	h = bch2_trans_kmalloc(trans, sizeof(*h));
	ret = PTR_ERR_OR_ZERO(h);
	if (ret)
		return ret;

	h->h.fn		= bch2_subvolume_wait_for_pagecache_and_delete_hook;
	h->subvol	= subvolid;
	bch2_trans_commit_hook(trans, &h->h);

	n = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_subvolumes, POS(0, subvolid),
			BTREE_ITER_CACHED, subvolume);
	ret = PTR_ERR_OR_ZERO(n);
	if (unlikely(ret)) {
		/* Unlinking a nonexistent subvolume is fs inconsistency: */
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
					"missing subvolume %u", subvolid);
		return ret;
	}

	SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
/*
 * Create a new subvolume with root inode @inode.
 *
 * If @src_subvolid is nonzero we're snapshotting an existing subvolume:
 * two snapshot nodes are created (one for the new subvolume, one to move
 * the source subvolume onto), otherwise just one.  On success the new
 * subvolume id and its snapshot id are returned via @new_subvolid /
 * @new_snapshotid.  @ro marks the new subvolume read-only.
 */
int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
			  u32 src_subvolid,
			  u32 *new_subvolid,
			  u32 *new_snapshotid,
			  bool ro)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
	struct bkey_i_subvolume *new_subvol = NULL;
	struct bkey_i_subvolume *src_subvol = NULL;
	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
	int ret = 0;

	/* Allocate a free subvolume id, scanning down from U32_MAX: */
	ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
				BTREE_ID_subvolumes, POS(0, U32_MAX));
	if (ret == -BCH_ERR_ENOSPC_btree_slot)
		ret = -BCH_ERR_ENOSPC_subvolume_create;
	if (ret)
		return ret;

	snapshot_subvols[0] = dst_iter.pos.offset;
	snapshot_subvols[1] = src_subvolid;

	if (src_subvolid) {
		/* Creating a snapshot: */

		src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
				BTREE_ID_subvolumes, POS(0, src_subvolid),
				BTREE_ITER_CACHED, subvolume);
		ret = PTR_ERR_OR_ZERO(src_subvol);
		if (unlikely(ret)) {
			bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
						"subvolume %u not found", src_subvolid);
			goto err;
		}

		/* New snapshot nodes become children of the source's snapshot: */
		parent = le32_to_cpu(src_subvol->v.snapshot);
	}

	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
					snapshot_subvols,
					src_subvolid ? 2 : 1);
	if (ret)
		goto err;

	if (src_subvolid) {
		/* Source subvolume moves to the second new snapshot node: */
		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
		ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
		if (ret)
			goto err;
	}

	new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
	ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		goto err;

	new_subvol->v.flags	= 0;
	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
	new_subvol->v.inode	= cpu_to_le64(inode);
	new_subvol->v.parent	= cpu_to_le32(src_subvolid);
	new_subvol->v.otime.lo	= cpu_to_le64(bch2_current_time(c));
	new_subvol->v.otime.hi	= 0;

	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
	/* SNAP flag distinguishes snapshots from freshly-created subvolumes: */
	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);

	*new_subvolid	= new_subvol->k.p.offset;
	*new_snapshotid	= new_nodes[0];
err:
	bch2_trans_iter_exit(trans, &src_iter);
	bch2_trans_iter_exit(trans, &dst_iter);
	return ret;
}
1747 int bch2_fs_subvolumes_init(struct bch_fs *c)
1749 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
1750 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
1751 bch2_subvolume_wait_for_pagecache_and_delete);
1752 mutex_init(&c->snapshots_unlinked_lock);