1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
/*
 * Format a KEY_TYPE_snapshot value for human-readable output: the
 * SUBVOL/DELETED flags, parent id, both child ids, and subvolume id.
 * All on-disk fields are little-endian and converted before printing.
 */
12 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
15 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
17 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u",
18 	       BCH_SNAPSHOT_SUBVOL(s.v),
19 	       BCH_SNAPSHOT_DELETED(s.v),
20 	       le32_to_cpu(s.v->parent),
21 	       le32_to_cpu(s.v->children[0]),
22 	       le32_to_cpu(s.v->children[1]),
23 	       le32_to_cpu(s.v->subvol));
/*
 * .key_invalid hook for KEY_TYPE_snapshot keys.  On failure an
 * explanation is written to @err and a nonzero result is returned
 * (error-path lines are elided in this view).
 *
 * Invariants checked:
 *  - position must lie in (0, 1) .. (0, U32_MAX)
 *  - value size must be exactly sizeof(struct bch_snapshot)
 *  - parent id, if set, must be greater than this node's id
 *  - children must be normalized: children[0] >= children[1]
 *  - nonzero children must not be duplicates
 *  - each child id must be less than this node's id
 */
26 int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
27 			  int rw, struct printbuf *err)
29 	struct bkey_s_c_snapshot s;
32 	if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0 ||
33 	    bkey_cmp(k.k->p, POS(0, 1)) < 0) {
34 		prt_printf(err, "bad pos");
38 	if (bkey_val_bytes(k.k) != sizeof(struct bch_snapshot)) {
39 		prt_printf(err, "bad val size (%zu != %zu)",
40 		       bkey_val_bytes(k.k), sizeof(struct bch_snapshot));
44 	s = bkey_s_c_to_snapshot(k);
46 	id = le32_to_cpu(s.v->parent);
47 	if (id && id <= k.k->p.offset) {
48 		prt_printf(err, "bad parent node (%u <= %llu)",
	/* children must be stored in descending order */
53 	if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
54 		prt_printf(err, "children not normalized");
58 	if (s.v->children[0] &&
59 	    s.v->children[0] == s.v->children[1]) {
60 		prt_printf(err, "duplicate child nodes");
64 	for (i = 0; i < 2; i++) {
65 		id = le32_to_cpu(s.v->children[i]);
67 		if (id >= k.k->p.offset) {
68 			prt_printf(err, "bad child node (%u >= %llu)",
/*
 * Trigger for snapshot keys: mirror the on-disk snapshot node into the
 * in-memory c->snapshots genradix.  The radix is indexed by
 * U32_MAX - id (presumably so ids allocated from the top of the u32
 * space stay dense — TODO confirm against snapshot id allocation).
 * For non-snapshot (deletion) keys the elided else-branch clears the
 * entry — not visible in this view.
 */
77 int bch2_mark_snapshot(struct btree_trans *trans,
78 		       struct bkey_s_c old, struct bkey_s_c new,
81 	struct bch_fs *c = trans->c;
84 	t = genradix_ptr_alloc(&c->snapshots,
85 			       U32_MAX - new.k->p.offset,
90 	if (new.k->type == KEY_TYPE_snapshot) {
91 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
93 		t->parent	= le32_to_cpu(s.v->parent);
94 		t->children[0]	= le32_to_cpu(s.v->children[0]);
95 		t->children[1]	= le32_to_cpu(s.v->children[1]);
		/* subvol is only meaningful when the SUBVOL flag is set */
96 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
/*
 * Look up the snapshot node @id in the snapshots btree and copy its
 * value into *@s.  Returns 0 on success, -ENOENT if the key exists but
 * is not a snapshot, or a btree error.  Uses BTREE_ITER_WITH_UPDATES so
 * uncommitted updates in the current transaction are seen.
 */
107 static int snapshot_lookup(struct btree_trans *trans, u32 id,
108 			   struct bch_snapshot *s)
110 	struct btree_iter iter;
114 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
115 			     BTREE_ITER_WITH_UPDATES);
116 	k = bch2_btree_iter_peek_slot(&iter);
117 	ret = bkey_err(k) ?: k.k->type == KEY_TYPE_snapshot ? 0 : -ENOENT;
120 	*s = *bkey_s_c_to_snapshot(k).v;
122 	bch2_trans_iter_exit(trans, &iter);
/*
 * Return whether snapshot @id is live: nonzero (true) if the node exists
 * and does not have its DELETED flag set, 0 if deleted, negative error
 * otherwise.  An id of 0 is handled by an elided early return — TODO
 * confirm.  Retries transparently on transaction restart via
 * lockrestart_do().
 */
126 static int snapshot_live(struct btree_trans *trans, u32 id)
128 	struct bch_snapshot v;
134 	ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
136 		bch_err(trans->c, "snapshot node %u not found", id);
140 	return !BCH_SNAPSHOT_DELETED(&v);
/*
 * Compute the equivalence class of snapshot node @snap: if exactly one
 * child is live, this node is equivalent to that child's class (interior
 * nodes with a single live descendant collapse together); otherwise the
 * elided branch assigns its own id.  Stores the result in the in-memory
 * snapshot_t.
 */
143 static int bch2_snapshot_set_equiv(struct btree_trans *trans,
144 				   struct bkey_s_c_snapshot snap)
146 	struct bch_fs *c = trans->c;
147 	unsigned i, nr_live = 0, live_idx = 0;
148 	u32 id = snap.k->p.offset, child[2] = {
149 		[0] = le32_to_cpu(snap.v->children[0]),
150 		[1] = le32_to_cpu(snap.v->children[1])
	/* count live children; remember which one (elided lines) */
153 	for (i = 0; i < 2; i++) {
154 		int ret = snapshot_live(trans, child[i]);
163 	snapshot_t(c, id)->equiv = nr_live == 1
164 		? snapshot_t(c, child[live_idx])->equiv
/*
 * Walk the entire snapshots btree and recompute every node's
 * equivalence class.  Returns 0 or the first error encountered
 * (logged below).
 */
169 static int bch2_snapshots_set_equiv(struct btree_trans *trans)
171 	struct btree_iter iter;
175 	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
176 			   POS_MIN, 0, k, ret) {
177 		if (k.k->type != KEY_TYPE_snapshot)
180 		ret = bch2_snapshot_set_equiv(trans, bkey_s_c_to_snapshot(k));
184 	bch2_trans_iter_exit(trans, &iter);
187 		bch_err(trans->c, "error in bch2_snapshots_set_equiv: %i", ret);
/*
 * fsck pass for a single snapshot key at @iter:
 *  - parent must exist and point back at us via one of its children
 *  - each child must exist and point back at us via its parent field
 *  - the SUBVOL flag must agree with the subvolume btree: a live
 *    non-deleted node must reference an existing subvolume whose
 *    snapshot field points back here; otherwise the stale subvol
 *    pointer is cleared via an in-transaction update
 *  - deleted nodes flag the fs for later snapshot GC
 * Repair paths and error handling between the visible lines are elided
 * in this view.
 */
193 static int check_snapshot(struct btree_trans *trans,
194 			  struct btree_iter *iter)
196 	struct bch_fs *c = trans->c;
197 	struct bkey_s_c_snapshot s;
198 	struct bch_subvolume subvol;
199 	struct bch_snapshot v;
201 	struct printbuf buf = PRINTBUF;
202 	bool should_have_subvol;
206 	k = bch2_btree_iter_peek(iter);
214 	if (k.k->type != KEY_TYPE_snapshot)
217 	s = bkey_s_c_to_snapshot(k);
	/* check parent exists and has a backpointer to us: */
218 	id = le32_to_cpu(s.v->parent);
220 		ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
222 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
223 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
227 		if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
228 		    le32_to_cpu(v.children[1]) != s.k->p.offset) {
229 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
	/* check children exist and point back at us: */
236 	for (i = 0; i < 2 && s.v->children[i]; i++) {
237 		id = le32_to_cpu(s.v->children[i]);
239 		ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
241 			bch_err(c, "snapshot node %llu has nonexistent child %u",
246 		if (le32_to_cpu(v.parent) != s.k->p.offset) {
247 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
248 				id, le32_to_cpu(v.parent), s.k->p.offset);
	/* cross-check against the subvolumes btree: */
254 	should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
255 		!BCH_SNAPSHOT_DELETED(s.v);
257 	if (should_have_subvol) {
258 		id = le32_to_cpu(s.v->subvol);
259 		ret = lockrestart_do(trans, bch2_subvolume_get(trans, id, 0, false, &subvol));
261 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
262 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
266 		if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
267 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
		/* node shouldn't reference a subvol: clear the stale pointer */
273 		if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n  %s",
274 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
275 			struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
277 			ret = PTR_ERR_OR_ZERO(u);
281 			bkey_reassemble(&u->k_i, s.s_c);
283 			ret = bch2_trans_update(trans, iter, &u->k_i, 0);
	/* remember that dead snapshots exist, for later cleanup: */
289 	if (BCH_SNAPSHOT_DELETED(s.v))
290 		set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
/*
 * fsck entry point: run check_snapshot() on every key in the snapshots
 * btree, committing each repair in its own transaction (LAZY_RW so the
 * fs is switched read-write only if a repair is actually needed).
 */
297 int bch2_fs_check_snapshots(struct bch_fs *c)
299 	struct btree_trans trans;
300 	struct btree_iter iter;
303 	bch2_trans_init(&trans, c, 0, 0);
305 	bch2_trans_iter_init(&trans, &iter, BTREE_ID_snapshots,
306 			     POS_MIN, BTREE_ITER_PREFETCH);
309 		ret = commit_do(&trans, NULL, NULL,
310 				BTREE_INSERT_LAZY_RW|
312 			check_snapshot(&trans, &iter));
315 	} while (bch2_btree_iter_advance(&iter));
316 	bch2_trans_iter_exit(&trans, &iter);
319 		bch_err(c, "error %i checking snapshots", ret);
321 	bch2_trans_exit(&trans);
/*
 * fsck pass for a single subvolume key at @iter: the subvolume's
 * snapshot field must reference an existing snapshot node, and
 * subvolumes already flagged UNLINKED are deleted here (-EINTR, i.e.
 * transaction restart, is not reported as an error).
 */
325 static int check_subvol(struct btree_trans *trans,
326 			struct btree_iter *iter)
329 	struct bkey_s_c_subvolume subvol;
330 	struct bch_snapshot snapshot;
334 	k = bch2_btree_iter_peek(iter);
342 	if (k.k->type != KEY_TYPE_subvolume)
345 	subvol = bkey_s_c_to_subvolume(k);
346 	snapid = le32_to_cpu(subvol.v->snapshot);
347 	ret = snapshot_lookup(trans, snapid, &snapshot);
350 		bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
351 			k.k->p.offset, snapid);
	/* finish deleting subvolumes that were unlinked before shutdown: */
355 	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
356 		ret = bch2_subvolume_delete(trans, iter->pos.offset);
357 		if (ret && ret != -EINTR)
358 			bch_err(trans->c, "error deleting subvolume %llu: %i",
359 				iter->pos.offset, ret);
/*
 * fsck entry point: run check_subvol() on every key in the subvolumes
 * btree, one commit per key (mirrors bch2_fs_check_snapshots()).
 */
367 int bch2_fs_check_subvols(struct bch_fs *c)
369 	struct btree_trans trans;
370 	struct btree_iter iter;
373 	bch2_trans_init(&trans, c, 0, 0);
375 	bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
376 			     POS_MIN, BTREE_ITER_PREFETCH);
379 		ret = commit_do(&trans, NULL, NULL,
380 				BTREE_INSERT_LAZY_RW|
382 			check_subvol(&trans, &iter));
385 	} while (bch2_btree_iter_advance(&iter));
386 	bch2_trans_iter_exit(&trans, &iter);
388 	bch2_trans_exit(&trans);
/* Free the in-memory snapshot table on filesystem shutdown. */
393 void bch2_fs_snapshots_exit(struct bch_fs *c)
395 	genradix_free(&c->snapshots);
/*
 * Startup: rebuild the in-memory snapshot table from the snapshots
 * btree (bch2_mark_snapshot with a null "old" key populates each node),
 * then compute equivalence classes.  Keys past (0, U32_MAX) are ignored.
 */
398 int bch2_fs_snapshots_start(struct bch_fs *c)
400 	struct btree_trans trans;
401 	struct btree_iter iter;
405 	bch2_trans_init(&trans, c, 0, 0);
407 	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
408 			   POS_MIN, 0, k, ret) {
409 		if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
412 		if (k.k->type != KEY_TYPE_snapshot)
415 		ret = bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?:
416 			bch2_snapshot_set_equiv(&trans, bkey_s_c_to_snapshot(k));
420 	bch2_trans_iter_exit(&trans, &iter);
422 	bch2_trans_exit(&trans);
425 		bch_err(c, "error starting snapshots: %i", ret);
430 * Mark a snapshot as deleted, for future cleanup:
/*
 * Flag snapshot node @id as deleted (and clear its SUBVOL flag) so the
 * background GC can later remove it and its keys.  No-op if the node is
 * already flagged deleted; inconsistency error if the node is missing.
 */
432 static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
434 	struct btree_iter iter;
436 	struct bkey_i_snapshot *s;
439 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
441 	k = bch2_btree_iter_peek_slot(&iter);
446 	if (k.k->type != KEY_TYPE_snapshot) {
447 		bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
452 	/* already deleted? */
453 	if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v))
456 	s = bch2_trans_kmalloc(trans, sizeof(*s));
457 	ret = PTR_ERR_OR_ZERO(s);
461 	bkey_reassemble(&s->k_i, k);
462 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
463 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
466 	ret = bch2_trans_update(trans, &iter, &s->k_i, 0);
470 	bch2_trans_iter_exit(trans, &iter);
/*
 * Physically remove snapshot node @id (which must already be flagged
 * deleted — BUG_ON otherwise): clear the matching child pointer in the
 * parent, re-normalize the parent's children so children[0] >=
 * children[1], then delete the node's key.  The parent update is
 * skipped by elided lines when the node has no parent — TODO confirm.
 */
474 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
476 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
478 	struct bkey_s_c_snapshot s;
479 	struct bkey_i_snapshot *parent;
484 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
486 	k = bch2_btree_iter_peek_slot(&iter);
491 	if (k.k->type != KEY_TYPE_snapshot) {
492 		bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
497 	s = bkey_s_c_to_snapshot(k);
499 	BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
500 	parent_id = le32_to_cpu(s.v->parent);
	/* unlink this node from its parent: */
503 		bch2_trans_iter_init(trans, &p_iter, BTREE_ID_snapshots,
506 		k = bch2_btree_iter_peek_slot(&p_iter);
511 		if (k.k->type != KEY_TYPE_snapshot) {
512 			bch2_fs_inconsistent(trans->c, "missing snapshot %u", parent_id);
517 		parent = bch2_trans_kmalloc(trans, sizeof(*parent));
518 		ret = PTR_ERR_OR_ZERO(parent);
522 		bkey_reassemble(&parent->k_i, k);
524 		for (i = 0; i < 2; i++)
525 			if (le32_to_cpu(parent->v.children[i]) == id)
529 			bch_err(trans->c, "snapshot %u missing child pointer to %u",
532 		parent->v.children[i] = 0;
		/* keep children normalized: larger id first */
534 		if (le32_to_cpu(parent->v.children[0]) <
535 		    le32_to_cpu(parent->v.children[1]))
536 			swap(parent->v.children[0],
537 			     parent->v.children[1]);
539 		ret = bch2_trans_update(trans, &p_iter, &parent->k_i, 0);
544 	ret = bch2_btree_delete_at(trans, &iter, 0);
546 	bch2_trans_iter_exit(trans, &p_iter);
547 	bch2_trans_iter_exit(trans, &iter);
/*
 * Allocate @nr_snapids new snapshot nodes as children of @parent,
 * returning their ids in @new_snapids.  New ids are found by walking
 * backwards from the highest existing key with prev_slot (snapshot ids
 * are allocated from the top of the u32 space downwards — presumably;
 * the initial peek/iteration setup lines are elided).  If @parent is
 * nonzero, the parent node is then updated to point at the new
 * children; a parent that already has children is an error.
 */
551 int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
553 			      u32 *snapshot_subvols,
556 	struct btree_iter iter;
557 	struct bkey_i_snapshot *n;
562 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
563 			     POS_MIN, BTREE_ITER_INTENT);
564 	k = bch2_btree_iter_peek(&iter);
569 	for (i = 0; i < nr_snapids; i++) {
570 		k = bch2_btree_iter_prev_slot(&iter);
		/* ran out of ids: offset 0 is never a valid snapshot id */
575 		if (!k.k || !k.k->p.offset) {
580 		n = bch2_trans_kmalloc(trans, sizeof(*n));
581 		ret = PTR_ERR_OR_ZERO(n);
585 		bkey_snapshot_init(&n->k_i);
588 		n->v.parent	= cpu_to_le32(parent);
589 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
591 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
		/* update btree and in-memory table together: */
593 		ret   = bch2_trans_update(trans, &iter, &n->k_i, 0) ?:
594 			bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
598 		new_snapids[i]	= iter.pos.offset;
	/* attach the new nodes as children of @parent: */
602 		bch2_btree_iter_set_pos(&iter, POS(0, parent));
603 		k = bch2_btree_iter_peek(&iter);
608 		if (k.k->type != KEY_TYPE_snapshot) {
609 			bch_err(trans->c, "snapshot %u not found", parent);
614 		n = bch2_trans_kmalloc(trans, sizeof(*n));
615 		ret = PTR_ERR_OR_ZERO(n);
619 		bkey_reassemble(&n->k_i, k);
621 		if (n->v.children[0] || n->v.children[1]) {
622 			bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
627 		n->v.children[0] = cpu_to_le32(new_snapids[0]);
628 		n->v.children[1] = cpu_to_le32(new_snapids[1]);
		/* parent is now an interior node, no longer a subvol leaf */
630 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
631 		ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
636 	bch2_trans_iter_exit(trans, &iter);
/*
 * For one snapshot-aware btree, delete every key that belongs to a
 * snapshot in @deleted, or that is shadowed by a newer key in the same
 * equivalence class at the same position (equiv_seen tracks which
 * classes have already produced a key at the current position; it is
 * reset by elided lines when the position changes).  Each deletion is
 * its own committed transaction.
 */
640 static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
641 					   snapshot_id_list *deleted,
642 					   enum btree_id btree_id)
644 	struct bch_fs *c = trans->c;
645 	struct btree_iter iter;
647 	snapshot_id_list equiv_seen = { 0 };
648 	struct bpos last_pos = POS_MIN;
652 	 * XXX: We should also delete whiteouts that no longer overwrite
656 	bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN,
659 			     BTREE_ITER_NOT_EXTENTS|
660 			     BTREE_ITER_ALL_SNAPSHOTS);
662 	while ((bch2_trans_begin(trans),
663 		(k = bch2_btree_iter_peek(&iter)).k) &&
664 	       !(ret = bkey_err(k))) {
665 		u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
		/* new position: reset the per-position seen list (elided) */
667 		if (bkey_cmp(k.k->p, last_pos))
671 		if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
672 		    snapshot_list_has_id(&equiv_seen, equiv)) {
673 			ret = commit_do(trans, NULL, NULL,
675 				bch2_btree_iter_traverse(&iter) ?:
676 				bch2_btree_delete_at(trans, &iter,
677 					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
681 			ret = snapshot_list_add(c, &equiv_seen, equiv);
686 		bch2_btree_iter_advance(&iter);
688 	bch2_trans_iter_exit(trans, &iter);
690 	darray_exit(&equiv_seen);
/*
 * Garbage-collect dead snapshots, in four phases:
 *  1. flag as deleted every snapshot node with no live children that
 *     isn't referenced by a subvolume
 *  2. recompute equivalence classes, then collect all deleted ids
 *  3. purge their keys from every snapshot-aware btree
 *  4. delete the snapshot nodes themselves
 * Only runs when BCH_FS_HAVE_DELETED_SNAPSHOTS is set; goes read-write
 * early if the fs hasn't started yet.
 */
695 int bch2_delete_dead_snapshots(struct bch_fs *c)
697 	struct btree_trans trans;
698 	struct btree_iter iter;
700 	struct bkey_s_c_snapshot snap;
701 	snapshot_id_list deleted = { 0 };
702 	u32 i, id, children[2];
705 	if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
708 	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
709 		ret = bch2_fs_read_write_early(c);
711 			bch_err(c, "error deleting dead snapshots: error going rw: %i", ret);
716 	bch2_trans_init(&trans, c, 0, 0);
719 	 * For every snapshot node: If we have no live children and it's not
720 	 * pointed to by a subvolume, delete it:
722 	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
723 			   POS_MIN, 0, k, ret) {
724 		if (k.k->type != KEY_TYPE_snapshot)
727 		snap = bkey_s_c_to_snapshot(k);
728 		if (BCH_SNAPSHOT_DELETED(snap.v) ||
729 		    BCH_SNAPSHOT_SUBVOL(snap.v))
732 		children[0] = le32_to_cpu(snap.v->children[0]);
733 		children[1] = le32_to_cpu(snap.v->children[1]);
735 		ret   = snapshot_live(&trans, children[0]) ?:
736 			snapshot_live(&trans, children[1]);
742 		ret = commit_do(&trans, NULL, NULL, 0,
743 			bch2_snapshot_node_set_deleted(&trans, iter.pos.offset));
745 			bch_err(c, "error deleting snapshot %llu: %i", iter.pos.offset, ret);
749 	bch2_trans_iter_exit(&trans, &iter);
752 		bch_err(c, "error walking snapshots: %i", ret);
	/* equivalence classes may have changed now that nodes are dead: */
756 	ret = bch2_snapshots_set_equiv(&trans);
	/* collect every node flagged deleted: */
760 	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
761 			   POS_MIN, 0, k, ret) {
762 		if (k.k->type != KEY_TYPE_snapshot)
765 		snap = bkey_s_c_to_snapshot(k);
766 		if (BCH_SNAPSHOT_DELETED(snap.v)) {
767 			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
772 	bch2_trans_iter_exit(&trans, &iter);
775 		bch_err(c, "error walking snapshots: %i", ret);
	/* purge dead snapshots' keys from every snapshot-aware btree: */
779 	for (id = 0; id < BTREE_ID_NR; id++) {
780 		if (!btree_type_has_snapshots(id))
783 		ret = bch2_snapshot_delete_keys_btree(&trans, &deleted, id);
785 			bch_err(c, "error deleting snapshot keys: %i", ret);
	/* finally, remove the snapshot nodes themselves: */
790 	for (i = 0; i < deleted.nr; i++) {
791 		ret = commit_do(&trans, NULL, NULL, 0,
792 			bch2_snapshot_node_delete(&trans, deleted.data[i]));
794 			bch_err(c, "error deleting snapshot %u: %i",
795 				deleted.data[i], ret);
800 	clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
802 	darray_exit(&deleted);
803 	bch2_trans_exit(&trans);
/*
 * Workqueue entry point for bch2_delete_dead_snapshots(); drops the
 * c->writes ref taken by bch2_delete_dead_snapshots_async().
 */
807 static void bch2_delete_dead_snapshots_work(struct work_struct *work)
809 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
811 	bch2_delete_dead_snapshots(c);
812 	percpu_ref_put(&c->writes);
/*
 * Kick off dead-snapshot deletion in the background.  Takes a c->writes
 * ref to keep the fs writeable while queued; the ref is dropped here if
 * the work was already queued, otherwise by the work item itself.
 */
815 void bch2_delete_dead_snapshots_async(struct bch_fs *c)
817 	if (!percpu_ref_tryget_live(&c->writes))
820 	if (!queue_work(system_long_wq, &c->snapshot_delete_work))
821 		percpu_ref_put(&c->writes);
/*
 * Transaction commit hook: note that dead snapshots now exist and, once
 * fsck has finished, schedule the async cleanup.
 */
824 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
825 					   struct btree_trans_commit_hook *h)
827 	struct bch_fs *c = trans->c;
829 	set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
	/* during fsck, cleanup is deferred until fsck completes */
831 	if (!test_bit(BCH_FS_FSCK_DONE, &c->flags))
834 	bch2_delete_dead_snapshots_async(c);
/*
 * .key_invalid hook for KEY_TYPE_subvolume keys: position must lie in
 * [SUBVOL_POS_MIN, SUBVOL_POS_MAX] and the value must be exactly
 * sizeof(struct bch_subvolume).
 */
840 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
841 			   int rw, struct printbuf *err)
843 	if (bkey_cmp(k.k->p, SUBVOL_POS_MIN) < 0 ||
844 	    bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) {
845 		prt_printf(err, "invalid pos");
849 	if (bkey_val_bytes(k.k) != sizeof(struct bch_subvolume)) {
850 		prt_printf(err, "incorrect value size (%zu != %zu)",
851 		       bkey_val_bytes(k.k), sizeof(struct bch_subvolume));
/*
 * Format a KEY_TYPE_subvolume value for human-readable output: root
 * inode number and snapshot id.
 */
858 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
861 	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
863 	prt_printf(out, "root %llu snapshot id %u",
864 	       le64_to_cpu(s.v->inode),
865 	       le32_to_cpu(s.v->snapshot));
/*
 * Look up subvolume @subvol and copy its value into *@s.  Returns 0,
 * -ENOENT if not found (reported as fs inconsistency when
 * @inconsistent_if_not_found), or a btree error.  NOTE(review): callers
 * elsewhere in this file pass an iter-flags argument between these
 * parameters; that parameter line is elided in this view.
 */
868 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
869 		       bool inconsistent_if_not_found,
871 		       struct bch_subvolume *s)
873 	struct btree_iter iter;
877 	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol),
879 	k = bch2_btree_iter_peek_slot(&iter);
880 	ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT;
882 	if (ret == -ENOENT && inconsistent_if_not_found)
883 		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
885 		*s = *bkey_s_c_to_subvolume(k).v;
887 	bch2_trans_iter_exit(trans, &iter);
/*
 * Resolve snapshot id -> owning subvolume: look up the snapshot node,
 * then fetch the subvolume its subvol field references (missing
 * subvolume is reported as fs inconsistency).
 */
891 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
892 			     struct bch_subvolume *subvol)
894 	struct bch_snapshot snap;
896 	return  snapshot_lookup(trans, snapshot, &snap) ?:
897 		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
/*
 * Resolve subvolume id -> its current snapshot id, in *@snapid.
 * Uses BTREE_ITER_WITH_UPDATES so in-transaction updates are seen;
 * missing subvolume is reported as fs inconsistency.
 */
900 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
903 	struct bch_subvolume s;
906 	ret = bch2_subvolume_get(trans, subvol, true,
908 				 BTREE_ITER_WITH_UPDATES,
911 	*snapid = le32_to_cpu(s.snapshot);
916 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
/*
 * Delete subvolume @subvolid: remove its key, flag its snapshot node
 * as deleted, and register a commit hook that schedules background
 * snapshot GC once this transaction commits.  Missing subvolume is
 * reported as fs inconsistency.
 */
919 int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
921 	struct btree_iter iter;
923 	struct bkey_s_c_subvolume subvol;
924 	struct btree_trans_commit_hook *h;
928 	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
932 	k = bch2_btree_iter_peek_slot(&iter);
937 	if (k.k->type != KEY_TYPE_subvolume) {
938 		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid);
943 	subvol = bkey_s_c_to_subvolume(k);
944 	snapid = le32_to_cpu(subvol.v->snapshot);
946 	ret = bch2_btree_delete_at(trans, &iter, 0);
950 	ret = bch2_snapshot_node_set_deleted(trans, snapid);
	/* hook fires on commit; schedules async dead-snapshot cleanup */
952 	h = bch2_trans_kmalloc(trans, sizeof(*h));
953 	ret = PTR_ERR_OR_ZERO(h);
957 	h->fn = bch2_delete_dead_snapshots_hook;
958 	bch2_trans_commit_hook(trans, h);
960 	bch2_trans_iter_exit(trans, &iter);
/*
 * Workqueue entry point for deleting unlinked subvolumes: atomically
 * take ownership of the accumulated c->snapshots_unlinked list, evict
 * their inodes from the pagecache, then delete each subvolume in its
 * own transaction.  Drops the c->writes ref taken when the work was
 * queued.  The surrounding loop structure is partially elided in this
 * view.
 */
964 void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
966 	struct bch_fs *c = container_of(work, struct bch_fs,
967 				snapshot_wait_for_pagecache_and_delete_work);
	/* steal the pending list under the lock, leaving it empty: */
973 		mutex_lock(&c->snapshots_unlinked_lock);
974 		s = c->snapshots_unlinked;
975 		darray_init(&c->snapshots_unlinked);
976 		mutex_unlock(&c->snapshots_unlinked_lock);
981 		bch2_evict_subvolume_inodes(c, &s);
983 		for (id = s.data; id < s.data + s.nr; id++) {
984 			ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
985 				      bch2_subvolume_delete(&trans, *id));
987 				bch_err(c, "error %i deleting subvolume %u", ret, *id);
995 	percpu_ref_put(&c->writes);
/*
 * Commit-hook wrapper carrying the subvolume id to unlink; embeds the
 * generic hook so container_of() can recover it in the hook callback.
 * (The u32 subvol member and closing brace are elided in this view.)
 */
998 struct subvolume_unlink_hook {
999 	struct btree_trans_commit_hook	h;
/*
 * Commit hook fired after a subvolume is flagged UNLINKED: add its id
 * to c->snapshots_unlinked (deduplicated, under the lock) and queue the
 * pagecache-eviction/delete worker.  Takes a c->writes ref for the
 * worker; dropped here if the work was already queued.
 */
1003 int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
1004 						      struct btree_trans_commit_hook *_h)
1006 	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
1007 	struct bch_fs *c = trans->c;
1010 	mutex_lock(&c->snapshots_unlinked_lock);
1011 	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
1012 		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
1013 	mutex_unlock(&c->snapshots_unlinked_lock);
1018 	if (unlikely(!percpu_ref_tryget_live(&c->writes)))
1021 	if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
1022 		percpu_ref_put(&c->writes);
/*
 * Mark subvolume @subvolid UNLINKED and register a commit hook that
 * queues the deferred pagecache-flush-and-delete worker.  The actual
 * deletion happens asynchronously after this transaction commits.
 * Missing subvolume is reported as fs inconsistency.
 */
1026 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
1028 	struct btree_iter iter;
1030 	struct bkey_i_subvolume *n;
1031 	struct subvolume_unlink_hook *h;
1034 	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
1038 	k = bch2_btree_iter_peek_slot(&iter);
1043 	if (k.k->type != KEY_TYPE_subvolume) {
1044 		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid);
1049 	n = bch2_trans_kmalloc(trans, sizeof(*n));
1050 	ret = PTR_ERR_OR_ZERO(n);
1054 	bkey_reassemble(&n->k_i, k);
1055 	SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
1057 	ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
	/* hook carries the subvol id to the deferred-delete worker */
1061 	h = bch2_trans_kmalloc(trans, sizeof(*h));
1062 	ret = PTR_ERR_OR_ZERO(h);
1066 	h->h.fn		= bch2_subvolume_wait_for_pagecache_and_delete_hook;
1067 	h->subvol	= subvolid;
1068 	bch2_trans_commit_hook(trans, &h->h);
1070 	bch2_trans_iter_exit(trans, &iter);
/*
 * Create a new subvolume (or, when @src_subvolid is nonzero, a snapshot
 * of an existing subvolume):
 *  - find a free slot in the subvolumes btree (skipping slots that may
 *    still be shadowed by the btree key cache, since subvolume deletion
 *    doesn't flush it)
 *  - allocate one or two snapshot nodes: new_nodes[0] for the new
 *    subvolume, and when snapshotting, new_nodes[1] which the source
 *    subvolume is repointed to
 *  - insert the new subvolume key, returning its id and snapshot id via
 *    @new_subvolid / @new_snapshotid
 * Several declarations and error paths are elided in this view.
 */
1074 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
1077 			  u32 *new_snapshotid,
1080 	struct bch_fs *c = trans->c;
1081 	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
1082 	struct bkey_i_subvolume *new_subvol = NULL;
1083 	struct bkey_i_subvolume *src_subvol = NULL;
1085 	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
	/* find a free slot in the subvolumes btree: */
1088 	for_each_btree_key(trans, dst_iter, BTREE_ID_subvolumes, SUBVOL_POS_MIN,
1089 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
1090 		if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
1094 		 * bch2_subvolume_delete() doesn't flush the btree key cache -
1095 		 * ideally it would but that's tricky
1097 		if (bkey_deleted(k.k) &&
1098 		    !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos))
1106 	snapshot_subvols[0] = dst_iter.pos.offset;
1107 	snapshot_subvols[1] = src_subvolid;
1110 		/* Creating a snapshot: */
1111 		src_subvol = bch2_trans_kmalloc(trans, sizeof(*src_subvol));
1112 		ret = PTR_ERR_OR_ZERO(src_subvol);
1116 		bch2_trans_iter_init(trans, &src_iter, BTREE_ID_subvolumes,
1117 				     POS(0, src_subvolid),
1120 		k = bch2_btree_iter_peek_slot(&src_iter);
1125 		if (k.k->type != KEY_TYPE_subvolume) {
1126 			bch_err(c, "subvolume %u not found", src_subvolid);
1131 		bkey_reassemble(&src_subvol->k_i, k);
		/* new snapshot nodes are created as children of this one */
1132 		parent = le32_to_cpu(src_subvol->v.snapshot);
1135 	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
1137 					src_subvolid ? 2 : 1);
	/* source subvolume moves to its own new snapshot node: */
1142 		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
1143 		ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
1148 	new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
1149 	ret = PTR_ERR_OR_ZERO(new_subvol);
1153 	bkey_subvolume_init(&new_subvol->k_i);
1154 	new_subvol->v.flags	= 0;
1155 	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
1156 	new_subvol->v.inode	= cpu_to_le64(inode);
1157 	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
1158 	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
1159 	new_subvol->k.p		= dst_iter.pos;
1160 	ret = bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0);
1164 	*new_subvolid	= new_subvol->k.p.offset;
1165 	*new_snapshotid	= new_nodes[0];
1167 	bch2_trans_iter_exit(trans, &src_iter);
1168 	bch2_trans_iter_exit(trans, &dst_iter);
/*
 * One-time init of subvolume/snapshot background machinery: the two
 * deferred-work items and the lock protecting c->snapshots_unlinked.
 */
1172 int bch2_fs_subvolumes_init(struct bch_fs *c)
1174 	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
1175 	INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
1176 		  bch2_subvolume_wait_for_pagecache_and_delete);
1177 	mutex_init(&c->snapshots_unlinked_lock);