]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/subvolume.c
New upstream release
[bcachefs-tools-debian] / libbcachefs / subvolume.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
6 #include "errcode.h"
7 #include "error.h"
8 #include "fs.h"
9 #include "subvolume.h"
10
11 /* Snapshot tree: */
12
13 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
14                            struct bkey_s_c k)
15 {
16         struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
17
18         prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u",
19                BCH_SNAPSHOT_SUBVOL(s.v),
20                BCH_SNAPSHOT_DELETED(s.v),
21                le32_to_cpu(s.v->parent),
22                le32_to_cpu(s.v->children[0]),
23                le32_to_cpu(s.v->children[1]),
24                le32_to_cpu(s.v->subvol));
25 }
26
27 int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
28                           int rw, struct printbuf *err)
29 {
30         struct bkey_s_c_snapshot s;
31         u32 i, id;
32
33         if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0 ||
34             bkey_cmp(k.k->p, POS(0, 1)) < 0) {
35                 prt_printf(err, "bad pos");
36                 return -EINVAL;
37         }
38
39         if (bkey_val_bytes(k.k) != sizeof(struct bch_snapshot)) {
40                 prt_printf(err, "bad val size (%zu != %zu)",
41                        bkey_val_bytes(k.k), sizeof(struct bch_snapshot));
42                 return -EINVAL;
43         }
44
45         s = bkey_s_c_to_snapshot(k);
46
47         id = le32_to_cpu(s.v->parent);
48         if (id && id <= k.k->p.offset) {
49                 prt_printf(err, "bad parent node (%u <= %llu)",
50                        id, k.k->p.offset);
51                 return -EINVAL;
52         }
53
54         if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
55                 prt_printf(err, "children not normalized");
56                 return -EINVAL;
57         }
58
59         if (s.v->children[0] &&
60             s.v->children[0] == s.v->children[1]) {
61                 prt_printf(err, "duplicate child nodes");
62                 return -EINVAL;
63         }
64
65         for (i = 0; i < 2; i++) {
66                 id = le32_to_cpu(s.v->children[i]);
67
68                 if (id >= k.k->p.offset) {
69                         prt_printf(err, "bad child node (%u >= %llu)",
70                                id, k.k->p.offset);
71                         return -EINVAL;
72                 }
73         }
74
75         return 0;
76 }
77
78 int bch2_mark_snapshot(struct btree_trans *trans,
79                        struct bkey_s_c old, struct bkey_s_c new,
80                        unsigned flags)
81 {
82         struct bch_fs *c = trans->c;
83         struct snapshot_t *t;
84
85         t = genradix_ptr_alloc(&c->snapshots,
86                                U32_MAX - new.k->p.offset,
87                                GFP_KERNEL);
88         if (!t)
89                 return -ENOMEM;
90
91         if (new.k->type == KEY_TYPE_snapshot) {
92                 struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
93
94                 t->parent       = le32_to_cpu(s.v->parent);
95                 t->children[0]  = le32_to_cpu(s.v->children[0]);
96                 t->children[1]  = le32_to_cpu(s.v->children[1]);
97                 t->subvol       = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
98         } else {
99                 t->parent       = 0;
100                 t->children[0]  = 0;
101                 t->children[1]  = 0;
102                 t->subvol       = 0;
103         }
104
105         return 0;
106 }
107
108 static int snapshot_lookup(struct btree_trans *trans, u32 id,
109                            struct bch_snapshot *s)
110 {
111         struct btree_iter iter;
112         struct bkey_s_c k;
113         int ret;
114
115         bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
116                              BTREE_ITER_WITH_UPDATES);
117         k = bch2_btree_iter_peek_slot(&iter);
118         ret = bkey_err(k) ?: k.k->type == KEY_TYPE_snapshot ? 0 : -ENOENT;
119
120         if (!ret)
121                 *s = *bkey_s_c_to_snapshot(k).v;
122
123         bch2_trans_iter_exit(trans, &iter);
124         return ret;
125 }
126
127 static int snapshot_live(struct btree_trans *trans, u32 id)
128 {
129         struct bch_snapshot v;
130         int ret;
131
132         if (!id)
133                 return 0;
134
135         ret = snapshot_lookup(trans, id, &v);
136         if (ret == -ENOENT)
137                 bch_err(trans->c, "snapshot node %u not found", id);
138         if (ret)
139                 return ret;
140
141         return !BCH_SNAPSHOT_DELETED(&v);
142 }
143
144 static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
145 {
146         struct bch_fs *c = trans->c;
147         unsigned i, nr_live = 0, live_idx = 0;
148         struct bkey_s_c_snapshot snap;
149         u32 id = k.k->p.offset, child[2];
150
151         if (k.k->type != KEY_TYPE_snapshot)
152                 return 0;
153
154         snap = bkey_s_c_to_snapshot(k);
155
156         child[0] = le32_to_cpu(snap.v->children[0]);
157         child[1] = le32_to_cpu(snap.v->children[1]);
158
159         for (i = 0; i < 2; i++) {
160                 int ret = snapshot_live(trans, child[i]);
161
162                 if (ret < 0)
163                         return ret;
164
165                 if (ret)
166                         live_idx = i;
167                 nr_live += ret;
168         }
169
170         snapshot_t(c, id)->equiv = nr_live == 1
171                 ? snapshot_t(c, child[live_idx])->equiv
172                 : id;
173         return 0;
174 }
175
176 /* fsck: */
177 static int check_snapshot(struct btree_trans *trans,
178                           struct btree_iter *iter,
179                           struct bkey_s_c k)
180 {
181         struct bch_fs *c = trans->c;
182         struct bkey_s_c_snapshot s;
183         struct bch_subvolume subvol;
184         struct bch_snapshot v;
185         struct printbuf buf = PRINTBUF;
186         bool should_have_subvol;
187         u32 i, id;
188         int ret = 0;
189
190         if (k.k->type != KEY_TYPE_snapshot)
191                 return 0;
192
193         s = bkey_s_c_to_snapshot(k);
194         id = le32_to_cpu(s.v->parent);
195         if (id) {
196                 ret = snapshot_lookup(trans, id, &v);
197                 if (ret == -ENOENT)
198                         bch_err(c, "snapshot with nonexistent parent:\n  %s",
199                                 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
200                 if (ret)
201                         goto err;
202
203                 if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
204                     le32_to_cpu(v.children[1]) != s.k->p.offset) {
205                         bch_err(c, "snapshot parent %u missing pointer to child %llu",
206                                 id, s.k->p.offset);
207                         ret = -EINVAL;
208                         goto err;
209                 }
210         }
211
212         for (i = 0; i < 2 && s.v->children[i]; i++) {
213                 id = le32_to_cpu(s.v->children[i]);
214
215                 ret = snapshot_lookup(trans, id, &v);
216                 if (ret == -ENOENT)
217                         bch_err(c, "snapshot node %llu has nonexistent child %u",
218                                 s.k->p.offset, id);
219                 if (ret)
220                         goto err;
221
222                 if (le32_to_cpu(v.parent) != s.k->p.offset) {
223                         bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
224                                 id, le32_to_cpu(v.parent), s.k->p.offset);
225                         ret = -EINVAL;
226                         goto err;
227                 }
228         }
229
230         should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
231                 !BCH_SNAPSHOT_DELETED(s.v);
232
233         if (should_have_subvol) {
234                 id = le32_to_cpu(s.v->subvol);
235                 ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
236                 if (ret == -ENOENT)
237                         bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
238                                 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
239                 if (ret)
240                         goto err;
241
242                 if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
243                         bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
244                                 s.k->p.offset);
245                         ret = -EINVAL;
246                         goto err;
247                 }
248         } else {
249                 if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n  %s",
250                                 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
251                         struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
252
253                         ret = PTR_ERR_OR_ZERO(u);
254                         if (ret)
255                                 goto err;
256
257                         bkey_reassemble(&u->k_i, s.s_c);
258                         u->v.subvol = 0;
259                         ret = bch2_trans_update(trans, iter, &u->k_i, 0);
260                         if (ret)
261                                 goto err;
262                 }
263         }
264
265         if (BCH_SNAPSHOT_DELETED(s.v))
266                 set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
267 err:
268 fsck_err:
269         printbuf_exit(&buf);
270         return ret;
271 }
272
273 int bch2_fs_check_snapshots(struct bch_fs *c)
274 {
275         struct btree_trans trans;
276         struct btree_iter iter;
277         struct bkey_s_c k;
278         int ret;
279
280         bch2_trans_init(&trans, c, 0, 0);
281
282         ret = for_each_btree_key_commit(&trans, iter,
283                         BTREE_ID_snapshots, POS_MIN,
284                         BTREE_ITER_PREFETCH, k,
285                         NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
286                 check_snapshot(&trans, &iter, k));
287
288         if (ret)
289                 bch_err(c, "error %i checking snapshots", ret);
290
291         bch2_trans_exit(&trans);
292         return ret;
293 }
294
295 static int check_subvol(struct btree_trans *trans,
296                         struct btree_iter *iter,
297                         struct bkey_s_c k)
298 {
299         struct bkey_s_c_subvolume subvol;
300         struct bch_snapshot snapshot;
301         unsigned snapid;
302         int ret;
303
304         if (k.k->type != KEY_TYPE_subvolume)
305                 return 0;
306
307         subvol = bkey_s_c_to_subvolume(k);
308         snapid = le32_to_cpu(subvol.v->snapshot);
309         ret = snapshot_lookup(trans, snapid, &snapshot);
310
311         if (ret == -ENOENT)
312                 bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
313                         k.k->p.offset, snapid);
314         if (ret)
315                 return ret;
316
317         if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
318                 ret = bch2_subvolume_delete(trans, iter->pos.offset);
319                 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
320                         bch_err(trans->c, "error deleting subvolume %llu: %s",
321                                 iter->pos.offset, bch2_err_str(ret));
322                 if (ret)
323                         return ret;
324         }
325
326         return 0;
327 }
328
329 int bch2_fs_check_subvols(struct bch_fs *c)
330 {
331         struct btree_trans trans;
332         struct btree_iter iter;
333         struct bkey_s_c k;
334         int ret;
335
336         bch2_trans_init(&trans, c, 0, 0);
337
338         ret = for_each_btree_key_commit(&trans, iter,
339                         BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
340                         NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
341                 check_subvol(&trans, &iter, k));
342
343         bch2_trans_exit(&trans);
344
345         return ret;
346 }
347
348 void bch2_fs_snapshots_exit(struct bch_fs *c)
349 {
350         genradix_free(&c->snapshots);
351 }
352
353 int bch2_fs_snapshots_start(struct bch_fs *c)
354 {
355         struct btree_trans trans;
356         struct btree_iter iter;
357         struct bkey_s_c k;
358         int ret = 0;
359
360         bch2_trans_init(&trans, c, 0, 0);
361
362         for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
363                            POS_MIN, 0, k,
364                 bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?:
365                 bch2_snapshot_set_equiv(&trans, k));
366
367         bch2_trans_exit(&trans);
368
369         if (ret)
370                 bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
371         return ret;
372 }
373
374 /*
375  * Mark a snapshot as deleted, for future cleanup:
376  */
377 static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
378 {
379         struct btree_iter iter;
380         struct bkey_s_c k;
381         struct bkey_i_snapshot *s;
382         int ret = 0;
383
384         bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
385                              BTREE_ITER_INTENT);
386         k = bch2_btree_iter_peek_slot(&iter);
387         ret = bkey_err(k);
388         if (ret)
389                 goto err;
390
391         if (k.k->type != KEY_TYPE_snapshot) {
392                 bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
393                 ret = -ENOENT;
394                 goto err;
395         }
396
397         /* already deleted? */
398         if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v))
399                 goto err;
400
401         s = bch2_trans_kmalloc(trans, sizeof(*s));
402         ret = PTR_ERR_OR_ZERO(s);
403         if (ret)
404                 goto err;
405
406         bkey_reassemble(&s->k_i, k);
407         SET_BCH_SNAPSHOT_DELETED(&s->v, true);
408         SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
409         s->v.subvol = 0;
410
411         ret = bch2_trans_update(trans, &iter, &s->k_i, 0);
412         if (ret)
413                 goto err;
414 err:
415         bch2_trans_iter_exit(trans, &iter);
416         return ret;
417 }
418
419 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
420 {
421         struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
422         struct bkey_s_c k;
423         struct bkey_s_c_snapshot s;
424         struct bkey_i_snapshot *parent;
425         u32 parent_id;
426         unsigned i;
427         int ret = 0;
428
429         bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
430                              BTREE_ITER_INTENT);
431         k = bch2_btree_iter_peek_slot(&iter);
432         ret = bkey_err(k);
433         if (ret)
434                 goto err;
435
436         if (k.k->type != KEY_TYPE_snapshot) {
437                 bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
438                 ret = -ENOENT;
439                 goto err;
440         }
441
442         s = bkey_s_c_to_snapshot(k);
443
444         BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
445         parent_id = le32_to_cpu(s.v->parent);
446
447         if (parent_id) {
448                 bch2_trans_iter_init(trans, &p_iter, BTREE_ID_snapshots,
449                                      POS(0, parent_id),
450                                      BTREE_ITER_INTENT);
451                 k = bch2_btree_iter_peek_slot(&p_iter);
452                 ret = bkey_err(k);
453                 if (ret)
454                         goto err;
455
456                 if (k.k->type != KEY_TYPE_snapshot) {
457                         bch2_fs_inconsistent(trans->c, "missing snapshot %u", parent_id);
458                         ret = -ENOENT;
459                         goto err;
460                 }
461
462                 parent = bch2_trans_kmalloc(trans, sizeof(*parent));
463                 ret = PTR_ERR_OR_ZERO(parent);
464                 if (ret)
465                         goto err;
466
467                 bkey_reassemble(&parent->k_i, k);
468
469                 for (i = 0; i < 2; i++)
470                         if (le32_to_cpu(parent->v.children[i]) == id)
471                                 break;
472
473                 if (i == 2)
474                         bch_err(trans->c, "snapshot %u missing child pointer to %u",
475                                 parent_id, id);
476                 else
477                         parent->v.children[i] = 0;
478
479                 if (le32_to_cpu(parent->v.children[0]) <
480                     le32_to_cpu(parent->v.children[1]))
481                         swap(parent->v.children[0],
482                              parent->v.children[1]);
483
484                 ret = bch2_trans_update(trans, &p_iter, &parent->k_i, 0);
485                 if (ret)
486                         goto err;
487         }
488
489         ret = bch2_btree_delete_at(trans, &iter, 0);
490 err:
491         bch2_trans_iter_exit(trans, &p_iter);
492         bch2_trans_iter_exit(trans, &iter);
493         return ret;
494 }
495
496 int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
497                               u32 *new_snapids,
498                               u32 *snapshot_subvols,
499                               unsigned nr_snapids)
500 {
501         struct btree_iter iter;
502         struct bkey_i_snapshot *n;
503         struct bkey_s_c k;
504         unsigned i;
505         int ret = 0;
506
507         bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
508                              POS_MIN, BTREE_ITER_INTENT);
509         k = bch2_btree_iter_peek(&iter);
510         ret = bkey_err(k);
511         if (ret)
512                 goto err;
513
514         for (i = 0; i < nr_snapids; i++) {
515                 k = bch2_btree_iter_prev_slot(&iter);
516                 ret = bkey_err(k);
517                 if (ret)
518                         goto err;
519
520                 if (!k.k || !k.k->p.offset) {
521                         ret = -BCH_ERR_ENOSPC_snapshot_create;
522                         goto err;
523                 }
524
525                 n = bch2_trans_kmalloc(trans, sizeof(*n));
526                 ret = PTR_ERR_OR_ZERO(n);
527                 if (ret)
528                         goto err;
529
530                 bkey_snapshot_init(&n->k_i);
531                 n->k.p          = iter.pos;
532                 n->v.flags      = 0;
533                 n->v.parent     = cpu_to_le32(parent);
534                 n->v.subvol     = cpu_to_le32(snapshot_subvols[i]);
535                 n->v.pad        = 0;
536                 SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
537
538                 ret   = bch2_trans_update(trans, &iter, &n->k_i, 0) ?:
539                         bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
540                 if (ret)
541                         goto err;
542
543                 new_snapids[i]  = iter.pos.offset;
544         }
545
546         if (parent) {
547                 bch2_btree_iter_set_pos(&iter, POS(0, parent));
548                 k = bch2_btree_iter_peek(&iter);
549                 ret = bkey_err(k);
550                 if (ret)
551                         goto err;
552
553                 if (k.k->type != KEY_TYPE_snapshot) {
554                         bch_err(trans->c, "snapshot %u not found", parent);
555                         ret = -ENOENT;
556                         goto err;
557                 }
558
559                 n = bch2_trans_kmalloc(trans, sizeof(*n));
560                 ret = PTR_ERR_OR_ZERO(n);
561                 if (ret)
562                         goto err;
563
564                 bkey_reassemble(&n->k_i, k);
565
566                 if (n->v.children[0] || n->v.children[1]) {
567                         bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
568                         ret = -EINVAL;
569                         goto err;
570                 }
571
572                 n->v.children[0] = cpu_to_le32(new_snapids[0]);
573                 n->v.children[1] = cpu_to_le32(new_snapids[1]);
574                 n->v.subvol = 0;
575                 SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
576                 ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
577                 if (ret)
578                         goto err;
579         }
580 err:
581         bch2_trans_iter_exit(trans, &iter);
582         return ret;
583 }
584
585 static int snapshot_delete_key(struct btree_trans *trans,
586                                struct btree_iter *iter,
587                                struct bkey_s_c k,
588                                snapshot_id_list *deleted,
589                                snapshot_id_list *equiv_seen,
590                                struct bpos *last_pos)
591 {
592         struct bch_fs *c = trans->c;
593         u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
594
595         if (bkey_cmp(k.k->p, *last_pos))
596                 equiv_seen->nr = 0;
597         *last_pos = k.k->p;
598
599         if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
600             snapshot_list_has_id(equiv_seen, equiv)) {
601                 return bch2_btree_delete_at(trans, iter,
602                                             BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
603         } else {
604                 return snapshot_list_add(c, equiv_seen, equiv);
605         }
606 }
607
608 static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
609                                           struct bkey_s_c k)
610 {
611         struct bkey_s_c_snapshot snap;
612         u32 children[2];
613         int ret;
614
615         if (k.k->type != KEY_TYPE_snapshot)
616                 return 0;
617
618         snap = bkey_s_c_to_snapshot(k);
619         if (BCH_SNAPSHOT_DELETED(snap.v) ||
620             BCH_SNAPSHOT_SUBVOL(snap.v))
621                 return 0;
622
623         children[0] = le32_to_cpu(snap.v->children[0]);
624         children[1] = le32_to_cpu(snap.v->children[1]);
625
626         ret   = snapshot_live(trans, children[0]) ?:
627                 snapshot_live(trans, children[1]);
628         if (ret < 0)
629                 return ret;
630
631         if (!ret)
632                 return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
633         return 0;
634 }
635
636 int bch2_delete_dead_snapshots(struct bch_fs *c)
637 {
638         struct btree_trans trans;
639         struct btree_iter iter;
640         struct bkey_s_c k;
641         struct bkey_s_c_snapshot snap;
642         snapshot_id_list deleted = { 0 };
643         u32 i, id;
644         int ret = 0;
645
646         if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
647                 return 0;
648
649         if (!test_bit(BCH_FS_STARTED, &c->flags)) {
650                 ret = bch2_fs_read_write_early(c);
651                 if (ret) {
652                         bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
653                         return ret;
654                 }
655         }
656
657         bch2_trans_init(&trans, c, 0, 0);
658
659         /*
660          * For every snapshot node: If we have no live children and it's not
661          * pointed to by a subvolume, delete it:
662          */
663         ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
664                         POS_MIN, 0, k,
665                         NULL, NULL, 0,
666                 bch2_delete_redundant_snapshot(&trans, &iter, k));
667         if (ret) {
668                 bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
669                 goto err;
670         }
671
672         for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
673                            POS_MIN, 0, k,
674                 bch2_snapshot_set_equiv(&trans, k));
675         if (ret) {
676                 bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
677                 goto err;
678         }
679
680         for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
681                            POS_MIN, 0, k, ret) {
682                 if (k.k->type != KEY_TYPE_snapshot)
683                         continue;
684
685                 snap = bkey_s_c_to_snapshot(k);
686                 if (BCH_SNAPSHOT_DELETED(snap.v)) {
687                         ret = snapshot_list_add(c, &deleted, k.k->p.offset);
688                         if (ret)
689                                 break;
690                 }
691         }
692         bch2_trans_iter_exit(&trans, &iter);
693
694         if (ret) {
695                 bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
696                 goto err;
697         }
698
699         for (id = 0; id < BTREE_ID_NR; id++) {
700                 struct bpos last_pos = POS_MIN;
701                 snapshot_id_list equiv_seen = { 0 };
702
703                 if (!btree_type_has_snapshots(id))
704                         continue;
705
706                 ret = for_each_btree_key_commit(&trans, iter,
707                                 id, POS_MIN,
708                                 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
709                                 NULL, NULL, BTREE_INSERT_NOFAIL,
710                         snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
711
712                 darray_exit(&equiv_seen);
713
714                 if (ret) {
715                         bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
716                         goto err;
717                 }
718         }
719
720         for (i = 0; i < deleted.nr; i++) {
721                 ret = commit_do(&trans, NULL, NULL, 0,
722                         bch2_snapshot_node_delete(&trans, deleted.data[i]));
723                 if (ret) {
724                         bch_err(c, "error deleting snapshot %u: %s",
725                                 deleted.data[i], bch2_err_str(ret));
726                         goto err;
727                 }
728         }
729
730         clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
731 err:
732         darray_exit(&deleted);
733         bch2_trans_exit(&trans);
734         return ret;
735 }
736
737 static void bch2_delete_dead_snapshots_work(struct work_struct *work)
738 {
739         struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
740
741         bch2_delete_dead_snapshots(c);
742         percpu_ref_put(&c->writes);
743 }
744
745 void bch2_delete_dead_snapshots_async(struct bch_fs *c)
746 {
747         if (!percpu_ref_tryget_live(&c->writes))
748                 return;
749
750         if (!queue_work(system_long_wq, &c->snapshot_delete_work))
751                 percpu_ref_put(&c->writes);
752 }
753
754 static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
755                                            struct btree_trans_commit_hook *h)
756 {
757         struct bch_fs *c = trans->c;
758
759         set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
760
761         if (!test_bit(BCH_FS_FSCK_DONE, &c->flags))
762                 return 0;
763
764         bch2_delete_dead_snapshots_async(c);
765         return 0;
766 }
767
768 /* Subvolumes: */
769
770 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
771                            int rw, struct printbuf *err)
772 {
773         if (bkey_cmp(k.k->p, SUBVOL_POS_MIN) < 0 ||
774             bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) {
775                 prt_printf(err, "invalid pos");
776                 return -EINVAL;
777         }
778
779         if (bkey_val_bytes(k.k) != sizeof(struct bch_subvolume)) {
780                 prt_printf(err, "incorrect value size (%zu != %zu)",
781                        bkey_val_bytes(k.k), sizeof(struct bch_subvolume));
782                 return -EINVAL;
783         }
784
785         return 0;
786 }
787
788 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
789                             struct bkey_s_c k)
790 {
791         struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
792
793         prt_printf(out, "root %llu snapshot id %u",
794                le64_to_cpu(s.v->inode),
795                le32_to_cpu(s.v->snapshot));
796 }
797
798 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
799                        bool inconsistent_if_not_found,
800                        int iter_flags,
801                        struct bch_subvolume *s)
802 {
803         struct btree_iter iter;
804         struct bkey_s_c k;
805         int ret;
806
807         bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol),
808                              iter_flags);
809         k = bch2_btree_iter_peek_slot(&iter);
810         ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT;
811
812         if (ret == -ENOENT && inconsistent_if_not_found)
813                 bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
814         if (!ret)
815                 *s = *bkey_s_c_to_subvolume(k).v;
816
817         bch2_trans_iter_exit(trans, &iter);
818         return ret;
819 }
820
821 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
822                              struct bch_subvolume *subvol)
823 {
824         struct bch_snapshot snap;
825
826         return  snapshot_lookup(trans, snapshot, &snap) ?:
827                 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
828 }
829
830 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
831                                 u32 *snapid)
832 {
833         struct bch_subvolume s;
834         int ret;
835
836         ret = bch2_subvolume_get(trans, subvol, true,
837                                  BTREE_ITER_CACHED|
838                                  BTREE_ITER_WITH_UPDATES,
839                                  &s);
840
841         *snapid = le32_to_cpu(s.snapshot);
842         return ret;
843 }
844
845 /*
846  * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
847  * deletion/cleanup:
848  */
849 int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
850 {
851         struct btree_iter iter;
852         struct bkey_s_c k;
853         struct bkey_s_c_subvolume subvol;
854         struct btree_trans_commit_hook *h;
855         u32 snapid;
856         int ret = 0;
857
858         bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
859                              POS(0, subvolid),
860                              BTREE_ITER_CACHED|
861                              BTREE_ITER_INTENT);
862         k = bch2_btree_iter_peek_slot(&iter);
863         ret = bkey_err(k);
864         if (ret)
865                 goto err;
866
867         if (k.k->type != KEY_TYPE_subvolume) {
868                 bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid);
869                 ret = -EIO;
870                 goto err;
871         }
872
873         subvol = bkey_s_c_to_subvolume(k);
874         snapid = le32_to_cpu(subvol.v->snapshot);
875
876         ret = bch2_btree_delete_at(trans, &iter, 0);
877         if (ret)
878                 goto err;
879
880         ret = bch2_snapshot_node_set_deleted(trans, snapid);
881         if (ret)
882                 goto err;
883
884         h = bch2_trans_kmalloc(trans, sizeof(*h));
885         ret = PTR_ERR_OR_ZERO(h);
886         if (ret)
887                 goto err;
888
889         h->fn = bch2_delete_dead_snapshots_hook;
890         bch2_trans_commit_hook(trans, h);
891 err:
892         bch2_trans_iter_exit(trans, &iter);
893         return ret;
894 }
895
896 void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
897 {
898         struct bch_fs *c = container_of(work, struct bch_fs,
899                                 snapshot_wait_for_pagecache_and_delete_work);
900         snapshot_id_list s;
901         u32 *id;
902         int ret = 0;
903
904         while (!ret) {
905                 mutex_lock(&c->snapshots_unlinked_lock);
906                 s = c->snapshots_unlinked;
907                 darray_init(&c->snapshots_unlinked);
908                 mutex_unlock(&c->snapshots_unlinked_lock);
909
910                 if (!s.nr)
911                         break;
912
913                 bch2_evict_subvolume_inodes(c, &s);
914
915                 for (id = s.data; id < s.data + s.nr; id++) {
916                         ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
917                                       bch2_subvolume_delete(&trans, *id));
918                         if (ret) {
919                                 bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
920                                 break;
921                         }
922                 }
923
924                 darray_exit(&s);
925         }
926
927         percpu_ref_put(&c->writes);
928 }
929
930 struct subvolume_unlink_hook {
931         struct btree_trans_commit_hook  h;
932         u32                             subvol;
933 };
934
935 int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
936                                                       struct btree_trans_commit_hook *_h)
937 {
938         struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
939         struct bch_fs *c = trans->c;
940         int ret = 0;
941
942         mutex_lock(&c->snapshots_unlinked_lock);
943         if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
944                 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
945         mutex_unlock(&c->snapshots_unlinked_lock);
946
947         if (ret)
948                 return ret;
949
950         if (unlikely(!percpu_ref_tryget_live(&c->writes)))
951                 return -EROFS;
952
953         if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
954                 percpu_ref_put(&c->writes);
955         return 0;
956 }
957
958 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
959 {
960         struct btree_iter iter;
961         struct bkey_s_c k;
962         struct bkey_i_subvolume *n;
963         struct subvolume_unlink_hook *h;
964         int ret = 0;
965
966         bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
967                              POS(0, subvolid),
968                              BTREE_ITER_CACHED|
969                              BTREE_ITER_INTENT);
970         k = bch2_btree_iter_peek_slot(&iter);
971         ret = bkey_err(k);
972         if (ret)
973                 goto err;
974
975         if (k.k->type != KEY_TYPE_subvolume) {
976                 bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid);
977                 ret = -EIO;
978                 goto err;
979         }
980
981         n = bch2_trans_kmalloc(trans, sizeof(*n));
982         ret = PTR_ERR_OR_ZERO(n);
983         if (ret)
984                 goto err;
985
986         bkey_reassemble(&n->k_i, k);
987         SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
988
989         ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
990         if (ret)
991                 goto err;
992
993         h = bch2_trans_kmalloc(trans, sizeof(*h));
994         ret = PTR_ERR_OR_ZERO(h);
995         if (ret)
996                 goto err;
997
998         h->h.fn         = bch2_subvolume_wait_for_pagecache_and_delete_hook;
999         h->subvol       = subvolid;
1000         bch2_trans_commit_hook(trans, &h->h);
1001 err:
1002         bch2_trans_iter_exit(trans, &iter);
1003         return ret;
1004 }
1005
1006 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
1007                           u32 src_subvolid,
1008                           u32 *new_subvolid,
1009                           u32 *new_snapshotid,
1010                           bool ro)
1011 {
1012         struct bch_fs *c = trans->c;
1013         struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
1014         struct bkey_i_subvolume *new_subvol = NULL;
1015         struct bkey_i_subvolume *src_subvol = NULL;
1016         struct bkey_s_c k;
1017         u32 parent = 0, new_nodes[2], snapshot_subvols[2];
1018         int ret = 0;
1019
1020         for_each_btree_key(trans, dst_iter, BTREE_ID_subvolumes, SUBVOL_POS_MIN,
1021                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
1022                 if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
1023                         break;
1024
1025                 /*
1026                  * bch2_subvolume_delete() doesn't flush the btree key cache -
1027                  * ideally it would but that's tricky
1028                  */
1029                 if (bkey_deleted(k.k) &&
1030                     !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos))
1031                         goto found_slot;
1032         }
1033
1034         if (!ret)
1035                 ret = -BCH_ERR_ENOSPC_subvolume_create;
1036         goto err;
1037 found_slot:
1038         snapshot_subvols[0] = dst_iter.pos.offset;
1039         snapshot_subvols[1] = src_subvolid;
1040
1041         if (src_subvolid) {
1042                 /* Creating a snapshot: */
1043                 src_subvol = bch2_trans_kmalloc(trans, sizeof(*src_subvol));
1044                 ret = PTR_ERR_OR_ZERO(src_subvol);
1045                 if (ret)
1046                         goto err;
1047
1048                 bch2_trans_iter_init(trans, &src_iter, BTREE_ID_subvolumes,
1049                                      POS(0, src_subvolid),
1050                                      BTREE_ITER_CACHED|
1051                                      BTREE_ITER_INTENT);
1052                 k = bch2_btree_iter_peek_slot(&src_iter);
1053                 ret = bkey_err(k);
1054                 if (ret)
1055                         goto err;
1056
1057                 if (k.k->type != KEY_TYPE_subvolume) {
1058                         bch_err(c, "subvolume %u not found", src_subvolid);
1059                         ret = -ENOENT;
1060                         goto err;
1061                 }
1062
1063                 bkey_reassemble(&src_subvol->k_i, k);
1064                 parent = le32_to_cpu(src_subvol->v.snapshot);
1065         }
1066
1067         ret = bch2_snapshot_node_create(trans, parent, new_nodes,
1068                                         snapshot_subvols,
1069                                         src_subvolid ? 2 : 1);
1070         if (ret)
1071                 goto err;
1072
1073         if (src_subvolid) {
1074                 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
1075                 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
1076                 if (ret)
1077                         goto err;
1078         }
1079
1080         new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
1081         ret = PTR_ERR_OR_ZERO(new_subvol);
1082         if (ret)
1083                 goto err;
1084
1085         bkey_subvolume_init(&new_subvol->k_i);
1086         new_subvol->v.flags     = 0;
1087         new_subvol->v.snapshot  = cpu_to_le32(new_nodes[0]);
1088         new_subvol->v.inode     = cpu_to_le64(inode);
1089         SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
1090         SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
1091         new_subvol->k.p         = dst_iter.pos;
1092         ret = bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0);
1093         if (ret)
1094                 goto err;
1095
1096         *new_subvolid   = new_subvol->k.p.offset;
1097         *new_snapshotid = new_nodes[0];
1098 err:
1099         bch2_trans_iter_exit(trans, &src_iter);
1100         bch2_trans_iter_exit(trans, &dst_iter);
1101         return ret;
1102 }
1103
1104 int bch2_fs_subvolumes_init(struct bch_fs *c)
1105 {
1106         INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
1107         INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
1108                   bch2_subvolume_wait_for_pagecache_and_delete);
1109         mutex_init(&c->snapshots_unlinked_lock);
1110         return 0;
1111 }