]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/backpointers.c
Update bcachefs sources to ed2a5f4260 bcachefs: Add a missing bch2_btree_path_travers...
[bcachefs-tools-debian] / libbcachefs / backpointers.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "btree_cache.h"
7 #include "btree_update.h"
8 #include "error.h"
9
10 #include <linux/mm.h>
11
12 /*
13  * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
14  * btree:
15  */
16 static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
17                                            struct bpos bp_pos)
18 {
19         struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
20         u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
21
22         return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
23 }
24
25 /*
26  * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
27  */
28 static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
29                                            struct bpos bucket,
30                                            u64 bucket_offset)
31 {
32         struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
33         struct bpos ret;
34
35         ret = POS(bucket.inode,
36                   (bucket_to_sector(ca, bucket.offset) <<
37                    MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
38
39         BUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
40
41         return ret;
42 }
43
/*
 * Return true if @k has a non-cached pointer into @bucket whose generated
 * backpointer is bitwise identical to @bp - i.e. @bp really does describe
 * this extent.
 */
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		/* Cached pointers don't get backpointers: */
		if (p.ptr.cached)
			continue;

		/* Regenerate the backpointer for this pointer and compare: */
		bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
				      &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp)))
			return true;
	}

	return false;
}
70
71 int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
72                              int rw, struct printbuf *err)
73 {
74         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
75         struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
76
77         if (bkey_val_bytes(bp.k) < sizeof(*bp.v)) {
78                 prt_str(err, "incorrect value size");
79                 return -BCH_ERR_invalid_bkey;
80         }
81
82         if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
83                 prt_str(err, "backpointer at wrong pos");
84                 return -BCH_ERR_invalid_bkey;
85         }
86
87         return 0;
88 }
89
/*
 * Print a backpointer: the btree/level it points into, its offset within
 * the bucket (split into sector : sub-sector-scale remainder), its length,
 * and the position of the extent it points to.
 */
void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
	       bch2_btree_ids[bp->btree_id],
	       bp->level,
	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}
100
101 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
102 {
103         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
104 }
105
106 void bch2_backpointer_swab(struct bkey_s k)
107 {
108         struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
109
110         bp.v->bucket_offset     = swab32(bp.v->bucket_offset);
111         bp.v->bucket_len        = swab32(bp.v->bucket_len);
112         bch2_bpos_swab(&bp.v->pos);
113 }
114
115 #define BACKPOINTER_OFFSET_MAX  ((1ULL << 40) - 1)
116
117 static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r)
118 {
119         return cmp_int(l.bucket_offset, r.bucket_offset);
120 }
121
/*
 * Delete the backpointer at @bp_offset within @bucket, after verifying that
 * it matches @bp.
 *
 * Backpointers live in one of two places: offsets below
 * BACKPOINTER_OFFSET_MAX are stored inline in the bucket's alloc_v4 key;
 * offsets at or above it (biased by BACKPOINTER_OFFSET_MAX) live in the
 * backpointers btree.
 *
 * Returns -ENOENT if no matching backpointer is found.
 */
static int bch2_backpointer_del_by_offset(struct btree_trans *trans,
					  struct bpos bucket,
					  u64 bp_offset,
					  struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	if (bp_offset < BACKPOINTER_OFFSET_MAX) {
		/* Backpointer is stored inline in the alloc key: */
		struct bch_backpointer *bps;
		struct bkey_i_alloc_v4 *a;
		unsigned i, nr;

		bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
				     bucket,
				     BTREE_ITER_INTENT|
				     BTREE_ITER_SLOTS|
				     BTREE_ITER_WITH_UPDATES);
		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (k.k->type != KEY_TYPE_alloc_v4) {
			ret = -ENOENT;
			goto err;
		}

		/* Get a mutable copy of the alloc key to edit: */
		a = bch2_alloc_to_v4_mut(trans, k);
		ret = PTR_ERR_OR_ZERO(a);
		if (ret)
			goto err;
		bps = alloc_v4_backpointers(&a->v);
		nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);

		/* Inline backpointers are kept sorted by offset: */
		for (i = 0; i < nr; i++) {
			if (bps[i].bucket_offset == bp_offset)
				goto found;
			if (bps[i].bucket_offset > bp_offset)
				break;
		}

		ret = -ENOENT;
		goto err;
found:
		/* Offset matched, but the whole backpointer must match too: */
		if (memcmp(&bps[i], &bp, sizeof(bp))) {
			ret = -ENOENT;
			goto err;
		}
		array_remove_item(bps, nr, i);
		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
		set_alloc_v4_u64s(a);
		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	} else {
		/* Backpointer is in the backpointers btree: undo the bias */
		bp_offset -= BACKPOINTER_OFFSET_MAX;

		bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers,
				     bucket_pos_to_bp(c, bucket, bp_offset),
				     BTREE_ITER_INTENT|
				     BTREE_ITER_SLOTS|
				     BTREE_ITER_WITH_UPDATES);
		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (k.k->type != KEY_TYPE_backpointer ||
		    memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
			ret = -ENOENT;
			goto err;
		}

		ret = bch2_btree_delete_at(trans, &iter, 0);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
202
/*
 * Delete backpointer @bp for the bucket whose alloc key is @a: check the
 * alloc key's inline backpointer list first, then fall back to the
 * backpointers btree.
 *
 * @orig_k is only used for the error report if the backpointer isn't found.
 */
int bch2_bucket_backpointer_del(struct btree_trans *trans,
				struct bkey_i_alloc_v4 *a,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
	unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	int ret;

	/* Inline backpointers are sorted by offset: */
	for (i = 0; i < nr; i++) {
		int cmp = backpointer_cmp(bps[i], bp) ?:
			memcmp(&bps[i], &bp, sizeof(bp));
		if (!cmp)
			goto found;
		if (cmp >= 0)
			break;
	}

	goto btree;
found:
	/* Found inline - remove it and shrink the alloc key: */
	array_remove_item(bps, nr, i);
	SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
	set_alloc_v4_u64s(a);
	return 0;
btree:
	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
			     bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
			     BTREE_ITER_INTENT|
			     BTREE_ITER_SLOTS|
			     BTREE_ITER_WITH_UPDATES);
	k = bch2_btree_iter_peek_slot(&bp_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
		/* Not found anywhere - report the inconsistency: */
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);

		prt_str(&buf, "alloc ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
		/*
		 * Only a hard error once backpointers fsck has run - before
		 * that, missing backpointers are expected:
		 */
		if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
			bch2_inconsistent_error(c);
			ret = -EIO;
		}
		printbuf_exit(&buf);
		goto err;
	}

	ret = bch2_btree_delete_at(trans, &bp_iter, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
278
/*
 * Add backpointer @bp for the bucket whose alloc key is @a: stored inline
 * in the alloc key if there's room, otherwise in the backpointers btree.
 *
 * Also checks that the new backpointer doesn't overlap an existing inline
 * one, and that no stale key already exists at its btree position.
 * @orig_k is only used for error reporting.
 */
int bch2_bucket_backpointer_add(struct btree_trans *trans,
				struct bkey_i_alloc_v4 *a,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
	unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
	struct bkey_i_backpointer *bp_k;
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	int ret;

	/* Find the sorted insert position among the inline backpointers: */
	for (i = 0; i < nr; i++) {
		int cmp = backpointer_cmp(bps[i], bp);
		if (cmp >= 0)
			break;
	}

	/* Check that @bp doesn't overlap its inline neighbors: */
	if ((i &&
	     (bps[i - 1].bucket_offset +
	      bps[i - 1].bucket_len > bp.bucket_offset)) ||
	    (i < nr &&
	     (bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) {
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, "overlapping backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "into ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
		printbuf_exit(&buf);
		/*
		 * Hard error only after backpointers fsck has run; before
		 * that we log and carry on inserting:
		 */
		if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
			bch2_inconsistent_error(c);
			return -EIO;
		}
	}

	/* Fast path: room for another inline backpointer: */
	if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) {
		array_insert_item(bps, nr, i, bp);
		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
		set_alloc_v4_u64s(a);
		return 0;
	}

	/* Overflow: use backpointer btree */

	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
			     bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
			     BTREE_ITER_INTENT|
			     BTREE_ITER_SLOTS|
			     BTREE_ITER_WITH_UPDATES);
	k = bch2_btree_iter_peek_slot(&bp_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/* The slot we're inserting into should be empty: */
	if (k.k->type) {
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, "existing btree backpointer key found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
		printbuf_exit(&buf);
		if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
			bch2_inconsistent_error(c);
			ret = -EIO;
			goto err;
		}
	}

	/* Allocate and insert the new backpointer key: */
	bp_k = bch2_bkey_alloc(trans, &bp_iter, backpointer);
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		goto err;

	bp_k->v = bp;

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
381
/*
 * Find the next backpointer for @bucket with offset >= *bp_offset.
 *
 * Inline backpointers in the bucket's alloc key are returned first, then
 * keys from the backpointers btree.  On success *dst is filled in and
 * *bp_offset is updated - btree backpointer offsets are biased by
 * BACKPOINTER_OFFSET_MAX so the two namespaces don't collide.  When there
 * are no more backpointers, *bp_offset is set to U64_MAX.
 *
 * If @gen is >= 0, the bucket's generation must match or no backpointers
 * are returned.
 *
 * NOTE(review): @iter_flags is currently unused in this function - confirm
 * whether it was intended to be passed to one of the iterators.
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      u64 *bp_offset,
			      struct bch_backpointer *dst,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	struct bpos bp_pos, bp_end_pos;
	struct btree_iter alloc_iter, bp_iter = { NULL };
	struct bkey_s_c k;
	struct bkey_s_c_alloc_v4 a;
	size_t i;
	int ret;

	/* U64_MAX means a previous call already hit the end: */
	if (*bp_offset == U64_MAX)
		return 0;

	/* Range of the backpointers btree covered by this bucket: */
	bp_pos = bucket_pos_to_bp(c, bucket,
				  max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
	bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);

	bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
	k = bch2_btree_iter_peek_slot(&alloc_iter);
	ret = bkey_err(k);
	if (ret)
		goto out;

	if (k.k->type != KEY_TYPE_alloc_v4)
		goto done;

	a = bkey_s_c_to_alloc_v4(k);
	if (gen >= 0 && a.v->gen != gen)
		goto done;

	/* Inline backpointers first (sorted by offset): */
	for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) {
		if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset)
			continue;

		*dst = alloc_v4_backpointers_c(a.v)[i];
		*bp_offset = dst->bucket_offset;
		goto out;
	}

	/* Then keys in the backpointers btree: */
	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     bp_pos, 0, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		if (k.k->type != KEY_TYPE_backpointer)
			continue;

		*dst = *bkey_s_c_to_backpointer(k).v;
		/* Bias btree backpointer offsets past the inline range: */
		*bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX;
		goto out;
	}
done:
	*bp_offset = U64_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}
448
/*
 * Report a backpointer that doesn't match the extent or btree node it
 * points to.  Before backpointers fsck has run this is only a ratelimited
 * warning; afterwards it's a transaction-inconsistency error.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bucket,
				  u64 bp_offset,
				  struct bch_backpointer bp,
				  struct bkey_s_c k,
				  const char *thing_it_points_to)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   thing_it_points_to);
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	/* Only btree backpointers have a separate btree key position: */
	if (bp_offset >= BACKPOINTER_OFFSET_MAX) {
		struct bpos bp_pos =
			bucket_pos_to_bp(c, bucket,
					bp_offset - BACKPOINTER_OFFSET_MAX);
		prt_printf(&buf, "backpointer pos: ");
		bch2_bpos_to_text(&buf, bp_pos);
		prt_printf(&buf, "\n  ");
	}

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}
484
/*
 * Return the extent @bp points to, verifying it really has a pointer to
 * @bucket matching @bp; returns bkey_s_c_null (after reporting the
 * inconsistency) if it doesn't.
 *
 * On a non-null, non-error return the key is pinned by @iter, which the
 * caller must exit; otherwise the iterator has already been exited here.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bucket,
					 u64 bp_offset,
					 struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;

	/* Clamp the iterator level to the current root's level: */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  min(bp.level, c->btree_roots[bp.btree_id].level),
				  0);
	k = bch2_btree_iter_peek_slot(iter);
	if (bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	/* A backpointer one level above the root points at the root itself: */
	if (bp.level == c->btree_roots[bp.btree_id].level + 1)
		k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);

	if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
		return k;

	bch2_trans_iter_exit(trans, iter);

	if (bp.level) {
		struct btree *b;

		/*
		 * If a backpointer for a btree node wasn't found, it may be
		 * because it was overwritten by a new btree node that hasn't
		 * been written out yet - backpointer_get_node() checks for
		 * this:
		 */
		b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
		if (!IS_ERR_OR_NULL(b))
			return bkey_i_to_s_c(&b->key);

		bch2_trans_iter_exit(trans, iter);

		if (IS_ERR(b))
			return bkey_s_c_err(PTR_ERR(b));
		return bkey_s_c_null;
	}

	backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
	return bkey_s_c_null;
}
537
538 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
539                                         struct btree_iter *iter,
540                                         struct bpos bucket,
541                                         u64 bp_offset,
542                                         struct bch_backpointer bp)
543 {
544         struct bch_fs *c = trans->c;
545         struct btree *b;
546
547         BUG_ON(!bp.level);
548
549         bch2_trans_node_iter_init(trans, iter,
550                                   bp.btree_id,
551                                   bp.pos,
552                                   0,
553                                   bp.level - 1,
554                                   0);
555         b = bch2_btree_iter_peek_node(iter);
556         if (IS_ERR(b))
557                 goto err;
558
559         if (b && extent_matches_bp(c, bp.btree_id, bp.level,
560                                    bkey_i_to_s_c(&b->key),
561                                    bucket, bp))
562                 return b;
563
564         if (b && btree_node_will_make_reachable(b)) {
565                 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
566         } else {
567                 backpointer_not_found(trans, bucket, bp_offset, bp,
568                                       bkey_i_to_s_c(&b->key), "btree node");
569                 b = NULL;
570         }
571 err:
572         bch2_trans_iter_exit(trans, iter);
573         return b;
574 }
575
576 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
577                                         struct bkey_s_c k)
578 {
579         struct bch_fs *c = trans->c;
580         struct btree_iter alloc_iter = { NULL };
581         struct bch_dev *ca;
582         struct bkey_s_c alloc_k;
583         struct printbuf buf = PRINTBUF;
584         int ret = 0;
585
586         if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
587                         "backpointer for mising device:\n%s",
588                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
589                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
590                 goto out;
591         }
592
593         ca = bch_dev_bkey_exists(c, k.k->p.inode);
594
595         bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
596                              bp_pos_to_bucket(c, k.k->p), 0);
597
598         alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
599         ret = bkey_err(alloc_k);
600         if (ret)
601                 goto out;
602
603         if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
604                         "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
605                         alloc_iter.pos.inode, alloc_iter.pos.offset,
606                         (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
607                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
608                 goto out;
609         }
610 out:
611 fsck_err:
612         bch2_trans_iter_exit(trans, &alloc_iter);
613         printbuf_exit(&buf);
614         return ret;
615 }
616
/*
 * Verify that every backpointer has a corresponding alloc key: walks the
 * entire backpointers btree, committing after each key checked.
 */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	struct btree_iter iter;
	struct bkey_s_c k;

	return bch2_trans_run(c,
		for_each_btree_key_commit(&trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
		  bch2_check_btree_backpointer(&trans, &iter, k)));
}
629
/*
 * Verify that backpointer @bp exists for @bucket_pos - either inline in the
 * bucket's alloc key or in the backpointers btree - and (with fsck_err
 * confirmation) add it if it's missing.
 *
 * Buckets outside [bucket_start, bucket_end] are skipped: the caller runs
 * this check in multiple passes when the btrees don't all fit in memory.
 */
static int check_bp_exists(struct btree_trans *trans,
			   struct bpos bucket_pos,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k,
			   struct bpos bucket_start,
			   struct bpos bucket_end)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter, bp_iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c alloc_k, bp_k;
	int ret;

	/* Outside this pass's window - another pass will handle it: */
	if (bpos_lt(bucket_pos, bucket_start) ||
	    bpos_gt(bucket_pos, bucket_end))
		return 0;

	bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
	alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
	ret = bkey_err(alloc_k);
	if (ret)
		goto err;

	/* Check the inline backpointers in the alloc key first: */
	if (alloc_k.k->type == KEY_TYPE_alloc_v4) {
		struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k);
		const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v);
		unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v);

		for (i = 0; i < nr; i++) {
			int cmp = backpointer_cmp(bps[i], bp) ?:
				memcmp(&bps[i], &bp, sizeof(bp));
			if (!cmp)
				goto out;
			/* List is sorted - past this point it can't match: */
			if (cmp >= 0)
				break;
		}
	} else {
		goto missing;
	}

	/* Not inline - check the backpointers btree: */
	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
			     bucket_pos_to_bp(c, bucket_pos, bp.bucket_offset),
			     0);
	bp_k = bch2_btree_iter_peek_slot(&bp_iter);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp)))
		goto missing;
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
missing:
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
	       bch2_btree_ids[bp.btree_id], bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\nin alloc key ");
	bch2_bkey_val_to_text(&buf, c, alloc_k);

	/*
	 * Repair without prompting when backpointers predate this filesystem
	 * version or alloc info is being reconstructed anyway:
	 */
	if (c->sb.version < bcachefs_metadata_version_backpointers ||
	    c->opts.reconstruct_alloc ||
	    fsck_err(c, "%s", buf.buf)) {
		struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);

		ret   = PTR_ERR_OR_ZERO(a) ?:
			bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?:
			bch2_trans_update(trans, &alloc_iter, &a->k_i, 0);
	}

	goto out;
}
707
/*
 * For the extent at @iter's current position, verify that a backpointer
 * exists for each non-cached pointer (within the [bucket_start, bucket_end]
 * window being checked in this pass).
 */
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bucket_start,
					struct bpos bucket_end)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bkey_s_c k;
	int ret;

	k = bch2_btree_iter_peek_all_levels(iter);
	ret = bkey_err(k);
	if (ret)
		return ret;
	if (!k.k)
		return 0;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		/* Cached pointers don't get backpointers: */
		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
				      k, p, &bucket_pos, &bp);

		ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
		if (ret)
			return ret;
	}

	return 0;
}
745
/*
 * Verify backpointers for the pointers in @btree_id's root node key - the
 * root isn't visible to the normal extent walk, so it's handled here.
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    enum btree_id btree_id,
					    struct bpos bucket_start,
					    struct bpos bucket_end)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	struct bkey_ptrs_c ptrs;
	struct extent_ptr_decoded p;
	const union bch_extent_entry *entry;
	int ret;

	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
				  c->btree_roots[btree_id].level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	BUG_ON(b != btree_node_root(c, b));

	k = bkey_i_to_s_c(&b->key);
	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		/* Cached pointers don't get backpointers: */
		if (p.ptr.cached)
			continue;

		/* The root's own pointer lives one level above its keys: */
		bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
				      k, p, &bucket_pos, &bp);

		ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
		if (ret)
			goto err;
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
789
790 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
791 {
792         return (struct bbpos) {
793                 .btree  = bp.btree_id,
794                 .pos    = bp.pos,
795         };
796 }
797
798 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
799 {
800         struct sysinfo i;
801         u64 mem_bytes;
802
803         si_meminfo(&i);
804         mem_bytes = i.totalram * i.mem_unit;
805         return (mem_bytes >> 1) / btree_bytes(c);
806 }
807
/*
 * Starting from @start, walk the requested btrees (leaf and/or interior
 * nodes, per the masks) counting nodes against what fits in half of RAM:
 * sets *end to where the budget ran out, or BBPOS_MAX if everything fits.
 */
int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
				 unsigned btree_leaf_mask,
				 unsigned btree_interior_mask,
				 struct bbpos start, struct bbpos *end)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
	enum btree_id btree;
	int ret = 0;

	for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
		/* Depth 1 visits leaf nodes too; depth 2 only interior nodes: */
		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;

		if (!((1U << btree) & btree_leaf_mask) &&
		    !((1U << btree) & btree_interior_mask))
			continue;

		bch2_trans_node_iter_init(trans, &iter, btree,
					  btree == start.btree ? start.pos : POS_MIN,
					  0, depth, 0);
		/*
		 * for_each_btree_key_continue() doesn't check the return value
		 * from bch2_btree_iter_advance(), which is needed when
		 * iterating over interior nodes where we'll see keys at
		 * SPOS_MAX:
		 */
		do {
			k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
			ret = bkey_err(k);
			if (!k.k || ret)
				break;

			--btree_nodes;
			if (!btree_nodes) {
				/* Out of budget - record where we stopped: */
				*end = BBPOS(btree, k.k->p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		} while (bch2_btree_iter_advance(&iter));
		bch2_trans_iter_exit(trans, &iter);
	}

	*end = BBPOS_MAX;
	return ret;
}
854
/*
 * One pass of the extents -> backpointers check: walk every btree, verifying
 * for each key that backpointers exist for buckets in
 * [@bucket_start, @bucket_end], then check each btree root separately (roots
 * aren't visited by the node iterator).
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
                                                   struct bpos bucket_start,
                                                   struct bpos bucket_end)
{
        struct btree_iter iter;
        enum btree_id btree_id;
        int ret = 0;

        for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
                /* btrees without pointers in their keys: only walk interior nodes */
                unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

                bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
                                          depth,
                                          BTREE_ITER_ALL_LEVELS|
                                          BTREE_ITER_PREFETCH);

                /*
                 * NOTE(review): this loop continues while
                 * bch2_btree_iter_advance() returns *false* - the opposite
                 * sense of the advance loops elsewhere in this file.
                 * Presumably intentional for BTREE_ITER_ALL_LEVELS iteration,
                 * but worth confirming it terminates at SPOS_MAX.
                 */
                do {
                        ret = commit_do(trans, NULL, NULL,
                                        BTREE_INSERT_LAZY_RW|
                                        BTREE_INSERT_NOFAIL,
                                        check_extent_to_backpointers(trans, &iter,
                                                                bucket_start, bucket_end));
                        if (ret)
                                break;
                } while (!bch2_btree_iter_advance(&iter));

                bch2_trans_iter_exit(trans, &iter);

                if (ret)
                        break;

                /* Also verify the root node's pointers have backpointers: */
                ret = commit_do(trans, NULL, NULL,
                                BTREE_INSERT_LAZY_RW|
                                BTREE_INSERT_NOFAIL,
                                check_btree_root_to_backpointers(trans, btree_id,
                                                        bucket_start, bucket_end));
                if (ret)
                        break;
        }
        return ret;
}
896
897 static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
898                                          struct bpos bucket)
899 {
900         return bch2_dev_exists2(c, bucket.inode)
901                 ? bucket_pos_to_bp(c, bucket, 0)
902                 : bucket;
903 }
904
905 int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
906                                  struct bpos start, struct bpos *end)
907 {
908         struct btree_iter alloc_iter;
909         struct btree_iter bp_iter;
910         struct bkey_s_c alloc_k, bp_k;
911         size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
912         bool alloc_end = false, bp_end = false;
913         int ret = 0;
914
915         bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
916                                   start, 0, 1, 0);
917         bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
918                                   bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
919         while (1) {
920                 alloc_k = !alloc_end
921                         ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
922                         : bkey_s_c_null;
923                 bp_k = !bp_end
924                         ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
925                         : bkey_s_c_null;
926
927                 ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
928                 if ((!alloc_k.k && !bp_k.k) || ret) {
929                         *end = SPOS_MAX;
930                         break;
931                 }
932
933                 --btree_nodes;
934                 if (!btree_nodes) {
935                         *end = alloc_k.k->p;
936                         break;
937                 }
938
939                 if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
940                     bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
941                         if (!bch2_btree_iter_advance(&alloc_iter))
942                                 alloc_end = true;
943                 } else {
944                         if (!bch2_btree_iter_advance(&bp_iter))
945                                 bp_end = true;
946                 }
947         }
948         bch2_trans_iter_exit(trans, &bp_iter);
949         bch2_trans_iter_exit(trans, &alloc_iter);
950         return ret;
951 }
952
953 int bch2_check_extents_to_backpointers(struct bch_fs *c)
954 {
955         struct btree_trans trans;
956         struct bpos start = POS_MIN, end;
957         int ret;
958
959         bch2_trans_init(&trans, c, 0, 0);
960         while (1) {
961                 ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
962                 if (ret)
963                         break;
964
965                 if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
966                         bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
967                                     __func__, btree_nodes_fit_in_ram(c));
968
969                 if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
970                         struct printbuf buf = PRINTBUF;
971
972                         prt_str(&buf, "check_extents_to_backpointers(): ");
973                         bch2_bpos_to_text(&buf, start);
974                         prt_str(&buf, "-");
975                         bch2_bpos_to_text(&buf, end);
976
977                         bch_verbose(c, "%s", buf.buf);
978                         printbuf_exit(&buf);
979                 }
980
981                 ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
982                 if (ret || bpos_eq(end, SPOS_MAX))
983                         break;
984
985                 start = bpos_successor(end);
986         }
987         bch2_trans_exit(&trans);
988
989         return ret;
990 }
991
/*
 * Check the next backpointer for @bucket at offset >= *@bp_offset: look up
 * the extent or btree node it points to and, with user confirmation via
 * fsck_err, delete the backpointer if the target is missing. *@bp_offset is
 * advanced to the backpointer found (U64_MAX when none remain).
 *
 * Backpointers whose target lies outside [@start, @end] are skipped - the
 * caller shards the check into in-memory-sized passes.
 */
static int check_one_backpointer(struct btree_trans *trans,
                                 struct bpos bucket,
                                 u64 *bp_offset,
                                 struct bbpos start,
                                 struct bbpos end)
{
        struct btree_iter iter;
        struct bch_backpointer bp;
        struct bbpos pos;
        struct bkey_s_c k;
        struct printbuf buf = PRINTBUF;
        int ret;

        ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp, 0);
        if (ret || *bp_offset == U64_MAX)
                return ret;

        /* Target outside this pass's range - leave it for another pass: */
        pos = bp_to_bbpos(bp);
        if (bbpos_cmp(pos, start) < 0 ||
            bbpos_cmp(pos, end) > 0)
                return 0;

        k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
        ret = bkey_err(k);
        /* A backpointer into an overwritten btree node is not treated as an error: */
        if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
                return 0;
        if (ret)
                return ret;
        /*
         * NOTE(review): the two early returns above don't call
         * bch2_trans_iter_exit() on @iter - presumably
         * bch2_backpointer_get_key() doesn't leave an initialized iterator
         * behind on error, but worth confirming a btree path can't leak here.
         */

        if (fsck_err_on(!k.k, trans->c,
                        "%s backpointer points to missing extent\n%s",
                        *bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
                        (bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
                ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
                if (ret == -ENOENT)
                        bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
        }

        bch2_trans_iter_exit(trans, &iter);
fsck_err:
        printbuf_exit(&buf);
        return ret;
}
1035
1036 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
1037                                                    struct bbpos start,
1038                                                    struct bbpos end)
1039 {
1040         struct btree_iter iter;
1041         struct bkey_s_c k;
1042         int ret = 0;
1043
1044         for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
1045                            BTREE_ITER_PREFETCH, k, ret) {
1046                 u64 bp_offset = 0;
1047
1048                 while (!(ret = commit_do(trans, NULL, NULL,
1049                                          BTREE_INSERT_LAZY_RW|
1050                                          BTREE_INSERT_NOFAIL,
1051                                 check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
1052                        bp_offset < U64_MAX)
1053                         bp_offset++;
1054
1055                 if (ret)
1056                         break;
1057         }
1058         bch2_trans_iter_exit(trans, &iter);
1059         return ret < 0 ? ret : 0;
1060 }
1061
1062 int bch2_check_backpointers_to_extents(struct bch_fs *c)
1063 {
1064         struct btree_trans trans;
1065         struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
1066         int ret;
1067
1068         bch2_trans_init(&trans, c, 0, 0);
1069         while (1) {
1070                 ret = bch2_get_btree_in_memory_pos(&trans,
1071                                                    (1U << BTREE_ID_extents)|
1072                                                    (1U << BTREE_ID_reflink),
1073                                                    ~0,
1074                                                    start, &end);
1075                 if (ret)
1076                         break;
1077
1078                 if (!bbpos_cmp(start, BBPOS_MIN) &&
1079                     bbpos_cmp(end, BBPOS_MAX))
1080                         bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
1081                                     __func__, btree_nodes_fit_in_ram(c));
1082
1083                 if (bbpos_cmp(start, BBPOS_MIN) ||
1084                     bbpos_cmp(end, BBPOS_MAX)) {
1085                         struct printbuf buf = PRINTBUF;
1086
1087                         prt_str(&buf, "check_backpointers_to_extents(): ");
1088                         bch2_bbpos_to_text(&buf, start);
1089                         prt_str(&buf, "-");
1090                         bch2_bbpos_to_text(&buf, end);
1091
1092                         bch_verbose(c, "%s", buf.buf);
1093                         printbuf_exit(&buf);
1094                 }
1095
1096                 ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
1097                 if (ret || !bbpos_cmp(end, BBPOS_MAX))
1098                         break;
1099
1100                 start = bbpos_successor(end);
1101         }
1102         bch2_trans_exit(&trans);
1103
1104         return ret;
1105 }