]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
Update bcachefs sources to d9bb516b2d bcachefs: Move extent overwrite handling out...
[bcachefs-tools-debian] / libbcachefs / fsck.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_update.h"
5 #include "dirent.h"
6 #include "error.h"
7 #include "fs-common.h"
8 #include "fsck.h"
9 #include "inode.h"
10 #include "keylist.h"
11 #include "super.h"
12 #include "xattr.h"
13
14 #include <linux/dcache.h> /* struct qstr */
15 #include <linux/generic-radix-tree.h>
16
17 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
18
19 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
20 {
21         struct btree_iter *iter;
22         struct bkey_s_c k;
23         u64 sectors = 0;
24         int ret;
25
26         for_each_btree_key(trans, iter, BTREE_ID_EXTENTS,
27                            POS(inum, 0), 0, k, ret) {
28                 if (k.k->p.inode != inum)
29                         break;
30
31                 if (bkey_extent_is_allocation(k.k))
32                         sectors += k.k->size;
33         }
34
35         bch2_trans_iter_free(trans, iter);
36
37         return ret ?: sectors;
38 }
39
40 static int __remove_dirent(struct btree_trans *trans,
41                            struct bkey_s_c_dirent dirent)
42 {
43         struct bch_fs *c = trans->c;
44         struct qstr name;
45         struct bch_inode_unpacked dir_inode;
46         struct bch_hash_info dir_hash_info;
47         u64 dir_inum = dirent.k->p.inode;
48         int ret;
49         char *buf;
50
51         name.len = bch2_dirent_name_bytes(dirent);
52         buf = bch2_trans_kmalloc(trans, name.len + 1);
53         if (IS_ERR(buf))
54                 return PTR_ERR(buf);
55
56         memcpy(buf, dirent.v->d_name, name.len);
57         buf[name.len] = '\0';
58         name.name = buf;
59
60         ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode);
61         if (ret && ret != -EINTR)
62                 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
63         if (ret)
64                 return ret;
65
66         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
67
68         ret = bch2_hash_delete(trans, bch2_dirent_hash_desc,
69                                &dir_hash_info, dir_inum, &name);
70         if (ret && ret != -EINTR)
71                 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
72         if (ret)
73                 return ret;
74
75         return 0;
76 }
77
78 static int remove_dirent(struct btree_trans *trans,
79                          struct bkey_s_c_dirent dirent)
80 {
81         return __bch2_trans_do(trans, NULL, NULL,
82                                BTREE_INSERT_NOFAIL|
83                                BTREE_INSERT_LAZY_RW,
84                                __remove_dirent(trans, dirent));
85 }
86
87 static int reattach_inode(struct bch_fs *c,
88                           struct bch_inode_unpacked *lostfound_inode,
89                           u64 inum)
90 {
91         struct bch_inode_unpacked dir_u, inode_u;
92         char name_buf[20];
93         struct qstr name;
94         int ret;
95
96         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
97         name = (struct qstr) QSTR(name_buf);
98
99         ret = bch2_trans_do(c, NULL, NULL,
100                             BTREE_INSERT_LAZY_RW,
101                 bch2_link_trans(&trans, lostfound_inode->bi_inum,
102                                 inum, &dir_u, &inode_u, &name));
103         if (ret)
104                 bch_err(c, "error %i reattaching inode %llu", ret, inum);
105
106         return ret;
107 }
108
109 struct inode_walker {
110         bool                    first_this_inode;
111         bool                    have_inode;
112         u64                     cur_inum;
113         struct bch_inode_unpacked inode;
114 };
115
116 static struct inode_walker inode_walker_init(void)
117 {
118         return (struct inode_walker) {
119                 .cur_inum       = -1,
120                 .have_inode     = false,
121         };
122 }
123
124 static int walk_inode(struct btree_trans *trans,
125                       struct inode_walker *w, u64 inum)
126 {
127         if (inum != w->cur_inum) {
128                 int ret = bch2_inode_find_by_inum_trans(trans, inum,
129                                                         &w->inode);
130
131                 if (ret && ret != -ENOENT)
132                         return ret;
133
134                 w->have_inode   = !ret;
135                 w->cur_inum     = inum;
136                 w->first_this_inode = true;
137         } else {
138                 w->first_this_inode = false;
139         }
140
141         return 0;
142 }
143
144 struct hash_check {
145         struct bch_hash_info    info;
146
147         /* start of current chain of hash collisions: */
148         struct btree_iter       *chain;
149
150         /* next offset in current chain of hash collisions: */
151         u64                     chain_end;
152 };
153
154 static void hash_check_init(struct hash_check *h)
155 {
156         h->chain = NULL;
157         h->chain_end = 0;
158 }
159
160 static void hash_stop_chain(struct btree_trans *trans,
161                             struct hash_check *h)
162 {
163         if (h->chain)
164                 bch2_trans_iter_free(trans, h->chain);
165         h->chain = NULL;
166 }
167
168 static void hash_check_set_inode(struct btree_trans *trans,
169                                  struct hash_check *h,
170                                  const struct bch_inode_unpacked *bi)
171 {
172         h->info = bch2_hash_info_init(trans->c, bi);
173         hash_stop_chain(trans, h);
174 }
175
176 static int hash_redo_key(const struct bch_hash_desc desc,
177                          struct btree_trans *trans, struct hash_check *h,
178                          struct btree_iter *k_iter, struct bkey_s_c k,
179                          u64 hashed)
180 {
181         struct bkey_i delete;
182         struct bkey_i *tmp;
183
184         tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
185         if (IS_ERR(tmp))
186                 return PTR_ERR(tmp);
187
188         bkey_reassemble(tmp, k);
189
190         bkey_init(&delete.k);
191         delete.k.p = k_iter->pos;
192         bch2_trans_update(trans, k_iter, &delete, 0);
193
194         return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
195                              tmp, BCH_HASH_SET_MUST_CREATE);
196 }
197
198 static int fsck_hash_delete_at(struct btree_trans *trans,
199                                const struct bch_hash_desc desc,
200                                struct bch_hash_info *info,
201                                struct btree_iter *iter)
202 {
203         int ret;
204 retry:
205         ret   = bch2_hash_delete_at(trans, desc, info, iter) ?:
206                 bch2_trans_commit(trans, NULL, NULL,
207                                   BTREE_INSERT_NOFAIL|
208                                   BTREE_INSERT_LAZY_RW);
209         if (ret == -EINTR) {
210                 ret = bch2_btree_iter_traverse(iter);
211                 if (!ret)
212                         goto retry;
213         }
214
215         return ret;
216 }
217
218 static int hash_check_duplicates(struct btree_trans *trans,
219                         const struct bch_hash_desc desc, struct hash_check *h,
220                         struct btree_iter *k_iter, struct bkey_s_c k)
221 {
222         struct bch_fs *c = trans->c;
223         struct btree_iter *iter;
224         struct bkey_s_c k2;
225         char buf[200];
226         int ret = 0;
227
228         if (!bkey_cmp(h->chain->pos, k_iter->pos))
229                 return 0;
230
231         iter = bch2_trans_copy_iter(trans, h->chain);
232         BUG_ON(IS_ERR(iter));
233
234         for_each_btree_key_continue(iter, 0, k2, ret) {
235                 if (bkey_cmp(k2.k->p, k.k->p) >= 0)
236                         break;
237
238                 if (fsck_err_on(k2.k->type == desc.key_type &&
239                                 !desc.cmp_bkey(k, k2), c,
240                                 "duplicate hash table keys:\n%s",
241                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
242                                                        k), buf))) {
243                         ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
244                         if (ret)
245                                 return ret;
246                         ret = 1;
247                         break;
248                 }
249         }
250 fsck_err:
251         bch2_trans_iter_free(trans, iter);
252         return ret;
253 }
254
255 static void hash_set_chain_start(struct btree_trans *trans,
256                         const struct bch_hash_desc desc,
257                         struct hash_check *h,
258                         struct btree_iter *k_iter, struct bkey_s_c k)
259 {
260         bool hole = (k.k->type != KEY_TYPE_whiteout &&
261                      k.k->type != desc.key_type);
262
263         if (hole || k.k->p.offset > h->chain_end + 1)
264                 hash_stop_chain(trans, h);
265
266         if (!hole) {
267                 if (!h->chain) {
268                         h->chain = bch2_trans_copy_iter(trans, k_iter);
269                         BUG_ON(IS_ERR(h->chain));
270                 }
271
272                 h->chain_end = k.k->p.offset;
273         }
274 }
275
276 static bool key_has_correct_hash(struct btree_trans *trans,
277                         const struct bch_hash_desc desc,
278                         struct hash_check *h,
279                         struct btree_iter *k_iter, struct bkey_s_c k)
280 {
281         u64 hash;
282
283         hash_set_chain_start(trans, desc, h, k_iter, k);
284
285         if (k.k->type != desc.key_type)
286                 return true;
287
288         hash = desc.hash_bkey(&h->info, k);
289
290         return hash >= h->chain->pos.offset &&
291                 hash <= k.k->p.offset;
292 }
293
294 static int hash_check_key(struct btree_trans *trans,
295                         const struct bch_hash_desc desc, struct hash_check *h,
296                         struct btree_iter *k_iter, struct bkey_s_c k)
297 {
298         struct bch_fs *c = trans->c;
299         char buf[200];
300         u64 hashed;
301         int ret = 0;
302
303         hash_set_chain_start(trans, desc, h, k_iter, k);
304
305         if (k.k->type != desc.key_type)
306                 return 0;
307
308         hashed = desc.hash_bkey(&h->info, k);
309
310         if (fsck_err_on(hashed < h->chain->pos.offset ||
311                         hashed > k.k->p.offset, c,
312                         "hash table key at wrong offset: btree %u, %llu, "
313                         "hashed to %llu chain starts at %llu\n%s",
314                         desc.btree_id, k.k->p.offset,
315                         hashed, h->chain->pos.offset,
316                         (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) {
317                 ret = __bch2_trans_do(trans, NULL, NULL,
318                                       BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
319                         hash_redo_key(desc, trans, h, k_iter, k, hashed));
320                 if (ret) {
321                         bch_err(c, "hash_redo_key err %i", ret);
322                         return ret;
323                 }
324                 return 1;
325         }
326
327         ret = hash_check_duplicates(trans, desc, h, k_iter, k);
328 fsck_err:
329         return ret;
330 }
331
332 static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
333                              struct btree_iter *iter, struct bkey_s_c *k)
334 {
335         struct bch_fs *c = trans->c;
336         struct bkey_i_dirent *d = NULL;
337         int ret = -EINVAL;
338         char buf[200];
339         unsigned len;
340         u64 hash;
341
342         if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
343                 return 0;
344
345         len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
346         BUG_ON(!len);
347
348         memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
349         buf[len] = '\0';
350
351         d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
352         if (!d) {
353                 bch_err(c, "memory allocation failure");
354                 return -ENOMEM;
355         }
356
357         bkey_reassemble(&d->k_i, *k);
358
359         do {
360                 --len;
361                 if (!len)
362                         goto err_redo;
363
364                 d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
365
366                 BUG_ON(bkey_val_bytes(&d->k) <
367                        offsetof(struct bch_dirent, d_name) + len);
368
369                 memset(d->v.d_name + len, 0,
370                        bkey_val_bytes(&d->k) -
371                        offsetof(struct bch_dirent, d_name) - len);
372
373                 hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
374                                                 bkey_i_to_s_c(&d->k_i));
375         } while (hash < h->chain->pos.offset ||
376                  hash > k->k->p.offset);
377
378         if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
379                      buf, strlen(buf), d->v.d_name, len)) {
380                 ret = __bch2_trans_do(trans, NULL, NULL,
381                                       BTREE_INSERT_NOFAIL|
382                                       BTREE_INSERT_LAZY_RW,
383                         (bch2_trans_update(trans, iter, &d->k_i, 0), 0));
384                 if (ret)
385                         goto err;
386
387                 *k = bch2_btree_iter_peek(iter);
388
389                 BUG_ON(k->k->type != KEY_TYPE_dirent);
390         }
391 err:
392 fsck_err:
393         kfree(d);
394         return ret;
395 err_redo:
396         hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
397
398         if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
399                      "hash table key at wrong offset: btree %u, offset %llu, "
400                      "hashed to %llu chain starts at %llu\n%s",
401                      buf, strlen(buf), BTREE_ID_DIRENTS,
402                      k->k->p.offset, hash, h->chain->pos.offset,
403                      (bch2_bkey_val_to_text(&PBUF(buf), c,
404                                             *k), buf))) {
405                 ret = __bch2_trans_do(trans, NULL, NULL,
406                                       BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
407                         hash_redo_key(bch2_dirent_hash_desc, trans,
408                                       h, iter, *k, hash));
409                 if (ret)
410                         bch_err(c, "hash_redo_key err %i", ret);
411                 else
412                         ret = 1;
413         }
414
415         goto err;
416 }
417
418 static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
419 {
420         return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
421                         POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9),
422                         POS(inode_nr + 1, 0), NULL);
423 }
424
425 static int bch2_fix_overlapping_extent(struct btree_trans *trans,
426                                        struct btree_iter *iter,
427                                        struct bkey_s_c k, struct bpos cut_at)
428 {
429         struct btree_iter *u_iter;
430         struct bkey_i *u;
431         int ret;
432
433         u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
434         ret = PTR_ERR_OR_ZERO(u);
435         if (ret)
436                 return ret;
437
438         bkey_reassemble(u, k);
439         bch2_cut_front(cut_at, u);
440
441         u_iter = bch2_trans_copy_iter(trans, iter);
442         ret = PTR_ERR_OR_ZERO(u_iter);
443         if (ret)
444                 return ret;
445
446         /*
447          * We don't want to go through the
448          * extent_handle_overwrites path:
449          */
450         __bch2_btree_iter_set_pos(u_iter, u->k.p, false);
451
452         /*
453          * XXX: this is going to leave disk space
454          * accounting slightly wrong
455          */
456         ret = bch2_trans_update(trans, u_iter, u, 0);
457         bch2_trans_iter_put(trans, u_iter);
458         return ret;
459 }
460
461 /*
462  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
463  * that i_size an i_sectors are consistent
464  */
465 noinline_for_stack
466 static int check_extents(struct bch_fs *c)
467 {
468         struct inode_walker w = inode_walker_init();
469         struct btree_trans trans;
470         struct btree_iter *iter;
471         struct bkey_s_c k;
472         struct bkey prev = KEY(0, 0, 0);
473         u64 i_sectors;
474         int ret = 0;
475
476         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
477
478         bch_verbose(c, "checking extents");
479
480         iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
481                                    POS(BCACHEFS_ROOT_INO, 0), 0);
482 retry:
483         for_each_btree_key_continue(iter, 0, k, ret) {
484                 if (bkey_cmp(prev.p, bkey_start_pos(k.k)) > 0) {
485                         char buf1[100];
486                         char buf2[100];
487
488                         bch2_bkey_to_text(&PBUF(buf1), &prev);
489                         bch2_bkey_to_text(&PBUF(buf2), k.k);
490
491                         if (fsck_err(c, "overlapping extents: %s, %s", buf1, buf2)) {
492                                 ret = __bch2_trans_do(&trans, NULL, NULL,
493                                                       BTREE_INSERT_NOFAIL|
494                                                       BTREE_INSERT_LAZY_RW,
495                                                 bch2_fix_overlapping_extent(&trans,
496                                                                 iter, k, prev.p));
497                                 if (ret)
498                                         goto err;
499                         }
500                 }
501                 prev = *k.k;
502
503                 ret = walk_inode(&trans, &w, k.k->p.inode);
504                 if (ret)
505                         break;
506
507                 if (fsck_err_on(!w.have_inode, c,
508                         "extent type %u for missing inode %llu",
509                         k.k->type, k.k->p.inode) ||
510                     fsck_err_on(w.have_inode &&
511                         !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
512                         "extent type %u for non regular file, inode %llu mode %o",
513                         k.k->type, k.k->p.inode, w.inode.bi_mode)) {
514                         bch2_trans_unlock(&trans);
515
516                         ret = bch2_inode_truncate(c, k.k->p.inode, 0);
517                         if (ret)
518                                 goto err;
519                         continue;
520                 }
521
522                 if (fsck_err_on(w.first_this_inode &&
523                         w.have_inode &&
524                         !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
525                         w.inode.bi_sectors !=
526                         (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)),
527                         c, "i_sectors wrong: got %llu, should be %llu",
528                         w.inode.bi_sectors, i_sectors)) {
529                         struct bkey_inode_buf p;
530
531                         w.inode.bi_sectors = i_sectors;
532
533                         bch2_trans_unlock(&trans);
534
535                         bch2_inode_pack(&p, &w.inode);
536
537                         ret = bch2_btree_insert(c, BTREE_ID_INODES,
538                                                 &p.inode.k_i, NULL, NULL,
539                                                 BTREE_INSERT_NOFAIL|
540                                                 BTREE_INSERT_LAZY_RW);
541                         if (ret) {
542                                 bch_err(c, "error in fsck: error %i updating inode", ret);
543                                 goto err;
544                         }
545
546                         /* revalidate iterator: */
547                         k = bch2_btree_iter_peek(iter);
548                 }
549
550                 if (fsck_err_on(w.have_inode &&
551                         !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
552                         k.k->type != KEY_TYPE_reservation &&
553                         k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
554                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
555                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
556                         bch2_trans_unlock(&trans);
557
558                         ret = bch2_inode_truncate(c, k.k->p.inode,
559                                                   w.inode.bi_size);
560                         if (ret)
561                                 goto err;
562                         continue;
563                 }
564         }
565 err:
566 fsck_err:
567         if (ret == -EINTR)
568                 goto retry;
569         return bch2_trans_exit(&trans) ?: ret;
570 }
571
572 /*
573  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
574  * validate d_type
575  */
576 noinline_for_stack
577 static int check_dirents(struct bch_fs *c)
578 {
579         struct inode_walker w = inode_walker_init();
580         struct hash_check h;
581         struct btree_trans trans;
582         struct btree_iter *iter;
583         struct bkey_s_c k;
584         unsigned name_len;
585         char buf[200];
586         int ret = 0;
587
588         bch_verbose(c, "checking dirents");
589
590         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
591
592         hash_check_init(&h);
593
594         iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
595                                    POS(BCACHEFS_ROOT_INO, 0), 0);
596 retry:
597         for_each_btree_key_continue(iter, 0, k, ret) {
598                 struct bkey_s_c_dirent d;
599                 struct bch_inode_unpacked target;
600                 bool have_target;
601                 u64 d_inum;
602
603                 ret = walk_inode(&trans, &w, k.k->p.inode);
604                 if (ret)
605                         break;
606
607                 if (fsck_err_on(!w.have_inode, c,
608                                 "dirent in nonexisting directory:\n%s",
609                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
610                                                        k), buf)) ||
611                     fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
612                                 "dirent in non directory inode type %u:\n%s",
613                                 mode_to_type(w.inode.bi_mode),
614                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
615                                                        k), buf))) {
616                         ret = bch2_btree_delete_at(&trans, iter, 0);
617                         if (ret)
618                                 goto err;
619                         continue;
620                 }
621
622                 if (w.first_this_inode && w.have_inode)
623                         hash_check_set_inode(&trans, &h, &w.inode);
624
625                 ret = check_dirent_hash(&trans, &h, iter, &k);
626                 if (ret > 0) {
627                         ret = 0;
628                         continue;
629                 }
630                 if (ret)
631                         goto fsck_err;
632
633                 if (ret)
634                         goto fsck_err;
635
636                 if (k.k->type != KEY_TYPE_dirent)
637                         continue;
638
639                 d = bkey_s_c_to_dirent(k);
640                 d_inum = le64_to_cpu(d.v->d_inum);
641
642                 name_len = bch2_dirent_name_bytes(d);
643
644                 if (fsck_err_on(!name_len, c, "empty dirent") ||
645                     fsck_err_on(name_len == 1 &&
646                                 !memcmp(d.v->d_name, ".", 1), c,
647                                 ". dirent") ||
648                     fsck_err_on(name_len == 2 &&
649                                 !memcmp(d.v->d_name, "..", 2), c,
650                                 ".. dirent") ||
651                     fsck_err_on(name_len == 2 &&
652                                 !memcmp(d.v->d_name, "..", 2), c,
653                                 ".. dirent") ||
654                     fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
655                                 "dirent name has invalid chars")) {
656                         ret = remove_dirent(&trans, d);
657                         if (ret)
658                                 goto err;
659                         continue;
660                 }
661
662                 if (fsck_err_on(d_inum == d.k->p.inode, c,
663                                 "dirent points to own directory:\n%s",
664                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
665                                                        k), buf))) {
666                         ret = remove_dirent(&trans, d);
667                         if (ret)
668                                 goto err;
669                         continue;
670                 }
671
672                 ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
673                 if (ret && ret != -ENOENT)
674                         break;
675
676                 have_target = !ret;
677                 ret = 0;
678
679                 if (fsck_err_on(!have_target, c,
680                                 "dirent points to missing inode:\n%s",
681                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
682                                                        k), buf))) {
683                         ret = remove_dirent(&trans, d);
684                         if (ret)
685                                 goto err;
686                         continue;
687                 }
688
689                 if (fsck_err_on(have_target &&
690                                 d.v->d_type !=
691                                 mode_to_type(target.bi_mode), c,
692                                 "incorrect d_type: should be %u:\n%s",
693                                 mode_to_type(target.bi_mode),
694                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
695                                                        k), buf))) {
696                         struct bkey_i_dirent *n;
697
698                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
699                         if (!n) {
700                                 ret = -ENOMEM;
701                                 goto err;
702                         }
703
704                         bkey_reassemble(&n->k_i, d.s_c);
705                         n->v.d_type = mode_to_type(target.bi_mode);
706
707                         ret = __bch2_trans_do(&trans, NULL, NULL,
708                                               BTREE_INSERT_NOFAIL|
709                                               BTREE_INSERT_LAZY_RW,
710                                 (bch2_trans_update(&trans, iter, &n->k_i, 0), 0));
711                         kfree(n);
712                         if (ret)
713                                 goto err;
714
715                 }
716         }
717
718         hash_stop_chain(&trans, &h);
719 err:
720 fsck_err:
721         if (ret == -EINTR)
722                 goto retry;
723
724         return bch2_trans_exit(&trans) ?: ret;
725 }
726
727 /*
728  * Walk xattrs: verify that they all have a corresponding inode
729  */
730 noinline_for_stack
731 static int check_xattrs(struct bch_fs *c)
732 {
733         struct inode_walker w = inode_walker_init();
734         struct hash_check h;
735         struct btree_trans trans;
736         struct btree_iter *iter;
737         struct bkey_s_c k;
738         int ret = 0;
739
740         bch_verbose(c, "checking xattrs");
741
742         hash_check_init(&h);
743
744         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
745
746         iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
747                                    POS(BCACHEFS_ROOT_INO, 0), 0);
748 retry:
749         for_each_btree_key_continue(iter, 0, k, ret) {
750                 ret = walk_inode(&trans, &w, k.k->p.inode);
751                 if (ret)
752                         break;
753
754                 if (fsck_err_on(!w.have_inode, c,
755                                 "xattr for missing inode %llu",
756                                 k.k->p.inode)) {
757                         ret = bch2_btree_delete_at(&trans, iter, 0);
758                         if (ret)
759                                 goto err;
760                         continue;
761                 }
762
763                 if (w.first_this_inode && w.have_inode)
764                         hash_check_set_inode(&trans, &h, &w.inode);
765
766                 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
767                                      &h, iter, k);
768                 if (ret)
769                         goto fsck_err;
770         }
771 err:
772 fsck_err:
773         if (ret == -EINTR)
774                 goto retry;
775         return bch2_trans_exit(&trans) ?: ret;
776 }
777
778 /* Get root directory, create if it doesn't exist: */
779 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
780 {
781         struct bkey_inode_buf packed;
782         int ret;
783
784         bch_verbose(c, "checking root directory");
785
786         ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
787         if (ret && ret != -ENOENT)
788                 return ret;
789
790         if (fsck_err_on(ret, c, "root directory missing"))
791                 goto create_root;
792
793         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
794                         "root inode not a directory"))
795                 goto create_root;
796
797         return 0;
798 fsck_err:
799         return ret;
800 create_root:
801         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
802                         0, NULL);
803         root_inode->bi_inum = BCACHEFS_ROOT_INO;
804
805         bch2_inode_pack(&packed, root_inode);
806
807         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
808                                  NULL, NULL,
809                                  BTREE_INSERT_NOFAIL|
810                                  BTREE_INSERT_LAZY_RW);
811 }
812
813 /* Get lost+found, create if it doesn't exist: */
814 static int check_lostfound(struct bch_fs *c,
815                            struct bch_inode_unpacked *root_inode,
816                            struct bch_inode_unpacked *lostfound_inode)
817 {
818         struct qstr lostfound = QSTR("lost+found");
819         struct bch_hash_info root_hash_info =
820                 bch2_hash_info_init(c, root_inode);
821         u64 inum;
822         int ret;
823
824         bch_verbose(c, "checking lost+found");
825
826         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
827                                  &lostfound);
828         if (!inum) {
829                 bch_notice(c, "creating lost+found");
830                 goto create_lostfound;
831         }
832
833         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
834         if (ret && ret != -ENOENT)
835                 return ret;
836
837         if (fsck_err_on(ret, c, "lost+found missing"))
838                 goto create_lostfound;
839
840         if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
841                         "lost+found inode not a directory"))
842                 goto create_lostfound;
843
844         return 0;
845 fsck_err:
846         return ret;
847 create_lostfound:
848         bch2_inode_init_early(c, lostfound_inode);
849
850         ret = bch2_trans_do(c, NULL, NULL,
851                             BTREE_INSERT_NOFAIL|
852                             BTREE_INSERT_LAZY_RW,
853                 bch2_create_trans(&trans,
854                                   BCACHEFS_ROOT_INO, root_inode,
855                                   lostfound_inode, &lostfound,
856                                   0, 0, S_IFDIR|0700, 0, NULL, NULL));
857         if (ret)
858                 bch_err(c, "error creating lost+found: %i", ret);
859
860         return ret;
861 }
862
863 struct inode_bitmap {
864         unsigned long   *bits;
865         size_t          size;
866 };
867
868 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
869 {
870         return nr < b->size ? test_bit(nr, b->bits) : false;
871 }
872
873 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
874 {
875         if (nr >= b->size) {
876                 size_t new_size = max_t(size_t, max_t(size_t,
877                                         PAGE_SIZE * 8,
878                                         b->size * 2),
879                                         nr + 1);
880                 void *n;
881
882                 new_size = roundup_pow_of_two(new_size);
883                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
884                 if (!n) {
885                         return -ENOMEM;
886                 }
887
888                 b->bits = n;
889                 b->size = new_size;
890         }
891
892         __set_bit(nr, b->bits);
893         return 0;
894 }
895
896 struct pathbuf {
897         size_t          nr;
898         size_t          size;
899
900         struct pathbuf_entry {
901                 u64     inum;
902                 u64     offset;
903         }               *entries;
904 };
905
906 static int path_down(struct pathbuf *p, u64 inum)
907 {
908         if (p->nr == p->size) {
909                 size_t new_size = max_t(size_t, 256UL, p->size * 2);
910                 void *n = krealloc(p->entries,
911                                    new_size * sizeof(p->entries[0]),
912                                    GFP_KERNEL);
913                 if (!n)
914                         return -ENOMEM;
915
916                 p->entries = n;
917                 p->size = new_size;
918         };
919
920         p->entries[p->nr++] = (struct pathbuf_entry) {
921                 .inum = inum,
922                 .offset = 0,
923         };
924         return 0;
925 }
926
927 noinline_for_stack
928 static int check_directory_structure(struct bch_fs *c,
929                                      struct bch_inode_unpacked *lostfound_inode)
930 {
931         struct inode_bitmap dirs_done = { NULL, 0 };
932         struct pathbuf path = { 0, 0, NULL };
933         struct pathbuf_entry *e;
934         struct btree_trans trans;
935         struct btree_iter *iter;
936         struct bkey_s_c k;
937         struct bkey_s_c_dirent dirent;
938         bool had_unreachable;
939         u64 d_inum;
940         int ret = 0;
941
942         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
943
944         bch_verbose(c, "checking directory structure");
945
946         /* DFS: */
947 restart_dfs:
948         had_unreachable = false;
949
950         ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
951         if (ret) {
952                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
953                 goto err;
954         }
955
956         ret = path_down(&path, BCACHEFS_ROOT_INO);
957         if (ret)
958                 goto err;
959
960         while (path.nr) {
961 next:
962                 e = &path.entries[path.nr - 1];
963
964                 if (e->offset == U64_MAX)
965                         goto up;
966
967                 for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
968                                    POS(e->inum, e->offset + 1), 0, k, ret) {
969                         if (k.k->p.inode != e->inum)
970                                 break;
971
972                         e->offset = k.k->p.offset;
973
974                         if (k.k->type != KEY_TYPE_dirent)
975                                 continue;
976
977                         dirent = bkey_s_c_to_dirent(k);
978
979                         if (dirent.v->d_type != DT_DIR)
980                                 continue;
981
982                         d_inum = le64_to_cpu(dirent.v->d_inum);
983
984                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
985                                         "directory %llu has multiple hardlinks",
986                                         d_inum)) {
987                                 ret = remove_dirent(&trans, dirent);
988                                 if (ret)
989                                         goto err;
990                                 continue;
991                         }
992
993                         ret = inode_bitmap_set(&dirs_done, d_inum);
994                         if (ret) {
995                                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
996                                 goto err;
997                         }
998
999                         ret = path_down(&path, d_inum);
1000                         if (ret) {
1001                                 goto err;
1002                         }
1003
1004                         ret = bch2_trans_iter_free(&trans, iter);
1005                         if (ret) {
1006                                 bch_err(c, "btree error %i in fsck", ret);
1007                                 goto err;
1008                         }
1009                         goto next;
1010                 }
1011                 ret = bch2_trans_iter_free(&trans, iter) ?: ret;
1012                 if (ret) {
1013                         bch_err(c, "btree error %i in fsck", ret);
1014                         goto err;
1015                 }
1016 up:
1017                 path.nr--;
1018         }
1019
1020         iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0);
1021 retry:
1022         for_each_btree_key_continue(iter, 0, k, ret) {
1023                 if (k.k->type != KEY_TYPE_inode)
1024                         continue;
1025
1026                 if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
1027                         continue;
1028
1029                 ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
1030                 if (ret == -EINTR)
1031                         goto retry;
1032                 if (!ret)
1033                         continue;
1034
1035                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
1036                                 "unreachable directory found (inum %llu)",
1037                                 k.k->p.inode)) {
1038                         bch2_trans_unlock(&trans);
1039
1040                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
1041                         if (ret) {
1042                                 goto err;
1043                         }
1044
1045                         had_unreachable = true;
1046                 }
1047         }
1048         bch2_trans_iter_free(&trans, iter);
1049         if (ret)
1050                 goto err;
1051
1052         if (had_unreachable) {
1053                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
1054                 kfree(dirs_done.bits);
1055                 kfree(path.entries);
1056                 memset(&dirs_done, 0, sizeof(dirs_done));
1057                 memset(&path, 0, sizeof(path));
1058                 goto restart_dfs;
1059         }
1060 err:
1061 fsck_err:
1062         ret = bch2_trans_exit(&trans) ?: ret;
1063         kfree(dirs_done.bits);
1064         kfree(path.entries);
1065         return ret;
1066 }
1067
1068 struct nlink {
1069         u32     count;
1070         u32     dir_count;
1071 };
1072
1073 typedef GENRADIX(struct nlink) nlink_table;
1074
1075 static void inc_link(struct bch_fs *c, nlink_table *links,
1076                      u64 range_start, u64 *range_end,
1077                      u64 inum, bool dir)
1078 {
1079         struct nlink *link;
1080
1081         if (inum < range_start || inum >= *range_end)
1082                 return;
1083
1084         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
1085         if (!link) {
1086                 bch_verbose(c, "allocation failed during fsck - will need another pass");
1087                 *range_end = inum;
1088                 return;
1089         }
1090
1091         if (dir)
1092                 link->dir_count++;
1093         else
1094                 link->count++;
1095 }
1096
1097 noinline_for_stack
1098 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
1099                                u64 range_start, u64 *range_end)
1100 {
1101         struct btree_trans trans;
1102         struct btree_iter *iter;
1103         struct bkey_s_c k;
1104         struct bkey_s_c_dirent d;
1105         u64 d_inum;
1106         int ret;
1107
1108         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1109
1110         inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
1111
1112         for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) {
1113                 switch (k.k->type) {
1114                 case KEY_TYPE_dirent:
1115                         d = bkey_s_c_to_dirent(k);
1116                         d_inum = le64_to_cpu(d.v->d_inum);
1117
1118                         if (d.v->d_type == DT_DIR)
1119                                 inc_link(c, links, range_start, range_end,
1120                                          d.k->p.inode, true);
1121
1122                         inc_link(c, links, range_start, range_end,
1123                                  d_inum, false);
1124
1125                         break;
1126                 }
1127
1128                 bch2_trans_cond_resched(&trans);
1129         }
1130         ret = bch2_trans_exit(&trans) ?: ret;
1131         if (ret)
1132                 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1133
1134         return ret;
1135 }
1136
1137 static int check_inode_nlink(struct bch_fs *c,
1138                              struct bch_inode_unpacked *lostfound_inode,
1139                              struct bch_inode_unpacked *u,
1140                              struct nlink *link,
1141                              bool *do_update)
1142 {
1143         u32 i_nlink = bch2_inode_nlink_get(u);
1144         u32 real_i_nlink =
1145                 link->count * nlink_bias(u->bi_mode) +
1146                 link->dir_count;
1147         int ret = 0;
1148
1149         /*
1150          * These should have been caught/fixed by earlier passes, we don't
1151          * repair them here:
1152          */
1153         if (S_ISDIR(u->bi_mode) && link->count > 1) {
1154                 need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
1155                               u->bi_inum, link->count);
1156                 return 0;
1157         }
1158
1159         if (S_ISDIR(u->bi_mode) && !link->count) {
1160                 need_fsck_err(c, "unreachable directory found (inum %llu)",
1161                               u->bi_inum);
1162                 return 0;
1163         }
1164
1165         if (!S_ISDIR(u->bi_mode) && link->dir_count) {
1166                 need_fsck_err(c, "non directory with subdirectories",
1167                               u->bi_inum);
1168                 return 0;
1169         }
1170
1171         if (!link->count &&
1172             !(u->bi_flags & BCH_INODE_UNLINKED) &&
1173             (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
1174                 if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
1175                              u->bi_inum, mode_to_type(u->bi_mode)) ==
1176                     FSCK_ERR_IGNORE)
1177                         return 0;
1178
1179                 ret = reattach_inode(c, lostfound_inode, u->bi_inum);
1180                 if (ret)
1181                         return ret;
1182
1183                 link->count = 1;
1184                 real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
1185                 goto set_i_nlink;
1186         }
1187
1188         if (i_nlink < link->count) {
1189                 if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
1190                              u->bi_inum, i_nlink, link->count,
1191                              mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
1192                         return 0;
1193                 goto set_i_nlink;
1194         }
1195
1196         if (i_nlink != real_i_nlink &&
1197             c->sb.clean) {
1198                 if (fsck_err(c, "filesystem marked clean, "
1199                              "but inode %llu has wrong i_nlink "
1200                              "(type %u i_nlink %u, should be %u)",
1201                              u->bi_inum, mode_to_type(u->bi_mode),
1202                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1203                         return 0;
1204                 goto set_i_nlink;
1205         }
1206
1207         if (i_nlink != real_i_nlink &&
1208             (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
1209                 if (fsck_err(c, "inode %llu has wrong i_nlink "
1210                              "(type %u i_nlink %u, should be %u)",
1211                              u->bi_inum, mode_to_type(u->bi_mode),
1212                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1213                         return 0;
1214                 goto set_i_nlink;
1215         }
1216
1217         if (real_i_nlink && i_nlink != real_i_nlink)
1218                 bch_verbose(c, "setting inode %llu nlink from %u to %u",
1219                             u->bi_inum, i_nlink, real_i_nlink);
1220 set_i_nlink:
1221         if (i_nlink != real_i_nlink) {
1222                 bch2_inode_nlink_set(u, real_i_nlink);
1223                 *do_update = true;
1224         }
1225 fsck_err:
1226         return ret;
1227 }
1228
1229 static int check_inode(struct btree_trans *trans,
1230                        struct bch_inode_unpacked *lostfound_inode,
1231                        struct btree_iter *iter,
1232                        struct bkey_s_c_inode inode,
1233                        struct nlink *link)
1234 {
1235         struct bch_fs *c = trans->c;
1236         struct bch_inode_unpacked u;
1237         bool do_update = false;
1238         int ret = 0;
1239
1240         ret = bch2_inode_unpack(inode, &u);
1241
1242         bch2_trans_unlock(trans);
1243
1244         if (bch2_fs_inconsistent_on(ret, c,
1245                          "error unpacking inode %llu in fsck",
1246                          inode.k->p.inode))
1247                 return ret;
1248
1249         if (link) {
1250                 ret = check_inode_nlink(c, lostfound_inode, &u, link,
1251                                         &do_update);
1252                 if (ret)
1253                         return ret;
1254         }
1255
1256         if (u.bi_flags & BCH_INODE_UNLINKED &&
1257             (!c->sb.clean ||
1258              fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
1259                       u.bi_inum))) {
1260                 bch_verbose(c, "deleting inode %llu", u.bi_inum);
1261
1262                 ret = bch2_inode_rm(c, u.bi_inum);
1263                 if (ret)
1264                         bch_err(c, "error in fsck: error %i while deleting inode", ret);
1265                 return ret;
1266         }
1267
1268         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
1269             (!c->sb.clean ||
1270              fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
1271                       u.bi_inum))) {
1272                 bch_verbose(c, "truncating inode %llu", u.bi_inum);
1273
1274                 /*
1275                  * XXX: need to truncate partial blocks too here - or ideally
1276                  * just switch units to bytes and that issue goes away
1277                  */
1278
1279                 ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
1280                 if (ret) {
1281                         bch_err(c, "error in fsck: error %i truncating inode", ret);
1282                         return ret;
1283                 }
1284
1285                 /*
1286                  * We truncated without our normal sector accounting hook, just
1287                  * make sure we recalculate it:
1288                  */
1289                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
1290
1291                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
1292                 do_update = true;
1293         }
1294
1295         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
1296             (!c->sb.clean ||
1297              fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
1298                       u.bi_inum))) {
1299                 s64 sectors;
1300
1301                 bch_verbose(c, "recounting sectors for inode %llu",
1302                             u.bi_inum);
1303
1304                 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
1305                 if (sectors < 0) {
1306                         bch_err(c, "error in fsck: error %i recounting inode sectors",
1307                                 (int) sectors);
1308                         return sectors;
1309                 }
1310
1311                 u.bi_sectors = sectors;
1312                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
1313                 do_update = true;
1314         }
1315
1316         if (do_update) {
1317                 struct bkey_inode_buf p;
1318
1319                 bch2_inode_pack(&p, &u);
1320
1321                 ret = __bch2_trans_do(trans, NULL, NULL,
1322                                       BTREE_INSERT_NOFAIL|
1323                                       BTREE_INSERT_LAZY_RW,
1324                         (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
1325                 if (ret)
1326                         bch_err(c, "error in fsck: error %i "
1327                                 "updating inode", ret);
1328         }
1329 fsck_err:
1330         return ret;
1331 }
1332
1333 noinline_for_stack
1334 static int bch2_gc_walk_inodes(struct bch_fs *c,
1335                                struct bch_inode_unpacked *lostfound_inode,
1336                                nlink_table *links,
1337                                u64 range_start, u64 range_end)
1338 {
1339         struct btree_trans trans;
1340         struct btree_iter *iter;
1341         struct bkey_s_c k;
1342         struct nlink *link, zero_links = { 0, 0 };
1343         struct genradix_iter nlinks_iter;
1344         int ret = 0, ret2 = 0;
1345         u64 nlinks_pos;
1346
1347         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1348
1349         iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
1350                                    POS(range_start, 0), 0);
1351         nlinks_iter = genradix_iter_init(links, 0);
1352
1353         while ((k = bch2_btree_iter_peek(iter)).k &&
1354                !(ret2 = bkey_err(k))) {
1355 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
1356
1357                 if (!link && (!k.k || iter->pos.inode >= range_end))
1358                         break;
1359
1360                 nlinks_pos = range_start + nlinks_iter.pos;
1361                 if (iter->pos.inode > nlinks_pos) {
1362                         /* Should have been caught by dirents pass: */
1363                         need_fsck_err_on(link && link->count, c,
1364                                 "missing inode %llu (nlink %u)",
1365                                 nlinks_pos, link->count);
1366                         genradix_iter_advance(&nlinks_iter, links);
1367                         goto peek_nlinks;
1368                 }
1369
1370                 if (iter->pos.inode < nlinks_pos || !link)
1371                         link = &zero_links;
1372
1373                 if (k.k && k.k->type == KEY_TYPE_inode) {
1374                         ret = check_inode(&trans, lostfound_inode, iter,
1375                                           bkey_s_c_to_inode(k), link);
1376                         BUG_ON(ret == -EINTR);
1377                         if (ret)
1378                                 break;
1379                 } else {
1380                         /* Should have been caught by dirents pass: */
1381                         need_fsck_err_on(link->count, c,
1382                                 "missing inode %llu (nlink %u)",
1383                                 nlinks_pos, link->count);
1384                 }
1385
1386                 if (nlinks_pos == iter->pos.inode)
1387                         genradix_iter_advance(&nlinks_iter, links);
1388
1389                 bch2_btree_iter_next(iter);
1390                 bch2_trans_cond_resched(&trans);
1391         }
1392 fsck_err:
1393         bch2_trans_exit(&trans);
1394
1395         if (ret2)
1396                 bch_err(c, "error in fsck: btree error %i while walking inodes", ret2);
1397
1398         return ret ?: ret2;
1399 }
1400
1401 noinline_for_stack
1402 static int check_inode_nlinks(struct bch_fs *c,
1403                               struct bch_inode_unpacked *lostfound_inode)
1404 {
1405         nlink_table links;
1406         u64 this_iter_range_start, next_iter_range_start = 0;
1407         int ret = 0;
1408
1409         bch_verbose(c, "checking inode nlinks");
1410
1411         genradix_init(&links);
1412
1413         do {
1414                 this_iter_range_start = next_iter_range_start;
1415                 next_iter_range_start = U64_MAX;
1416
1417                 ret = bch2_gc_walk_dirents(c, &links,
1418                                           this_iter_range_start,
1419                                           &next_iter_range_start);
1420                 if (ret)
1421                         break;
1422
1423                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1424                                          this_iter_range_start,
1425                                          next_iter_range_start);
1426                 if (ret)
1427                         break;
1428
1429                 genradix_free(&links);
1430         } while (next_iter_range_start != U64_MAX);
1431
1432         genradix_free(&links);
1433
1434         return ret;
1435 }
1436
1437 /*
1438  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1439  * Doesn't fix them yet, mainly because they haven't yet been observed:
1440  */
1441 int bch2_fsck_full(struct bch_fs *c)
1442 {
1443         struct bch_inode_unpacked root_inode, lostfound_inode;
1444
1445         return  check_extents(c) ?:
1446                 check_dirents(c) ?:
1447                 check_xattrs(c) ?:
1448                 check_root(c, &root_inode) ?:
1449                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1450                 check_directory_structure(c, &lostfound_inode) ?:
1451                 check_inode_nlinks(c, &lostfound_inode);
1452 }
1453
1454 int bch2_fsck_inode_nlink(struct bch_fs *c)
1455 {
1456         struct bch_inode_unpacked root_inode, lostfound_inode;
1457
1458         return  check_root(c, &root_inode) ?:
1459                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1460                 check_inode_nlinks(c, &lostfound_inode);
1461 }
1462
1463 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1464 {
1465         struct btree_trans trans;
1466         struct btree_iter *iter;
1467         struct bkey_s_c k;
1468         struct bkey_s_c_inode inode;
1469         int ret;
1470
1471         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1472
1473         for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) {
1474                 if (k.k->type != KEY_TYPE_inode)
1475                         continue;
1476
1477                 inode = bkey_s_c_to_inode(k);
1478
1479                 if (inode.v->bi_flags &
1480                     (BCH_INODE_I_SIZE_DIRTY|
1481                      BCH_INODE_I_SECTORS_DIRTY|
1482                      BCH_INODE_UNLINKED)) {
1483                         ret = check_inode(&trans, NULL, iter, inode, NULL);
1484                         BUG_ON(ret == -EINTR);
1485                         if (ret)
1486                                 break;
1487                 }
1488         }
1489         BUG_ON(ret == -EINTR);
1490
1491         return bch2_trans_exit(&trans) ?: ret;
1492 }