]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
Update bcachefs sources to d83b992f65 bcachefs: Rewrite journal_seq_blacklist machinery
[bcachefs-tools-debian] / libbcachefs / fsck.c
1
2 #include "bcachefs.h"
3 #include "btree_update.h"
4 #include "dirent.h"
5 #include "error.h"
6 #include "fs.h"
7 #include "fsck.h"
8 #include "inode.h"
9 #include "keylist.h"
10 #include "super.h"
11 #include "xattr.h"
12
13 #include <linux/dcache.h> /* struct qstr */
14 #include <linux/generic-radix-tree.h>
15
16 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
17
18 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
19 {
20         struct btree_iter *iter;
21         struct bkey_s_c k;
22         u64 sectors = 0;
23
24         for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
25                 if (k.k->p.inode != inum)
26                         break;
27
28                 if (bkey_extent_is_allocation(k.k))
29                         sectors += k.k->size;
30         }
31
32         return bch2_trans_iter_free(trans, iter) ?: sectors;
33 }
34
35 static int remove_dirent(struct btree_trans *trans,
36                          struct bkey_s_c_dirent dirent)
37 {
38         struct bch_fs *c = trans->c;
39         struct qstr name;
40         struct bch_inode_unpacked dir_inode;
41         struct bch_hash_info dir_hash_info;
42         u64 dir_inum = dirent.k->p.inode;
43         int ret;
44         char *buf;
45
46         name.len = bch2_dirent_name_bytes(dirent);
47         buf = kmalloc(name.len + 1, GFP_KERNEL);
48         if (!buf)
49                 return -ENOMEM;
50
51         memcpy(buf, dirent.v->d_name, name.len);
52         buf[name.len] = '\0';
53         name.name = buf;
54
55         /* Unlock so we don't deadlock, after copying name: */
56         bch2_btree_trans_unlock(trans);
57
58         ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
59         if (ret) {
60                 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
61                 goto err;
62         }
63
64         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
65
66         ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
67         if (ret)
68                 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
69 err:
70         kfree(buf);
71         return ret;
72 }
73
74 static int reattach_inode(struct bch_fs *c,
75                           struct bch_inode_unpacked *lostfound_inode,
76                           u64 inum)
77 {
78         struct bch_hash_info lostfound_hash_info =
79                 bch2_hash_info_init(c, lostfound_inode);
80         struct bkey_inode_buf packed;
81         char name_buf[20];
82         struct qstr name;
83         int ret;
84
85         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
86         name = (struct qstr) QSTR(name_buf);
87
88         lostfound_inode->bi_nlink++;
89
90         bch2_inode_pack(&packed, lostfound_inode);
91
92         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
93                                 NULL, NULL,
94                                 BTREE_INSERT_NOFAIL|
95                                 BTREE_INSERT_LAZY_RW);
96         if (ret) {
97                 bch_err(c, "error %i reattaching inode %llu while updating lost+found",
98                         ret, inum);
99                 return ret;
100         }
101
102         ret = bch2_dirent_create(c, lostfound_inode->bi_inum,
103                                  &lostfound_hash_info,
104                                  DT_DIR, &name, inum, NULL,
105                                  BTREE_INSERT_NOFAIL|
106                                  BTREE_INSERT_LAZY_RW);
107         if (ret) {
108                 bch_err(c, "error %i reattaching inode %llu while creating new dirent",
109                         ret, inum);
110                 return ret;
111         }
112         return ret;
113 }
114
115 struct inode_walker {
116         bool                    first_this_inode;
117         bool                    have_inode;
118         u64                     cur_inum;
119         struct bch_inode_unpacked inode;
120 };
121
122 static struct inode_walker inode_walker_init(void)
123 {
124         return (struct inode_walker) {
125                 .cur_inum       = -1,
126                 .have_inode     = false,
127         };
128 }
129
130 static int walk_inode(struct btree_trans *trans,
131                       struct inode_walker *w, u64 inum)
132 {
133         if (inum != w->cur_inum) {
134                 int ret = bch2_inode_find_by_inum_trans(trans, inum,
135                                                         &w->inode);
136
137                 if (ret && ret != -ENOENT)
138                         return ret;
139
140                 w->have_inode   = !ret;
141                 w->cur_inum     = inum;
142                 w->first_this_inode = true;
143         } else {
144                 w->first_this_inode = false;
145         }
146
147         return 0;
148 }
149
150 struct hash_check {
151         struct bch_hash_info    info;
152
153         /* start of current chain of hash collisions: */
154         struct btree_iter       *chain;
155
156         /* next offset in current chain of hash collisions: */
157         u64                     chain_end;
158 };
159
160 static void hash_check_init(struct hash_check *h)
161 {
162         h->chain = NULL;
163 }
164
165 static void hash_stop_chain(struct btree_trans *trans,
166                             struct hash_check *h)
167 {
168         if (h->chain)
169                 bch2_trans_iter_free(trans, h->chain);
170         h->chain = NULL;
171 }
172
173 static void hash_check_set_inode(struct btree_trans *trans,
174                                  struct hash_check *h,
175                                  const struct bch_inode_unpacked *bi)
176 {
177         h->info = bch2_hash_info_init(trans->c, bi);
178         hash_stop_chain(trans, h);
179 }
180
181 static int hash_redo_key(const struct bch_hash_desc desc,
182                          struct btree_trans *trans, struct hash_check *h,
183                          struct btree_iter *k_iter, struct bkey_s_c k,
184                          u64 hashed)
185 {
186         struct bkey_i *tmp;
187         int ret = 0;
188
189         tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
190         if (!tmp)
191                 return -ENOMEM;
192
193         bkey_reassemble(tmp, k);
194
195         ret = bch2_btree_delete_at(trans, k_iter, 0);
196         if (ret)
197                 goto err;
198
199         bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
200                       tmp, BCH_HASH_SET_MUST_CREATE);
201         ret = bch2_trans_commit(trans, NULL, NULL,
202                                 BTREE_INSERT_NOFAIL|
203                                 BTREE_INSERT_LAZY_RW);
204 err:
205         kfree(tmp);
206         return ret;
207 }
208
209 static int fsck_hash_delete_at(struct btree_trans *trans,
210                                const struct bch_hash_desc desc,
211                                struct bch_hash_info *info,
212                                struct btree_iter *iter)
213 {
214         int ret;
215 retry:
216         ret   = bch2_hash_delete_at(trans, desc, info, iter) ?:
217                 bch2_trans_commit(trans, NULL, NULL,
218                                   BTREE_INSERT_ATOMIC|
219                                   BTREE_INSERT_NOFAIL|
220                                   BTREE_INSERT_LAZY_RW);
221         if (ret == -EINTR) {
222                 ret = bch2_btree_iter_traverse(iter);
223                 if (!ret)
224                         goto retry;
225         }
226
227         return ret;
228 }
229
230 static int hash_check_duplicates(struct btree_trans *trans,
231                         const struct bch_hash_desc desc, struct hash_check *h,
232                         struct btree_iter *k_iter, struct bkey_s_c k)
233 {
234         struct bch_fs *c = trans->c;
235         struct btree_iter *iter;
236         struct bkey_s_c k2;
237         char buf[200];
238         int ret = 0;
239
240         if (!bkey_cmp(h->chain->pos, k_iter->pos))
241                 return 0;
242
243         iter = bch2_trans_copy_iter(trans, h->chain);
244         BUG_ON(IS_ERR(iter));
245
246         for_each_btree_key_continue(iter, 0, k2) {
247                 if (bkey_cmp(k2.k->p, k.k->p) >= 0)
248                         break;
249
250                 if (fsck_err_on(k2.k->type == desc.key_type &&
251                                 !desc.cmp_bkey(k, k2), c,
252                                 "duplicate hash table keys:\n%s",
253                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
254                                                        k), buf))) {
255                         ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
256                         if (ret)
257                                 return ret;
258                         ret = 1;
259                         break;
260                 }
261         }
262 fsck_err:
263         bch2_trans_iter_free(trans, iter);
264         return ret;
265 }
266
267 static void hash_set_chain_start(struct btree_trans *trans,
268                         const struct bch_hash_desc desc,
269                         struct hash_check *h,
270                         struct btree_iter *k_iter, struct bkey_s_c k)
271 {
272         bool hole = (k.k->type != KEY_TYPE_whiteout &&
273                      k.k->type != desc.key_type);
274
275         if (hole || k.k->p.offset > h->chain_end + 1)
276                 hash_stop_chain(trans, h);
277
278         if (!hole) {
279                 if (!h->chain) {
280                         h->chain = bch2_trans_copy_iter(trans, k_iter);
281                         BUG_ON(IS_ERR(h->chain));
282                 }
283
284                 h->chain_end = k.k->p.offset;
285         }
286 }
287
288 static bool key_has_correct_hash(struct btree_trans *trans,
289                         const struct bch_hash_desc desc,
290                         struct hash_check *h,
291                         struct btree_iter *k_iter, struct bkey_s_c k)
292 {
293         u64 hash;
294
295         hash_set_chain_start(trans, desc, h, k_iter, k);
296
297         if (k.k->type != desc.key_type)
298                 return true;
299
300         hash = desc.hash_bkey(&h->info, k);
301
302         return hash >= h->chain->pos.offset &&
303                 hash <= k.k->p.offset;
304 }
305
306 static int hash_check_key(struct btree_trans *trans,
307                         const struct bch_hash_desc desc, struct hash_check *h,
308                         struct btree_iter *k_iter, struct bkey_s_c k)
309 {
310         struct bch_fs *c = trans->c;
311         char buf[200];
312         u64 hashed;
313         int ret = 0;
314
315         hash_set_chain_start(trans, desc, h, k_iter, k);
316
317         if (k.k->type != desc.key_type)
318                 return 0;
319
320         hashed = desc.hash_bkey(&h->info, k);
321
322         if (fsck_err_on(hashed < h->chain->pos.offset ||
323                         hashed > k.k->p.offset, c,
324                         "hash table key at wrong offset: btree %u, %llu, "
325                         "hashed to %llu chain starts at %llu\n%s",
326                         desc.btree_id, k.k->p.offset,
327                         hashed, h->chain->pos.offset,
328                         (bch2_bkey_val_to_text(&PBUF(buf), c,
329                                                k), buf))) {
330                 ret = hash_redo_key(desc, trans, h, k_iter, k, hashed);
331                 if (ret) {
332                         bch_err(c, "hash_redo_key err %i", ret);
333                         return ret;
334                 }
335                 return 1;
336         }
337
338         ret = hash_check_duplicates(trans, desc, h, k_iter, k);
339 fsck_err:
340         return ret;
341 }
342
343 static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
344                              struct btree_iter *iter, struct bkey_s_c *k)
345 {
346         struct bch_fs *c = trans->c;
347         struct bkey_i_dirent *d = NULL;
348         int ret = -EINVAL;
349         char buf[200];
350         unsigned len;
351         u64 hash;
352
353         if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
354                 return 0;
355
356         len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
357         BUG_ON(!len);
358
359         memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
360         buf[len] = '\0';
361
362         d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
363         if (!d) {
364                 bch_err(c, "memory allocation failure");
365                 return -ENOMEM;
366         }
367
368         bkey_reassemble(&d->k_i, *k);
369
370         do {
371                 --len;
372                 if (!len)
373                         goto err_redo;
374
375                 d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
376
377                 BUG_ON(bkey_val_bytes(&d->k) <
378                        offsetof(struct bch_dirent, d_name) + len);
379
380                 memset(d->v.d_name + len, 0,
381                        bkey_val_bytes(&d->k) -
382                        offsetof(struct bch_dirent, d_name) - len);
383
384                 hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
385                                                 bkey_i_to_s_c(&d->k_i));
386         } while (hash < h->chain->pos.offset ||
387                  hash > k->k->p.offset);
388
389         if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
390                      buf, strlen(buf), d->v.d_name, len)) {
391                 bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i));
392
393                 ret = bch2_trans_commit(trans, NULL, NULL,
394                                         BTREE_INSERT_NOFAIL|
395                                         BTREE_INSERT_LAZY_RW);
396                 if (ret)
397                         goto err;
398
399                 *k = bch2_btree_iter_peek(iter);
400
401                 BUG_ON(k->k->type != KEY_TYPE_dirent);
402         }
403 err:
404 fsck_err:
405         kfree(d);
406         return ret;
407 err_redo:
408         hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
409
410         if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
411                      "hash table key at wrong offset: btree %u, offset %llu, "
412                      "hashed to %llu chain starts at %llu\n%s",
413                      buf, strlen(buf), BTREE_ID_DIRENTS,
414                      k->k->p.offset, hash, h->chain->pos.offset,
415                      (bch2_bkey_val_to_text(&PBUF(buf), c,
416                                             *k), buf))) {
417                 ret = hash_redo_key(bch2_dirent_hash_desc, trans,
418                                     h, iter, *k, hash);
419                 if (ret)
420                         bch_err(c, "hash_redo_key err %i", ret);
421                 else
422                         ret = 1;
423         }
424
425         goto err;
426 }
427
428 static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
429 {
430         return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
431                         POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9),
432                         POS(inode_nr + 1, 0), NULL);
433 }
434
435 /*
436  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
437  * that i_size an i_sectors are consistent
438  */
439 noinline_for_stack
440 static int check_extents(struct bch_fs *c)
441 {
442         struct inode_walker w = inode_walker_init();
443         struct btree_trans trans;
444         struct btree_iter *iter;
445         struct bkey_s_c k;
446         u64 i_sectors;
447         int ret = 0;
448
449         bch2_trans_init(&trans, c);
450         bch2_trans_preload_iters(&trans);
451
452         bch_verbose(c, "checking extents");
453
454         iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
455                                    POS(BCACHEFS_ROOT_INO, 0), 0);
456 retry:
457         for_each_btree_key_continue(iter, 0, k) {
458                 ret = walk_inode(&trans, &w, k.k->p.inode);
459                 if (ret)
460                         break;
461
462                 if (fsck_err_on(!w.have_inode, c,
463                         "extent type %u for missing inode %llu",
464                         k.k->type, k.k->p.inode) ||
465                     fsck_err_on(w.have_inode &&
466                         !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
467                         "extent type %u for non regular file, inode %llu mode %o",
468                         k.k->type, k.k->p.inode, w.inode.bi_mode)) {
469                         bch2_trans_unlock(&trans);
470
471                         ret = bch2_inode_truncate(c, k.k->p.inode, 0);
472                         if (ret)
473                                 goto err;
474                         continue;
475                 }
476
477                 if (fsck_err_on(w.first_this_inode &&
478                         w.have_inode &&
479                         !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
480                         w.inode.bi_sectors !=
481                         (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)),
482                         c, "i_sectors wrong: got %llu, should be %llu",
483                         w.inode.bi_sectors, i_sectors)) {
484                         struct bkey_inode_buf p;
485
486                         w.inode.bi_sectors = i_sectors;
487
488                         bch2_trans_unlock(&trans);
489
490                         bch2_inode_pack(&p, &w.inode);
491
492                         ret = bch2_btree_insert(c, BTREE_ID_INODES,
493                                                 &p.inode.k_i, NULL, NULL,
494                                                 BTREE_INSERT_NOFAIL|
495                                                 BTREE_INSERT_LAZY_RW);
496                         if (ret) {
497                                 bch_err(c, "error in fs gc: error %i "
498                                         "updating inode", ret);
499                                 goto err;
500                         }
501
502                         /* revalidate iterator: */
503                         k = bch2_btree_iter_peek(iter);
504                 }
505
506                 if (fsck_err_on(w.have_inode &&
507                         !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
508                         k.k->type != KEY_TYPE_reservation &&
509                         k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
510                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
511                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
512                         bch2_trans_unlock(&trans);
513
514                         ret = bch2_inode_truncate(c, k.k->p.inode,
515                                                   w.inode.bi_size);
516                         if (ret)
517                                 goto err;
518                         continue;
519                 }
520         }
521 err:
522 fsck_err:
523         if (ret == -EINTR)
524                 goto retry;
525         return bch2_trans_exit(&trans) ?: ret;
526 }
527
528 /*
529  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
530  * validate d_type
531  */
532 noinline_for_stack
533 static int check_dirents(struct bch_fs *c)
534 {
535         struct inode_walker w = inode_walker_init();
536         struct hash_check h;
537         struct btree_trans trans;
538         struct btree_iter *iter;
539         struct bkey_s_c k;
540         unsigned name_len;
541         char buf[200];
542         int ret = 0;
543
544         bch_verbose(c, "checking dirents");
545
546         bch2_trans_init(&trans, c);
547         bch2_trans_preload_iters(&trans);
548
549         hash_check_init(&h);
550
551         iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
552                                    POS(BCACHEFS_ROOT_INO, 0), 0);
553 retry:
554         for_each_btree_key_continue(iter, 0, k) {
555                 struct bkey_s_c_dirent d;
556                 struct bch_inode_unpacked target;
557                 bool have_target;
558                 u64 d_inum;
559
560                 ret = walk_inode(&trans, &w, k.k->p.inode);
561                 if (ret)
562                         break;
563
564                 if (fsck_err_on(!w.have_inode, c,
565                                 "dirent in nonexisting directory:\n%s",
566                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
567                                                        k), buf)) ||
568                     fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
569                                 "dirent in non directory inode type %u:\n%s",
570                                 mode_to_type(w.inode.bi_mode),
571                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
572                                                        k), buf))) {
573                         ret = bch2_btree_delete_at(&trans, iter, 0);
574                         if (ret)
575                                 goto err;
576                         continue;
577                 }
578
579                 if (w.first_this_inode && w.have_inode)
580                         hash_check_set_inode(&trans, &h, &w.inode);
581
582                 ret = check_dirent_hash(&trans, &h, iter, &k);
583                 if (ret > 0) {
584                         ret = 0;
585                         continue;
586                 }
587                 if (ret)
588                         goto fsck_err;
589
590                 if (ret)
591                         goto fsck_err;
592
593                 if (k.k->type != KEY_TYPE_dirent)
594                         continue;
595
596                 d = bkey_s_c_to_dirent(k);
597                 d_inum = le64_to_cpu(d.v->d_inum);
598
599                 name_len = bch2_dirent_name_bytes(d);
600
601                 if (fsck_err_on(!name_len, c, "empty dirent") ||
602                     fsck_err_on(name_len == 1 &&
603                                 !memcmp(d.v->d_name, ".", 1), c,
604                                 ". dirent") ||
605                     fsck_err_on(name_len == 2 &&
606                                 !memcmp(d.v->d_name, "..", 2), c,
607                                 ".. dirent") ||
608                     fsck_err_on(name_len == 2 &&
609                                 !memcmp(d.v->d_name, "..", 2), c,
610                                 ".. dirent") ||
611                     fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
612                                 "dirent name has invalid chars")) {
613                         ret = remove_dirent(&trans, d);
614                         if (ret)
615                                 goto err;
616                         continue;
617                 }
618
619                 if (fsck_err_on(d_inum == d.k->p.inode, c,
620                                 "dirent points to own directory:\n%s",
621                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
622                                                        k), buf))) {
623                         ret = remove_dirent(&trans, d);
624                         if (ret)
625                                 goto err;
626                         continue;
627                 }
628
629                 ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
630                 if (ret && ret != -ENOENT)
631                         break;
632
633                 have_target = !ret;
634                 ret = 0;
635
636                 if (fsck_err_on(!have_target, c,
637                                 "dirent points to missing inode:\n%s",
638                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
639                                                        k), buf))) {
640                         ret = remove_dirent(&trans, d);
641                         if (ret)
642                                 goto err;
643                         continue;
644                 }
645
646                 if (fsck_err_on(have_target &&
647                                 d.v->d_type !=
648                                 mode_to_type(target.bi_mode), c,
649                                 "incorrect d_type: should be %u:\n%s",
650                                 mode_to_type(target.bi_mode),
651                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
652                                                        k), buf))) {
653                         struct bkey_i_dirent *n;
654
655                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
656                         if (!n) {
657                                 ret = -ENOMEM;
658                                 goto err;
659                         }
660
661                         bkey_reassemble(&n->k_i, d.s_c);
662                         n->v.d_type = mode_to_type(target.bi_mode);
663
664                         bch2_trans_update(&trans,
665                                 BTREE_INSERT_ENTRY(iter, &n->k_i));
666
667                         ret = bch2_trans_commit(&trans, NULL, NULL,
668                                                 BTREE_INSERT_NOFAIL|
669                                                 BTREE_INSERT_LAZY_RW);
670                         kfree(n);
671                         if (ret)
672                                 goto err;
673
674                 }
675         }
676
677         hash_stop_chain(&trans, &h);
678 err:
679 fsck_err:
680         if (ret == -EINTR)
681                 goto retry;
682
683         return bch2_trans_exit(&trans) ?: ret;
684 }
685
686 /*
687  * Walk xattrs: verify that they all have a corresponding inode
688  */
689 noinline_for_stack
690 static int check_xattrs(struct bch_fs *c)
691 {
692         struct inode_walker w = inode_walker_init();
693         struct hash_check h;
694         struct btree_trans trans;
695         struct btree_iter *iter;
696         struct bkey_s_c k;
697         int ret = 0;
698
699         bch_verbose(c, "checking xattrs");
700
701         hash_check_init(&h);
702
703         bch2_trans_init(&trans, c);
704         bch2_trans_preload_iters(&trans);
705
706         iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
707                                    POS(BCACHEFS_ROOT_INO, 0), 0);
708 retry:
709         for_each_btree_key_continue(iter, 0, k) {
710                 ret = walk_inode(&trans, &w, k.k->p.inode);
711                 if (ret)
712                         break;
713
714                 if (fsck_err_on(!w.have_inode, c,
715                                 "xattr for missing inode %llu",
716                                 k.k->p.inode)) {
717                         ret = bch2_btree_delete_at(&trans, iter, 0);
718                         if (ret)
719                                 goto err;
720                         continue;
721                 }
722
723                 if (w.first_this_inode && w.have_inode)
724                         hash_check_set_inode(&trans, &h, &w.inode);
725
726                 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
727                                      &h, iter, k);
728                 if (ret)
729                         goto fsck_err;
730         }
731 err:
732 fsck_err:
733         if (ret == -EINTR)
734                 goto retry;
735         return bch2_trans_exit(&trans) ?: ret;
736 }
737
738 /* Get root directory, create if it doesn't exist: */
739 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
740 {
741         struct bkey_inode_buf packed;
742         int ret;
743
744         bch_verbose(c, "checking root directory");
745
746         ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
747         if (ret && ret != -ENOENT)
748                 return ret;
749
750         if (fsck_err_on(ret, c, "root directory missing"))
751                 goto create_root;
752
753         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
754                         "root inode not a directory"))
755                 goto create_root;
756
757         return 0;
758 fsck_err:
759         return ret;
760 create_root:
761         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
762                         0, NULL);
763         root_inode->bi_inum = BCACHEFS_ROOT_INO;
764
765         bch2_inode_pack(&packed, root_inode);
766
767         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
768                                  NULL, NULL,
769                                  BTREE_INSERT_NOFAIL|
770                                  BTREE_INSERT_LAZY_RW);
771 }
772
773 /* Get lost+found, create if it doesn't exist: */
774 static int check_lostfound(struct bch_fs *c,
775                            struct bch_inode_unpacked *root_inode,
776                            struct bch_inode_unpacked *lostfound_inode)
777 {
778         struct qstr lostfound = QSTR("lost+found");
779         struct bch_hash_info root_hash_info =
780                 bch2_hash_info_init(c, root_inode);
781         struct bkey_inode_buf packed;
782         u64 inum;
783         int ret;
784
785         bch_verbose(c, "checking lost+found");
786
787         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
788                                  &lostfound);
789         if (!inum) {
790                 bch_notice(c, "creating lost+found");
791                 goto create_lostfound;
792         }
793
794         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
795         if (ret && ret != -ENOENT)
796                 return ret;
797
798         if (fsck_err_on(ret, c, "lost+found missing"))
799                 goto create_lostfound;
800
801         if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
802                         "lost+found inode not a directory"))
803                 goto create_lostfound;
804
805         return 0;
806 fsck_err:
807         return ret;
808 create_lostfound:
809         root_inode->bi_nlink++;
810
811         bch2_inode_pack(&packed, root_inode);
812
813         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
814                                 NULL, NULL,
815                                 BTREE_INSERT_NOFAIL|
816                                 BTREE_INSERT_LAZY_RW);
817         if (ret)
818                 return ret;
819
820         bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
821                         0, root_inode);
822
823         ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0,
824                                &c->unused_inode_hint);
825         if (ret)
826                 return ret;
827
828         ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
829                                  &lostfound, lostfound_inode->bi_inum, NULL,
830                                  BTREE_INSERT_NOFAIL|
831                                  BTREE_INSERT_LAZY_RW);
832         if (ret)
833                 return ret;
834
835         return 0;
836 }
837
838 struct inode_bitmap {
839         unsigned long   *bits;
840         size_t          size;
841 };
842
843 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
844 {
845         return nr < b->size ? test_bit(nr, b->bits) : false;
846 }
847
848 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
849 {
850         if (nr >= b->size) {
851                 size_t new_size = max_t(size_t, max_t(size_t,
852                                         PAGE_SIZE * 8,
853                                         b->size * 2),
854                                         nr + 1);
855                 void *n;
856
857                 new_size = roundup_pow_of_two(new_size);
858                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
859                 if (!n) {
860                         return -ENOMEM;
861                 }
862
863                 b->bits = n;
864                 b->size = new_size;
865         }
866
867         __set_bit(nr, b->bits);
868         return 0;
869 }
870
871 struct pathbuf {
872         size_t          nr;
873         size_t          size;
874
875         struct pathbuf_entry {
876                 u64     inum;
877                 u64     offset;
878         }               *entries;
879 };
880
881 static int path_down(struct pathbuf *p, u64 inum)
882 {
883         if (p->nr == p->size) {
884                 size_t new_size = max_t(size_t, 256UL, p->size * 2);
885                 void *n = krealloc(p->entries,
886                                    new_size * sizeof(p->entries[0]),
887                                    GFP_KERNEL);
888                 if (!n)
889                         return -ENOMEM;
890
891                 p->entries = n;
892                 p->size = new_size;
893         };
894
895         p->entries[p->nr++] = (struct pathbuf_entry) {
896                 .inum = inum,
897                 .offset = 0,
898         };
899         return 0;
900 }
901
902 noinline_for_stack
903 static int check_directory_structure(struct bch_fs *c,
904                                      struct bch_inode_unpacked *lostfound_inode)
905 {
906         struct inode_bitmap dirs_done = { NULL, 0 };
907         struct pathbuf path = { 0, 0, NULL };
908         struct pathbuf_entry *e;
909         struct btree_trans trans;
910         struct btree_iter *iter;
911         struct bkey_s_c k;
912         struct bkey_s_c_dirent dirent;
913         bool had_unreachable;
914         u64 d_inum;
915         int ret = 0;
916
917         bch2_trans_init(&trans, c);
918         bch2_trans_preload_iters(&trans);
919
920         bch_verbose(c, "checking directory structure");
921
922         /* DFS: */
923 restart_dfs:
924         had_unreachable = false;
925
926         ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
927         if (ret) {
928                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
929                 goto err;
930         }
931
932         ret = path_down(&path, BCACHEFS_ROOT_INO);
933         if (ret)
934                 goto err;
935
936         while (path.nr) {
937 next:
938                 e = &path.entries[path.nr - 1];
939
940                 if (e->offset == U64_MAX)
941                         goto up;
942
943                 for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
944                                    POS(e->inum, e->offset + 1), 0, k) {
945                         if (k.k->p.inode != e->inum)
946                                 break;
947
948                         e->offset = k.k->p.offset;
949
950                         if (k.k->type != KEY_TYPE_dirent)
951                                 continue;
952
953                         dirent = bkey_s_c_to_dirent(k);
954
955                         if (dirent.v->d_type != DT_DIR)
956                                 continue;
957
958                         d_inum = le64_to_cpu(dirent.v->d_inum);
959
960                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
961                                         "directory %llu has multiple hardlinks",
962                                         d_inum)) {
963                                 ret = remove_dirent(&trans, dirent);
964                                 if (ret)
965                                         goto err;
966                                 continue;
967                         }
968
969                         ret = inode_bitmap_set(&dirs_done, d_inum);
970                         if (ret) {
971                                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
972                                 goto err;
973                         }
974
975                         ret = path_down(&path, d_inum);
976                         if (ret) {
977                                 goto err;
978                         }
979
980                         ret = bch2_trans_iter_free(&trans, iter);
981                         if (ret) {
982                                 bch_err(c, "btree error %i in fsck", ret);
983                                 goto err;
984                         }
985                         goto next;
986                 }
987                 ret = bch2_trans_iter_free(&trans, iter);
988                 if (ret) {
989                         bch_err(c, "btree error %i in fsck", ret);
990                         goto err;
991                 }
992 up:
993                 path.nr--;
994         }
995
996         iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0);
997 retry:
998         for_each_btree_key_continue(iter, 0, k) {
999                 if (k.k->type != KEY_TYPE_inode)
1000                         continue;
1001
1002                 if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
1003                         continue;
1004
1005                 ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
1006                 if (ret == -EINTR)
1007                         goto retry;
1008                 if (!ret)
1009                         continue;
1010
1011                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
1012                                 "unreachable directory found (inum %llu)",
1013                                 k.k->p.inode)) {
1014                         bch2_btree_trans_unlock(&trans);
1015
1016                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
1017                         if (ret) {
1018                                 goto err;
1019                         }
1020
1021                         had_unreachable = true;
1022                 }
1023         }
1024         ret = bch2_trans_iter_free(&trans, iter);
1025         if (ret)
1026                 goto err;
1027
1028         if (had_unreachable) {
1029                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
1030                 kfree(dirs_done.bits);
1031                 kfree(path.entries);
1032                 memset(&dirs_done, 0, sizeof(dirs_done));
1033                 memset(&path, 0, sizeof(path));
1034                 goto restart_dfs;
1035         }
1036 err:
1037 fsck_err:
1038         ret = bch2_trans_exit(&trans) ?: ret;
1039         kfree(dirs_done.bits);
1040         kfree(path.entries);
1041         return ret;
1042 }
1043
1044 struct nlink {
1045         u32     count;
1046         u32     dir_count;
1047 };
1048
1049 typedef GENRADIX(struct nlink) nlink_table;
1050
1051 static void inc_link(struct bch_fs *c, nlink_table *links,
1052                      u64 range_start, u64 *range_end,
1053                      u64 inum, bool dir)
1054 {
1055         struct nlink *link;
1056
1057         if (inum < range_start || inum >= *range_end)
1058                 return;
1059
1060         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
1061         if (!link) {
1062                 bch_verbose(c, "allocation failed during fs gc - will need another pass");
1063                 *range_end = inum;
1064                 return;
1065         }
1066
1067         if (dir)
1068                 link->dir_count++;
1069         else
1070                 link->count++;
1071 }
1072
1073 noinline_for_stack
1074 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
1075                                u64 range_start, u64 *range_end)
1076 {
1077         struct btree_trans trans;
1078         struct btree_iter *iter;
1079         struct bkey_s_c k;
1080         struct bkey_s_c_dirent d;
1081         u64 d_inum;
1082         int ret;
1083
1084         bch2_trans_init(&trans, c);
1085         bch2_trans_preload_iters(&trans);
1086
1087         inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
1088
1089         for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
1090                 switch (k.k->type) {
1091                 case KEY_TYPE_dirent:
1092                         d = bkey_s_c_to_dirent(k);
1093                         d_inum = le64_to_cpu(d.v->d_inum);
1094
1095                         if (d.v->d_type == DT_DIR)
1096                                 inc_link(c, links, range_start, range_end,
1097                                          d.k->p.inode, true);
1098
1099                         inc_link(c, links, range_start, range_end,
1100                                  d_inum, false);
1101
1102                         break;
1103                 }
1104
1105                 bch2_trans_cond_resched(&trans);
1106         }
1107         ret = bch2_trans_exit(&trans);
1108         if (ret)
1109                 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
1110
1111         return ret;
1112 }
1113
1114 static int check_inode_nlink(struct bch_fs *c,
1115                              struct bch_inode_unpacked *lostfound_inode,
1116                              struct bch_inode_unpacked *u,
1117                              struct nlink *link,
1118                              bool *do_update)
1119 {
1120         u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED
1121                 ? 0
1122                 : u->bi_nlink + nlink_bias(u->bi_mode);
1123         u32 real_i_nlink =
1124                 link->count * nlink_bias(u->bi_mode) +
1125                 link->dir_count;
1126         int ret = 0;
1127
1128         /*
1129          * These should have been caught/fixed by earlier passes, we don't
1130          * repair them here:
1131          */
1132         if (S_ISDIR(u->bi_mode) && link->count > 1) {
1133                 need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
1134                               u->bi_inum, link->count);
1135                 return 0;
1136         }
1137
1138         if (S_ISDIR(u->bi_mode) && !link->count) {
1139                 need_fsck_err(c, "unreachable directory found (inum %llu)",
1140                               u->bi_inum);
1141                 return 0;
1142         }
1143
1144         if (!S_ISDIR(u->bi_mode) && link->dir_count) {
1145                 need_fsck_err(c, "non directory with subdirectories",
1146                               u->bi_inum);
1147                 return 0;
1148         }
1149
1150         if (!link->count &&
1151             !(u->bi_flags & BCH_INODE_UNLINKED) &&
1152             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1153                 if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
1154                              u->bi_inum, mode_to_type(u->bi_mode)) ==
1155                     FSCK_ERR_IGNORE)
1156                         return 0;
1157
1158                 ret = reattach_inode(c, lostfound_inode, u->bi_inum);
1159                 if (ret)
1160                         return ret;
1161
1162                 link->count = 1;
1163                 real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
1164                 goto set_i_nlink;
1165         }
1166
1167         if (i_nlink < link->count) {
1168                 if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
1169                              u->bi_inum, i_nlink, link->count,
1170                              mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
1171                         return 0;
1172                 goto set_i_nlink;
1173         }
1174
1175         if (i_nlink != real_i_nlink &&
1176             c->sb.clean) {
1177                 if (fsck_err(c, "filesystem marked clean, "
1178                              "but inode %llu has wrong i_nlink "
1179                              "(type %u i_nlink %u, should be %u)",
1180                              u->bi_inum, mode_to_type(u->bi_mode),
1181                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1182                         return 0;
1183                 goto set_i_nlink;
1184         }
1185
1186         if (i_nlink != real_i_nlink &&
1187             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1188                 if (fsck_err(c, "inode %llu has wrong i_nlink "
1189                              "(type %u i_nlink %u, should be %u)",
1190                              u->bi_inum, mode_to_type(u->bi_mode),
1191                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1192                         return 0;
1193                 goto set_i_nlink;
1194         }
1195
1196         if (real_i_nlink && i_nlink != real_i_nlink)
1197                 bch_verbose(c, "setting inode %llu nlink from %u to %u",
1198                             u->bi_inum, i_nlink, real_i_nlink);
1199 set_i_nlink:
1200         if (i_nlink != real_i_nlink) {
1201                 if (real_i_nlink) {
1202                         u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode);
1203                         u->bi_flags &= ~BCH_INODE_UNLINKED;
1204                 } else {
1205                         u->bi_nlink = 0;
1206                         u->bi_flags |= BCH_INODE_UNLINKED;
1207                 }
1208
1209                 *do_update = true;
1210         }
1211 fsck_err:
1212         return ret;
1213 }
1214
1215 static int check_inode(struct btree_trans *trans,
1216                        struct bch_inode_unpacked *lostfound_inode,
1217                        struct btree_iter *iter,
1218                        struct bkey_s_c_inode inode,
1219                        struct nlink *link)
1220 {
1221         struct bch_fs *c = trans->c;
1222         struct bch_inode_unpacked u;
1223         bool do_update = false;
1224         int ret = 0;
1225
1226         ret = bch2_inode_unpack(inode, &u);
1227
1228         bch2_btree_trans_unlock(trans);
1229
1230         if (bch2_fs_inconsistent_on(ret, c,
1231                          "error unpacking inode %llu in fsck",
1232                          inode.k->p.inode))
1233                 return ret;
1234
1235         if (link) {
1236                 ret = check_inode_nlink(c, lostfound_inode, &u, link,
1237                                         &do_update);
1238                 if (ret)
1239                         return ret;
1240         }
1241
1242         if (u.bi_flags & BCH_INODE_UNLINKED &&
1243             (!c->sb.clean ||
1244              fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
1245                       u.bi_inum))) {
1246                 bch_verbose(c, "deleting inode %llu", u.bi_inum);
1247
1248                 ret = bch2_inode_rm(c, u.bi_inum);
1249                 if (ret)
1250                         bch_err(c, "error in fs gc: error %i "
1251                                 "while deleting inode", ret);
1252                 return ret;
1253         }
1254
1255         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
1256             (!c->sb.clean ||
1257              fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
1258                       u.bi_inum))) {
1259                 bch_verbose(c, "truncating inode %llu", u.bi_inum);
1260
1261                 /*
1262                  * XXX: need to truncate partial blocks too here - or ideally
1263                  * just switch units to bytes and that issue goes away
1264                  */
1265
1266                 ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
1267                 if (ret) {
1268                         bch_err(c, "error in fs gc: error %i "
1269                                 "truncating inode", ret);
1270                         return ret;
1271                 }
1272
1273                 /*
1274                  * We truncated without our normal sector accounting hook, just
1275                  * make sure we recalculate it:
1276                  */
1277                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
1278
1279                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
1280                 do_update = true;
1281         }
1282
1283         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
1284             (!c->sb.clean ||
1285              fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
1286                       u.bi_inum))) {
1287                 s64 sectors;
1288
1289                 bch_verbose(c, "recounting sectors for inode %llu",
1290                             u.bi_inum);
1291
1292                 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
1293                 if (sectors < 0) {
1294                         bch_err(c, "error in fs gc: error %i "
1295                                 "recounting inode sectors",
1296                                 (int) sectors);
1297                         return sectors;
1298                 }
1299
1300                 u.bi_sectors = sectors;
1301                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
1302                 do_update = true;
1303         }
1304
1305         if (do_update) {
1306                 struct bkey_inode_buf p;
1307
1308                 bch2_inode_pack(&p, &u);
1309                 bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
1310
1311                 ret = bch2_trans_commit(trans, NULL, NULL,
1312                                         BTREE_INSERT_NOFAIL|
1313                                         BTREE_INSERT_LAZY_RW);
1314                 if (ret && ret != -EINTR)
1315                         bch_err(c, "error in fs gc: error %i "
1316                                 "updating inode", ret);
1317         }
1318 fsck_err:
1319         return ret;
1320 }
1321
1322 noinline_for_stack
1323 static int bch2_gc_walk_inodes(struct bch_fs *c,
1324                                struct bch_inode_unpacked *lostfound_inode,
1325                                nlink_table *links,
1326                                u64 range_start, u64 range_end)
1327 {
1328         struct btree_trans trans;
1329         struct btree_iter *iter;
1330         struct bkey_s_c k;
1331         struct nlink *link, zero_links = { 0, 0 };
1332         struct genradix_iter nlinks_iter;
1333         int ret = 0, ret2 = 0;
1334         u64 nlinks_pos;
1335
1336         bch2_trans_init(&trans, c);
1337         bch2_trans_preload_iters(&trans);
1338
1339         iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
1340                                    POS(range_start, 0), 0);
1341         nlinks_iter = genradix_iter_init(links, 0);
1342
1343         while ((k = bch2_btree_iter_peek(iter)).k &&
1344                !(ret2 = bkey_err(k))) {
1345 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
1346
1347                 if (!link && (!k.k || iter->pos.inode >= range_end))
1348                         break;
1349
1350                 nlinks_pos = range_start + nlinks_iter.pos;
1351                 if (iter->pos.inode > nlinks_pos) {
1352                         /* Should have been caught by dirents pass: */
1353                         need_fsck_err_on(link && link->count, c,
1354                                 "missing inode %llu (nlink %u)",
1355                                 nlinks_pos, link->count);
1356                         genradix_iter_advance(&nlinks_iter, links);
1357                         goto peek_nlinks;
1358                 }
1359
1360                 if (iter->pos.inode < nlinks_pos || !link)
1361                         link = &zero_links;
1362
1363                 if (k.k && k.k->type == KEY_TYPE_inode) {
1364                         ret = check_inode(&trans, lostfound_inode, iter,
1365                                           bkey_s_c_to_inode(k), link);
1366                         BUG_ON(ret == -EINTR);
1367                         if (ret)
1368                                 break;
1369                 } else {
1370                         /* Should have been caught by dirents pass: */
1371                         need_fsck_err_on(link->count, c,
1372                                 "missing inode %llu (nlink %u)",
1373                                 nlinks_pos, link->count);
1374                 }
1375
1376                 if (nlinks_pos == iter->pos.inode)
1377                         genradix_iter_advance(&nlinks_iter, links);
1378
1379                 bch2_btree_iter_next(iter);
1380                 bch2_trans_cond_resched(&trans);
1381         }
1382 fsck_err:
1383         bch2_trans_exit(&trans);
1384
1385         if (ret2)
1386                 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
1387
1388         return ret ?: ret2;
1389 }
1390
1391 noinline_for_stack
1392 static int check_inode_nlinks(struct bch_fs *c,
1393                               struct bch_inode_unpacked *lostfound_inode)
1394 {
1395         nlink_table links;
1396         u64 this_iter_range_start, next_iter_range_start = 0;
1397         int ret = 0;
1398
1399         bch_verbose(c, "checking inode nlinks");
1400
1401         genradix_init(&links);
1402
1403         do {
1404                 this_iter_range_start = next_iter_range_start;
1405                 next_iter_range_start = U64_MAX;
1406
1407                 ret = bch2_gc_walk_dirents(c, &links,
1408                                           this_iter_range_start,
1409                                           &next_iter_range_start);
1410                 if (ret)
1411                         break;
1412
1413                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1414                                          this_iter_range_start,
1415                                          next_iter_range_start);
1416                 if (ret)
1417                         break;
1418
1419                 genradix_free(&links);
1420         } while (next_iter_range_start != U64_MAX);
1421
1422         genradix_free(&links);
1423
1424         return ret;
1425 }
1426
1427 noinline_for_stack
1428 static int check_inodes_fast(struct bch_fs *c)
1429 {
1430         struct btree_trans trans;
1431         struct btree_iter *iter;
1432         struct bkey_s_c k;
1433         struct bkey_s_c_inode inode;
1434         int ret = 0, ret2;
1435
1436         bch2_trans_init(&trans, c);
1437         bch2_trans_preload_iters(&trans);
1438
1439         iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
1440                                    POS_MIN, 0);
1441
1442         for_each_btree_key_continue(iter, 0, k) {
1443                 if (k.k->type != KEY_TYPE_inode)
1444                         continue;
1445
1446                 inode = bkey_s_c_to_inode(k);
1447
1448                 if (inode.v->bi_flags &
1449                     (BCH_INODE_I_SIZE_DIRTY|
1450                      BCH_INODE_I_SECTORS_DIRTY|
1451                      BCH_INODE_UNLINKED)) {
1452                         ret = check_inode(&trans, NULL, iter, inode, NULL);
1453                         BUG_ON(ret == -EINTR);
1454                         if (ret)
1455                                 break;
1456                 }
1457         }
1458
1459         ret2 = bch2_trans_exit(&trans);
1460
1461         return ret ?: ret2;
1462 }
1463
1464 /*
1465  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1466  * Doesn't fix them yet, mainly because they haven't yet been observed:
1467  */
1468 static int bch2_fsck_full(struct bch_fs *c)
1469 {
1470         struct bch_inode_unpacked root_inode, lostfound_inode;
1471         int ret;
1472
1473         bch_verbose(c, "starting fsck:");
1474         ret =   check_extents(c) ?:
1475                 check_dirents(c) ?:
1476                 check_xattrs(c) ?:
1477                 check_root(c, &root_inode) ?:
1478                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1479                 check_directory_structure(c, &lostfound_inode) ?:
1480                 check_inode_nlinks(c, &lostfound_inode);
1481
1482         bch2_flush_fsck_errs(c);
1483         bch_verbose(c, "fsck done");
1484
1485         return ret;
1486 }
1487
1488 static int bch2_fsck_inode_nlink(struct bch_fs *c)
1489 {
1490         struct bch_inode_unpacked root_inode, lostfound_inode;
1491         int ret;
1492
1493         bch_verbose(c, "checking inode link counts:");
1494         ret =   check_root(c, &root_inode) ?:
1495                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1496                 check_inode_nlinks(c, &lostfound_inode);
1497
1498         bch2_flush_fsck_errs(c);
1499         bch_verbose(c, "done");
1500
1501         return ret;
1502 }
1503
1504 static int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1505 {
1506         int ret;
1507
1508         bch_verbose(c, "walking inodes:");
1509         ret = check_inodes_fast(c);
1510
1511         bch2_flush_fsck_errs(c);
1512         bch_verbose(c, "done");
1513
1514         return ret;
1515 }
1516
1517 int bch2_fsck(struct bch_fs *c)
1518 {
1519         if (c->opts.fsck)
1520                 return bch2_fsck_full(c);
1521
1522         if (c->sb.clean)
1523                 return 0;
1524
1525         return c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)
1526                 ? bch2_fsck_walk_inodes_only(c)
1527                 : bch2_fsck_inode_nlink(c);
1528 }