]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
Update bcachefs sources to b964c6cba8 bcachefs: Change lockrestart_do() to always...
[bcachefs-tools-debian] / libbcachefs / fsck.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "bkey_buf.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "error.h"
8 #include "fs-common.h"
9 #include "fsck.h"
10 #include "inode.h"
11 #include "keylist.h"
12 #include "super.h"
13 #include "xattr.h"
14
15 #include <linux/bsearch.h>
16 #include <linux/dcache.h> /* struct qstr */
17
18 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
19
20 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
21 {
22         struct btree_iter *iter;
23         struct bkey_s_c k;
24         u64 sectors = 0;
25         int ret;
26
27         for_each_btree_key(trans, iter, BTREE_ID_extents,
28                            POS(inum, 0), 0, k, ret) {
29                 if (k.k->p.inode != inum)
30                         break;
31
32                 if (bkey_extent_is_allocation(k.k))
33                         sectors += k.k->size;
34         }
35
36         bch2_trans_iter_free(trans, iter);
37
38         return ret ?: sectors;
39 }
40
41 static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
42                           struct bch_inode_unpacked *inode,
43                           u32 *snapshot)
44 {
45         struct btree_iter *iter;
46         struct bkey_s_c k;
47         int ret;
48
49         iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
50                         POS(0, inode_nr), 0);
51         k = bch2_btree_iter_peek_slot(iter);
52         ret = bkey_err(k);
53         if (ret)
54                 goto err;
55
56         if (snapshot)
57                 *snapshot = iter->pos.snapshot;
58         ret = k.k->type == KEY_TYPE_inode
59                 ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
60                 : -ENOENT;
61 err:
62         bch2_trans_iter_free(trans, iter);
63         return ret;
64 }
65
66 static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
67                         struct bch_inode_unpacked *inode,
68                         u32 *snapshot)
69 {
70         return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
71 }
72
73 static int __write_inode(struct btree_trans *trans,
74                          struct bch_inode_unpacked *inode,
75                          u32 snapshot)
76 {
77         struct btree_iter *inode_iter =
78                 bch2_trans_get_iter(trans, BTREE_ID_inodes,
79                                     SPOS(0, inode->bi_inum, snapshot),
80                                     BTREE_ITER_INTENT);
81         int ret = bch2_btree_iter_traverse(inode_iter) ?:
82                 bch2_inode_write(trans, inode_iter, inode);
83         bch2_trans_iter_put(trans, inode_iter);
84         return ret;
85 }
86
87 static int write_inode(struct btree_trans *trans,
88                        struct bch_inode_unpacked *inode,
89                        u32 snapshot)
90 {
91         int ret = __bch2_trans_do(trans, NULL, NULL,
92                                   BTREE_INSERT_NOFAIL|
93                                   BTREE_INSERT_LAZY_RW,
94                                   __write_inode(trans, inode, snapshot));
95         if (ret)
96                 bch_err(trans->c, "error in fsck: error %i updating inode", ret);
97         return ret;
98 }
99
100 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
101 {
102         struct bch_fs *c = trans->c;
103         struct btree_iter *iter;
104         struct bch_inode_unpacked dir_inode;
105         struct bch_hash_info dir_hash_info;
106         int ret;
107
108         ret = lookup_inode(trans, pos.inode, &dir_inode, NULL);
109         if (ret)
110                 return ret;
111
112         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
113
114         iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
115
116         ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
117                                   &dir_hash_info, iter);
118         bch2_trans_iter_put(trans, iter);
119         return ret;
120 }
121
122 static int remove_dirent(struct btree_trans *trans, struct bpos pos)
123 {
124         int ret = __bch2_trans_do(trans, NULL, NULL,
125                                   BTREE_INSERT_NOFAIL|
126                                   BTREE_INSERT_LAZY_RW,
127                                   __remove_dirent(trans, pos));
128         if (ret)
129                 bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret);
130         return ret;
131 }
132
133 /* Get lost+found, create if it doesn't exist: */
134 static int lookup_lostfound(struct btree_trans *trans,
135                             struct bch_inode_unpacked *lostfound)
136 {
137         struct bch_fs *c = trans->c;
138         struct bch_inode_unpacked root;
139         struct bch_hash_info root_hash_info;
140         struct qstr lostfound_str = QSTR("lost+found");
141         u64 inum;
142         u32 snapshot;
143         int ret;
144
145         ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root, &snapshot);
146         if (ret && ret != -ENOENT)
147                 return ret;
148
149         root_hash_info = bch2_hash_info_init(c, &root);
150         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
151                                   &lostfound_str);
152         if (!inum) {
153                 bch_notice(c, "creating lost+found");
154                 goto create_lostfound;
155         }
156
157         ret = lookup_inode(trans, inum, lostfound, &snapshot);
158         if (ret && ret != -ENOENT) {
159                 /*
160                  * The check_dirents pass has already run, dangling dirents
161                  * shouldn't exist here:
162                  */
163                 bch_err(c, "error looking up lost+found: %i", ret);
164                 return ret;
165         }
166
167         if (ret == -ENOENT) {
168 create_lostfound:
169                 bch2_inode_init_early(c, lostfound);
170
171                 ret = __bch2_trans_do(trans, NULL, NULL,
172                                       BTREE_INSERT_NOFAIL|
173                                       BTREE_INSERT_LAZY_RW,
174                         bch2_create_trans(trans,
175                                           BCACHEFS_ROOT_INO, &root,
176                                           lostfound,
177                                           &lostfound_str,
178                                           0, 0, S_IFDIR|0700, 0, NULL, NULL));
179                 if (ret)
180                         bch_err(c, "error creating lost+found: %i", ret);
181         }
182
183         return 0;
184 }
185
186 static int reattach_inode(struct btree_trans *trans,
187                           struct bch_inode_unpacked *inode)
188 {
189         struct bch_hash_info dir_hash;
190         struct bch_inode_unpacked lostfound;
191         char name_buf[20];
192         struct qstr name;
193         u64 dir_offset = 0;
194         int ret;
195
196         ret = lookup_lostfound(trans, &lostfound);
197         if (ret)
198                 return ret;
199
200         if (S_ISDIR(inode->bi_mode)) {
201                 lostfound.bi_nlink++;
202
203                 ret = write_inode(trans, &lostfound, U32_MAX);
204                 if (ret)
205                         return ret;
206         }
207
208         dir_hash = bch2_hash_info_init(trans->c, &lostfound);
209
210         snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
211         name = (struct qstr) QSTR(name_buf);
212
213         ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
214                 bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash,
215                                    mode_to_type(inode->bi_mode),
216                                    &name, inode->bi_inum, &dir_offset,
217                                    BCH_HASH_SET_MUST_CREATE));
218         if (ret) {
219                 bch_err(trans->c, "error %i reattaching inode %llu",
220                         ret, inode->bi_inum);
221                 return ret;
222         }
223
224         inode->bi_dir           = lostfound.bi_inum;
225         inode->bi_dir_offset    = dir_offset;
226
227         return write_inode(trans, inode, U32_MAX);
228 }
229
230 static int remove_backpointer(struct btree_trans *trans,
231                               struct bch_inode_unpacked *inode)
232 {
233         struct btree_iter *iter;
234         struct bkey_s_c k;
235         int ret;
236
237         iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
238                                    POS(inode->bi_dir, inode->bi_dir_offset), 0);
239         k = bch2_btree_iter_peek_slot(iter);
240         ret = bkey_err(k);
241         if (ret)
242                 goto out;
243         if (k.k->type != KEY_TYPE_dirent) {
244                 ret = -ENOENT;
245                 goto out;
246         }
247
248         ret = remove_dirent(trans, k.k->p);
249 out:
250         bch2_trans_iter_put(trans, iter);
251         return ret;
252 }
253
254 struct inode_walker {
255         bool                    first_this_inode;
256         bool                    have_inode;
257         u64                     cur_inum;
258         u32                     snapshot;
259         struct bch_inode_unpacked inode;
260 };
261
262 static struct inode_walker inode_walker_init(void)
263 {
264         return (struct inode_walker) {
265                 .cur_inum       = -1,
266                 .have_inode     = false,
267         };
268 }
269
270 static int __walk_inode(struct btree_trans *trans,
271                         struct inode_walker *w, u64 inum)
272 {
273         if (inum != w->cur_inum) {
274                 int ret = __lookup_inode(trans, inum, &w->inode, &w->snapshot);
275
276                 if (ret && ret != -ENOENT)
277                         return ret;
278
279                 w->have_inode   = !ret;
280                 w->cur_inum     = inum;
281                 w->first_this_inode = true;
282         } else {
283                 w->first_this_inode = false;
284         }
285
286         return 0;
287 }
288
289 static int walk_inode(struct btree_trans *trans,
290                       struct inode_walker *w, u64 inum)
291 {
292         return lockrestart_do(trans, __walk_inode(trans, w, inum));
293 }
294
295 static int hash_redo_key(struct btree_trans *trans,
296                          const struct bch_hash_desc desc,
297                          struct bch_hash_info *hash_info,
298                          struct btree_iter *k_iter, struct bkey_s_c k)
299 {
300         struct bkey_i *delete;
301         struct bkey_i *tmp;
302
303         delete = bch2_trans_kmalloc(trans, sizeof(*delete));
304         if (IS_ERR(delete))
305                 return PTR_ERR(delete);
306
307         tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
308         if (IS_ERR(tmp))
309                 return PTR_ERR(tmp);
310
311         bkey_reassemble(tmp, k);
312
313         bkey_init(&delete->k);
314         delete->k.p = k_iter->pos;
315         return  bch2_btree_iter_traverse(k_iter) ?:
316                 bch2_trans_update(trans, k_iter, delete, 0) ?:
317                 bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
318 }
319
320 static int fsck_hash_delete_at(struct btree_trans *trans,
321                                const struct bch_hash_desc desc,
322                                struct bch_hash_info *info,
323                                struct btree_iter *iter)
324 {
325         int ret;
326 retry:
327         ret   = bch2_hash_delete_at(trans, desc, info, iter) ?:
328                 bch2_trans_commit(trans, NULL, NULL,
329                                   BTREE_INSERT_NOFAIL|
330                                   BTREE_INSERT_LAZY_RW);
331         if (ret == -EINTR) {
332                 ret = bch2_btree_iter_traverse(iter);
333                 if (!ret)
334                         goto retry;
335         }
336
337         return ret;
338 }
339
340 static int hash_check_key(struct btree_trans *trans,
341                           const struct bch_hash_desc desc,
342                           struct bch_hash_info *hash_info,
343                           struct btree_iter *k_iter, struct bkey_s_c hash_k)
344 {
345         struct bch_fs *c = trans->c;
346         struct btree_iter *iter = NULL;
347         char buf[200];
348         struct bkey_s_c k;
349         u64 hash;
350         int ret = 0;
351
352         if (hash_k.k->type != desc.key_type)
353                 return 0;
354
355         hash = desc.hash_bkey(hash_info, hash_k);
356
357         if (likely(hash == hash_k.k->p.offset))
358                 return 0;
359
360         if (hash_k.k->p.offset < hash)
361                 goto bad_hash;
362
363         for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
364                            BTREE_ITER_SLOTS, k, ret) {
365                 if (!bkey_cmp(k.k->p, hash_k.k->p))
366                         break;
367
368                 if (fsck_err_on(k.k->type == desc.key_type &&
369                                 !desc.cmp_bkey(k, hash_k), c,
370                                 "duplicate hash table keys:\n%s",
371                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
372                                                        hash_k), buf))) {
373                         ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
374                         if (ret)
375                                 return ret;
376                         ret = 1;
377                         break;
378                 }
379
380                 if (bkey_deleted(k.k)) {
381                         bch2_trans_iter_free(trans, iter);
382                         goto bad_hash;
383                 }
384
385         }
386         bch2_trans_iter_free(trans, iter);
387         return ret;
388 bad_hash:
389         if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
390                      "hashed to %llu\n%s",
391                      desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash,
392                      (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
393                 return 0;
394
395         ret = __bch2_trans_do(trans, NULL, NULL,
396                               BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
397                 hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
398         if (ret) {
399                 bch_err(c, "hash_redo_key err %i", ret);
400                 return ret;
401         }
402         return -EINTR;
403 fsck_err:
404         return ret;
405 }
406
407 static int check_inode(struct btree_trans *trans,
408                        struct btree_iter *iter,
409                        struct bkey_s_c_inode inode)
410 {
411         struct bch_fs *c = trans->c;
412         struct bch_inode_unpacked u;
413         bool do_update = false;
414         int ret = 0;
415
416         ret = bch2_inode_unpack(inode, &u);
417
418         if (bch2_fs_inconsistent_on(ret, c,
419                          "error unpacking inode %llu in fsck",
420                          inode.k->p.inode))
421                 return ret;
422
423         if (u.bi_flags & BCH_INODE_UNLINKED &&
424             (!c->sb.clean ||
425              fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
426                       u.bi_inum))) {
427                 bch_verbose(c, "deleting inode %llu", u.bi_inum);
428
429                 bch2_trans_unlock(trans);
430                 bch2_fs_lazy_rw(c);
431
432                 ret = bch2_inode_rm(c, u.bi_inum, false);
433                 if (ret)
434                         bch_err(c, "error in fsck: error %i while deleting inode", ret);
435                 return ret;
436         }
437
438         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
439             (!c->sb.clean ||
440              fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
441                       u.bi_inum))) {
442                 bch_verbose(c, "truncating inode %llu", u.bi_inum);
443
444                 bch2_trans_unlock(trans);
445                 bch2_fs_lazy_rw(c);
446
447                 /*
448                  * XXX: need to truncate partial blocks too here - or ideally
449                  * just switch units to bytes and that issue goes away
450                  */
451                 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
452                                 POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9),
453                                 POS(u.bi_inum, U64_MAX),
454                                 NULL);
455                 if (ret) {
456                         bch_err(c, "error in fsck: error %i truncating inode", ret);
457                         return ret;
458                 }
459
460                 /*
461                  * We truncated without our normal sector accounting hook, just
462                  * make sure we recalculate it:
463                  */
464                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
465
466                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
467                 do_update = true;
468         }
469
470         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
471             (!c->sb.clean ||
472              fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
473                       u.bi_inum))) {
474                 s64 sectors;
475
476                 bch_verbose(c, "recounting sectors for inode %llu",
477                             u.bi_inum);
478
479                 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
480                 if (sectors < 0) {
481                         bch_err(c, "error in fsck: error %i recounting inode sectors",
482                                 (int) sectors);
483                         return sectors;
484                 }
485
486                 u.bi_sectors = sectors;
487                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
488                 do_update = true;
489         }
490
491         if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
492                 u.bi_dir = 0;
493                 u.bi_dir_offset = 0;
494                 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
495                 do_update = true;
496         }
497
498         if (do_update) {
499                 ret = __bch2_trans_do(trans, NULL, NULL,
500                                       BTREE_INSERT_NOFAIL|
501                                       BTREE_INSERT_LAZY_RW,
502                                 bch2_btree_iter_traverse(iter) ?:
503                                 bch2_inode_write(trans, iter, &u));
504                 if (ret)
505                         bch_err(c, "error in fsck: error %i "
506                                 "updating inode", ret);
507         }
508 fsck_err:
509         return ret;
510 }
511
512 noinline_for_stack
513 static int check_inodes(struct bch_fs *c, bool full)
514 {
515         struct btree_trans trans;
516         struct btree_iter *iter;
517         struct bkey_s_c k;
518         struct bkey_s_c_inode inode;
519         int ret;
520
521         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
522
523         for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
524                            BTREE_ITER_INTENT|
525                            BTREE_ITER_PREFETCH, k, ret) {
526                 if (k.k->type != KEY_TYPE_inode)
527                         continue;
528
529                 inode = bkey_s_c_to_inode(k);
530
531                 if (full ||
532                     (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
533                                           BCH_INODE_I_SECTORS_DIRTY|
534                                           BCH_INODE_UNLINKED))) {
535                         ret = check_inode(&trans, iter, inode);
536                         if (ret)
537                                 break;
538                 }
539         }
540         bch2_trans_iter_put(&trans, iter);
541
542         BUG_ON(ret == -EINTR);
543
544         return bch2_trans_exit(&trans) ?: ret;
545 }
546
547 static int fix_overlapping_extent(struct btree_trans *trans,
548                                        struct bkey_s_c k, struct bpos cut_at)
549 {
550         struct btree_iter *iter;
551         struct bkey_i *u;
552         int ret;
553
554         u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
555         ret = PTR_ERR_OR_ZERO(u);
556         if (ret)
557                 return ret;
558
559         bkey_reassemble(u, k);
560         bch2_cut_front(cut_at, u);
561
562
563         /*
564          * We don't want to go through the extent_handle_overwrites path:
565          *
566          * XXX: this is going to screw up disk accounting, extent triggers
567          * assume things about extent overwrites - we should be running the
568          * triggers manually here
569          */
570         iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p,
571                                    BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
572
573         BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
574         ret   = bch2_btree_iter_traverse(iter) ?:
575                 bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?:
576                 bch2_trans_commit(trans, NULL, NULL,
577                                   BTREE_INSERT_NOFAIL|
578                                   BTREE_INSERT_LAZY_RW);
579         bch2_trans_iter_put(trans, iter);
580         return ret;
581 }
582
583 static int inode_backpointer_exists(struct btree_trans *trans,
584                                     struct bch_inode_unpacked *inode)
585 {
586         struct btree_iter *iter;
587         struct bkey_s_c k;
588         int ret;
589
590         iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
591                                    POS(inode->bi_dir, inode->bi_dir_offset), 0);
592         k = bch2_btree_iter_peek_slot(iter);
593         ret = bkey_err(k);
594         if (ret)
595                 goto out;
596         if (k.k->type != KEY_TYPE_dirent)
597                 goto out;
598
599         ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
600 out:
601         bch2_trans_iter_free(trans, iter);
602         return ret;
603 }
604
605 static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
606                                       struct bch_inode_unpacked *inode)
607 {
608         return d.k->p.inode == inode->bi_dir &&
609                 d.k->p.offset == inode->bi_dir_offset;
610 }
611
612 /*
613  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
614  * that i_size an i_sectors are consistent
615  */
616 noinline_for_stack
617 static int check_extents(struct bch_fs *c)
618 {
619         struct inode_walker w = inode_walker_init();
620         struct btree_trans trans;
621         struct btree_iter *iter;
622         struct bkey_s_c k;
623         struct bkey_buf prev;
624         u64 i_sectors = 0;
625         int ret = 0;
626
627         bch2_bkey_buf_init(&prev);
628         prev.k->k = KEY(0, 0, 0);
629         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
630
631         bch_verbose(c, "checking extents");
632
633         iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
634                                    POS(BCACHEFS_ROOT_INO, 0),
635                                    BTREE_ITER_INTENT|
636                                    BTREE_ITER_PREFETCH);
637 retry:
638         while ((k = bch2_btree_iter_peek(iter)).k &&
639                !(ret = bkey_err(k))) {
640                 if (w.have_inode &&
641                     w.cur_inum != k.k->p.inode &&
642                     !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
643                     fsck_err_on(w.inode.bi_sectors != i_sectors, c,
644                                 "inode %llu has incorrect i_sectors: got %llu, should be %llu",
645                                 w.inode.bi_inum,
646                                 w.inode.bi_sectors, i_sectors)) {
647                         w.inode.bi_sectors = i_sectors;
648
649                         ret = write_inode(&trans, &w.inode, w.snapshot);
650                         if (ret)
651                                 break;
652                 }
653
654                 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
655                         char buf1[200];
656                         char buf2[200];
657
658                         bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
659                         bch2_bkey_val_to_text(&PBUF(buf2), c, k);
660
661                         if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
662                                 return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR;
663                 }
664
665                 ret = walk_inode(&trans, &w, k.k->p.inode);
666                 if (ret)
667                         break;
668
669                 if (w.first_this_inode)
670                         i_sectors = 0;
671
672                 if (fsck_err_on(!w.have_inode, c,
673                                 "extent type %u for missing inode %llu",
674                                 k.k->type, k.k->p.inode) ||
675                     fsck_err_on(w.have_inode &&
676                                 !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
677                                 "extent type %u for non regular file, inode %llu mode %o",
678                                 k.k->type, k.k->p.inode, w.inode.bi_mode)) {
679                         bch2_fs_lazy_rw(c);
680                         return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
681                                                        POS(k.k->p.inode, 0),
682                                                        POS(k.k->p.inode, U64_MAX),
683                                                        NULL) ?: -EINTR;
684                 }
685
686                 if (fsck_err_on(w.have_inode &&
687                                 !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
688                                 k.k->type != KEY_TYPE_reservation &&
689                                 k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
690                                 "extent type %u offset %llu past end of inode %llu, i_size %llu",
691                                 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
692                         bch2_fs_lazy_rw(c);
693                         return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
694                                         POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9),
695                                         POS(k.k->p.inode, U64_MAX),
696                                         NULL) ?: -EINTR;
697                 }
698
699                 if (bkey_extent_is_allocation(k.k))
700                         i_sectors += k.k->size;
701                 bch2_bkey_buf_reassemble(&prev, c, k);
702
703                 bch2_btree_iter_advance(iter);
704         }
705 fsck_err:
706         if (ret == -EINTR)
707                 goto retry;
708         bch2_trans_iter_put(&trans, iter);
709         bch2_bkey_buf_exit(&prev, c);
710         return bch2_trans_exit(&trans) ?: ret;
711 }
712
713 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
714                         struct bch_hash_info *hash_info,
715                         struct inode_walker *w, unsigned *nr_subdirs)
716 {
717         struct bch_fs *c = trans->c;
718         struct bkey_s_c k;
719         struct bkey_s_c_dirent d;
720         struct bch_inode_unpacked target;
721         u32 target_snapshot;
722         bool have_target;
723         bool backpointer_exists = true;
724         u64 d_inum;
725         char buf[200];
726         int ret;
727
728         k = bch2_btree_iter_peek(iter);
729         if (!k.k)
730                 return 0;
731
732         ret = bkey_err(k);
733         if (ret)
734                 return ret;
735
736         if (w->have_inode &&
737             w->cur_inum != k.k->p.inode &&
738             fsck_err_on(w->inode.bi_nlink != *nr_subdirs, c,
739                         "directory %llu with wrong i_nlink: got %u, should be %u",
740                         w->inode.bi_inum, w->inode.bi_nlink, *nr_subdirs)) {
741                 w->inode.bi_nlink = *nr_subdirs;
742                 ret = write_inode(trans, &w->inode, w->snapshot);
743                 return ret ?: -EINTR;
744         }
745
746         ret = __walk_inode(trans, w, k.k->p.inode);
747         if (ret)
748                 return ret;
749
750         if (w->first_this_inode)
751                 *nr_subdirs = 0;
752
753         if (fsck_err_on(!w->have_inode, c,
754                         "dirent in nonexisting directory:\n%s",
755                         (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) ||
756             fsck_err_on(!S_ISDIR(w->inode.bi_mode), c,
757                         "dirent in non directory inode type %u:\n%s",
758                         mode_to_type(w->inode.bi_mode),
759                         (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
760                 return __bch2_trans_do(trans, NULL, NULL, 0,
761                                 bch2_btree_delete_at(trans, iter, 0));
762
763         if (!w->have_inode)
764                 return 0;
765
766         if (w->first_this_inode)
767                 *hash_info = bch2_hash_info_init(c, &w->inode);
768
769         ret = hash_check_key(trans, bch2_dirent_hash_desc,
770                              hash_info, iter, k);
771         if (ret < 0)
772                 return ret;
773         if (ret) /* dirent has been deleted */
774                 return 0;
775
776         if (k.k->type != KEY_TYPE_dirent)
777                 return 0;
778
779         d = bkey_s_c_to_dirent(k);
780         d_inum = le64_to_cpu(d.v->d_inum);
781
782         ret = __lookup_inode(trans, d_inum, &target, &target_snapshot);
783         if (ret && ret != -ENOENT)
784                 return ret;
785
786         have_target = !ret;
787         ret = 0;
788
789         if (fsck_err_on(!have_target, c,
790                         "dirent points to missing inode:\n%s",
791                         (bch2_bkey_val_to_text(&PBUF(buf), c,
792                                                k), buf)))
793                 return remove_dirent(trans, d.k->p);
794
795         if (!have_target)
796                 return 0;
797
798         if (!target.bi_dir &&
799             !target.bi_dir_offset) {
800                 target.bi_dir           = k.k->p.inode;
801                 target.bi_dir_offset    = k.k->p.offset;
802
803                 ret = __write_inode(trans, &target, target_snapshot) ?:
804                         bch2_trans_commit(trans, NULL, NULL,
805                                           BTREE_INSERT_NOFAIL|
806                                           BTREE_INSERT_LAZY_RW);
807                 if (ret)
808                         return ret;
809                 return -EINTR;
810         }
811
812         if (!inode_backpointer_matches(d, &target)) {
813                 ret = inode_backpointer_exists(trans, &target);
814                 if (ret < 0)
815                         return ret;
816
817                 backpointer_exists = ret;
818                 ret = 0;
819
820                 if (fsck_err_on(S_ISDIR(target.bi_mode) &&
821                                 backpointer_exists, c,
822                                 "directory %llu with multiple links",
823                                 target.bi_inum))
824                         return remove_dirent(trans, d.k->p);
825
826                 if (fsck_err_on(backpointer_exists &&
827                                 !target.bi_nlink, c,
828                                 "inode %llu has multiple links but i_nlink 0",
829                                 d_inum)) {
830                         target.bi_nlink++;
831                         target.bi_flags &= ~BCH_INODE_UNLINKED;
832
833                         ret = write_inode(trans, &target, target_snapshot);
834                         return ret ?: -EINTR;
835                 }
836
837                 if (fsck_err_on(!backpointer_exists, c,
838                                 "inode %llu has wrong backpointer:\n"
839                                 "got       %llu:%llu\n"
840                                 "should be %llu:%llu",
841                                 d_inum,
842                                 target.bi_dir,
843                                 target.bi_dir_offset,
844                                 k.k->p.inode,
845                                 k.k->p.offset)) {
846                         target.bi_dir           = k.k->p.inode;
847                         target.bi_dir_offset    = k.k->p.offset;
848
849                         ret = write_inode(trans, &target, target_snapshot);
850                         return ret ?: -EINTR;
851                 }
852         }
853
854         if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
855                         "incorrect d_type: should be %u:\n%s",
856                         mode_to_type(target.bi_mode),
857                         (bch2_bkey_val_to_text(&PBUF(buf), c,
858                                                k), buf))) {
859                 struct bkey_i_dirent *n;
860
861                 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
862                 if (!n)
863                         return -ENOMEM;
864
865                 bkey_reassemble(&n->k_i, d.s_c);
866                 n->v.d_type = mode_to_type(target.bi_mode);
867
868                 ret = __bch2_trans_do(trans, NULL, NULL,
869                                       BTREE_INSERT_NOFAIL|
870                                       BTREE_INSERT_LAZY_RW,
871                         bch2_btree_iter_traverse(iter) ?:
872                         bch2_trans_update(trans, iter, &n->k_i, 0));
873                 kfree(n);
874                 return ret ?: -EINTR;
875         }
876
877         *nr_subdirs += d.v->d_type == DT_DIR;
878         return 0;
879 fsck_err:
880         return ret;
881 }
882
883 /*
884  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
885  * validate d_type
886  */
887 noinline_for_stack
888 static int check_dirents(struct bch_fs *c)
889 {
890         struct inode_walker w = inode_walker_init();
891         struct bch_hash_info hash_info;
892         struct btree_trans trans;
893         struct btree_iter *iter;
894         unsigned nr_subdirs = 0;
895         int ret = 0;
896
897         bch_verbose(c, "checking dirents");
898
899         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
900
901         iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
902                                    POS(BCACHEFS_ROOT_INO, 0),
903                                    BTREE_ITER_INTENT|
904                                    BTREE_ITER_PREFETCH);
905
906         do {
907                 ret = lockrestart_do(&trans,
908                                 check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
909                 if (ret)
910                         break;
911         } while (bch2_btree_iter_advance(iter));
912         bch2_trans_iter_put(&trans, iter);
913
914         return bch2_trans_exit(&trans) ?: ret;
915 }
916
917 /*
918  * Walk xattrs: verify that they all have a corresponding inode
919  */
920 noinline_for_stack
921 static int check_xattrs(struct bch_fs *c)
922 {
923         struct inode_walker w = inode_walker_init();
924         struct bch_hash_info hash_info;
925         struct btree_trans trans;
926         struct btree_iter *iter;
927         struct bkey_s_c k;
928         int ret = 0;
929
930         bch_verbose(c, "checking xattrs");
931
932         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
933
934         iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
935                                    POS(BCACHEFS_ROOT_INO, 0),
936                                    BTREE_ITER_INTENT|
937                                    BTREE_ITER_PREFETCH);
938 retry:
939         while ((k = bch2_btree_iter_peek(iter)).k &&
940                !(ret = bkey_err(k))) {
941                 ret = walk_inode(&trans, &w, k.k->p.inode);
942                 if (ret)
943                         break;
944
945                 if (fsck_err_on(!w.have_inode, c,
946                                 "xattr for missing inode %llu",
947                                 k.k->p.inode)) {
948                         ret = bch2_btree_delete_at(&trans, iter, 0);
949                         if (ret)
950                                 break;
951                         continue;
952                 }
953
954                 if (w.first_this_inode && w.have_inode)
955                         hash_info = bch2_hash_info_init(c, &w.inode);
956
957                 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
958                                      &hash_info, iter, k);
959                 if (ret)
960                         break;
961
962                 bch2_btree_iter_advance(iter);
963         }
964 fsck_err:
965         if (ret == -EINTR)
966                 goto retry;
967
968         bch2_trans_iter_put(&trans, iter);
969         return bch2_trans_exit(&trans) ?: ret;
970 }
971
972 /* Get root directory, create if it doesn't exist: */
973 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
974 {
975         struct bkey_inode_buf packed;
976         u32 snapshot;
977         int ret;
978
979         bch_verbose(c, "checking root directory");
980
981         ret = bch2_trans_do(c, NULL, NULL, 0,
982                 lookup_inode(&trans, BCACHEFS_ROOT_INO, root_inode, &snapshot));
983         if (ret && ret != -ENOENT)
984                 return ret;
985
986         if (fsck_err_on(ret, c, "root directory missing"))
987                 goto create_root;
988
989         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
990                         "root inode not a directory"))
991                 goto create_root;
992
993         return 0;
994 fsck_err:
995         return ret;
996 create_root:
997         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
998                         0, NULL);
999         root_inode->bi_inum = BCACHEFS_ROOT_INO;
1000
1001         bch2_inode_pack(c, &packed, root_inode);
1002
1003         return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
1004                                  NULL, NULL,
1005                                  BTREE_INSERT_NOFAIL|
1006                                  BTREE_INSERT_LAZY_RW);
1007 }
1008
1009 struct pathbuf {
1010         size_t          nr;
1011         size_t          size;
1012
1013         struct pathbuf_entry {
1014                 u64     inum;
1015         }               *entries;
1016 };
1017
1018 static int path_down(struct pathbuf *p, u64 inum)
1019 {
1020         if (p->nr == p->size) {
1021                 size_t new_size = max_t(size_t, 256UL, p->size * 2);
1022                 void *n = krealloc(p->entries,
1023                                    new_size * sizeof(p->entries[0]),
1024                                    GFP_KERNEL);
1025                 if (!n) {
1026                         return -ENOMEM;
1027                 }
1028
1029                 p->entries = n;
1030                 p->size = new_size;
1031         };
1032
1033         p->entries[p->nr++] = (struct pathbuf_entry) {
1034                 .inum = inum,
1035         };
1036         return 0;
1037 }
1038
1039 static int check_path(struct btree_trans *trans,
1040                       struct pathbuf *p,
1041                       struct bch_inode_unpacked *inode)
1042 {
1043         struct bch_fs *c = trans->c;
1044         u32 snapshot;
1045         size_t i;
1046         int ret = 0;
1047
1048         p->nr = 0;
1049
1050         while (inode->bi_inum != BCACHEFS_ROOT_INO) {
1051                 ret = lockrestart_do(trans,
1052                         inode_backpointer_exists(trans, inode));
1053                 if (ret < 0)
1054                         break;
1055
1056                 if (!ret) {
1057                         if (fsck_err(c,  "unreachable inode %llu, type %u nlink %u backptr %llu:%llu",
1058                                      inode->bi_inum,
1059                                      mode_to_type(inode->bi_mode),
1060                                      inode->bi_nlink,
1061                                      inode->bi_dir,
1062                                      inode->bi_dir_offset))
1063                                 ret = reattach_inode(trans, inode);
1064                         break;
1065                 }
1066                 ret = 0;
1067
1068                 if (!S_ISDIR(inode->bi_mode))
1069                         break;
1070
1071                 ret = path_down(p, inode->bi_inum);
1072                 if (ret) {
1073                         bch_err(c, "memory allocation failure");
1074                         return ret;
1075                 }
1076
1077                 for (i = 0; i < p->nr; i++) {
1078                         if (inode->bi_dir != p->entries[i].inum)
1079                                 continue;
1080
1081                         /* XXX print path */
1082                         if (!fsck_err(c, "directory structure loop"))
1083                                 return 0;
1084
1085                         ret = lockrestart_do(trans,
1086                                          remove_backpointer(trans, inode));
1087                         if (ret) {
1088                                 bch_err(c, "error removing dirent: %i", ret);
1089                                 break;
1090                         }
1091
1092                         ret = reattach_inode(trans, inode);
1093                         break;
1094                 }
1095
1096                 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
1097                 if (ret) {
1098                         /* Should have been caught in dirents pass */
1099                         bch_err(c, "error looking up parent directory: %i", ret);
1100                         break;
1101                 }
1102         }
1103 fsck_err:
1104         if (ret)
1105                 bch_err(c, "%s: err %i", __func__, ret);
1106         return ret;
1107 }
1108
1109 /*
1110  * Check for unreachable inodes, as well as loops in the directory structure:
1111  * After check_dirents(), if an inode backpointer doesn't exist that means it's
1112  * unreachable:
1113  */
1114 static int check_directory_structure(struct bch_fs *c)
1115 {
1116         struct btree_trans trans;
1117         struct btree_iter *iter;
1118         struct bkey_s_c k;
1119         struct bch_inode_unpacked u;
1120         struct pathbuf path = { 0, 0, NULL };
1121         int ret;
1122
1123         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1124
1125         for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
1126                            BTREE_ITER_INTENT|
1127                            BTREE_ITER_PREFETCH, k, ret) {
1128                 if (k.k->type != KEY_TYPE_inode)
1129                         continue;
1130
1131                 ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
1132                 if (ret) {
1133                         /* Should have been caught earlier in fsck: */
1134                         bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
1135                         break;
1136                 }
1137
1138                 ret = check_path(&trans, &path, &u);
1139                 if (ret)
1140                         break;
1141         }
1142         bch2_trans_iter_put(&trans, iter);
1143
1144         BUG_ON(ret == -EINTR);
1145
1146         kfree(path.entries);
1147
1148         return bch2_trans_exit(&trans) ?: ret;
1149 }
1150
1151 struct nlink_table {
1152         size_t          nr;
1153         size_t          size;
1154
1155         struct nlink {
1156                 u64     inum;
1157                 u32     snapshot;
1158                 u32     count;
1159         }               *d;
1160 };
1161
1162 static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot)
1163 {
1164         if (t->nr == t->size) {
1165                 size_t new_size = max_t(size_t, 128UL, t->size * 2);
1166                 void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
1167                 if (!d) {
1168                         return -ENOMEM;
1169                 }
1170
1171                 if (t->d)
1172                         memcpy(d, t->d, t->size * sizeof(t->d[0]));
1173                 kvfree(t->d);
1174
1175                 t->d = d;
1176                 t->size = new_size;
1177         }
1178
1179
1180         t->d[t->nr++] = (struct nlink) {
1181                 .inum           = inum,
1182                 .snapshot       = snapshot,
1183         };
1184
1185         return 0;
1186 }
1187
1188 static int nlink_cmp(const void *_l, const void *_r)
1189 {
1190         const struct nlink *l = _l;
1191         const struct nlink *r = _r;
1192
1193         return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
1194 }
1195
1196 static void inc_link(struct bch_fs *c, struct nlink_table *links,
1197                      u64 range_start, u64 range_end, u64 inum)
1198 {
1199         struct nlink *link, key = {
1200                 .inum = inum, .snapshot = U32_MAX,
1201         };
1202
1203         if (inum < range_start || inum >= range_end)
1204                 return;
1205
1206         link = __inline_bsearch(&key, links->d, links->nr,
1207                                 sizeof(links->d[0]), nlink_cmp);
1208         if (link)
1209                 link->count++;
1210 }
1211
1212 noinline_for_stack
1213 static int check_nlinks_find_hardlinks(struct bch_fs *c,
1214                                        struct nlink_table *t,
1215                                        u64 start, u64 *end)
1216 {
1217         struct btree_trans trans;
1218         struct btree_iter *iter;
1219         struct bkey_s_c k;
1220         struct bkey_s_c_inode inode;
1221         struct bch_inode_unpacked u;
1222         int ret = 0;
1223
1224         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1225
1226         for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1227                            POS(0, start),
1228                            BTREE_ITER_INTENT|
1229                            BTREE_ITER_PREFETCH, k, ret) {
1230                 if (k.k->type != KEY_TYPE_inode)
1231                         continue;
1232
1233                 inode = bkey_s_c_to_inode(k);
1234
1235                 /*
1236                  * Backpointer and directory structure checks are sufficient for
1237                  * directories, since they can't have hardlinks:
1238                  */
1239                 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1240                         continue;
1241
1242                 /* Should never fail, checked by bch2_inode_invalid: */
1243                 BUG_ON(bch2_inode_unpack(inode, &u));
1244
1245                 if (!u.bi_nlink)
1246                         continue;
1247
1248                 ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot);
1249                 if (ret) {
1250                         *end = k.k->p.offset;
1251                         ret = 0;
1252                         break;
1253                 }
1254
1255         }
1256         bch2_trans_iter_put(&trans, iter);
1257         bch2_trans_exit(&trans);
1258
1259         if (ret)
1260                 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1261
1262         return ret;
1263 }
1264
1265 noinline_for_stack
1266 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
1267                                      u64 range_start, u64 range_end)
1268 {
1269         struct btree_trans trans;
1270         struct btree_iter *iter;
1271         struct bkey_s_c k;
1272         struct bkey_s_c_dirent d;
1273         int ret;
1274
1275         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1276
1277         for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
1278                            BTREE_ITER_INTENT|
1279                            BTREE_ITER_PREFETCH, k, ret) {
1280                 switch (k.k->type) {
1281                 case KEY_TYPE_dirent:
1282                         d = bkey_s_c_to_dirent(k);
1283
1284                         if (d.v->d_type != DT_DIR)
1285                                 inc_link(c, links, range_start, range_end,
1286                                          le64_to_cpu(d.v->d_inum));
1287                         break;
1288                 }
1289
1290                 bch2_trans_cond_resched(&trans);
1291         }
1292         bch2_trans_iter_put(&trans, iter);
1293
1294         ret = bch2_trans_exit(&trans) ?: ret;
1295         if (ret)
1296                 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1297
1298         return ret;
1299 }
1300
1301 noinline_for_stack
1302 static int check_nlinks_update_hardlinks(struct bch_fs *c,
1303                                struct nlink_table *links,
1304                                u64 range_start, u64 range_end)
1305 {
1306         struct btree_trans trans;
1307         struct btree_iter *iter;
1308         struct bkey_s_c k;
1309         struct bkey_s_c_inode inode;
1310         struct bch_inode_unpacked u;
1311         struct nlink *link = links->d;
1312         int ret = 0;
1313
1314         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1315
1316         for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1317                            POS(0, range_start),
1318                            BTREE_ITER_INTENT|
1319                            BTREE_ITER_PREFETCH, k, ret) {
1320                 if (k.k->p.offset >= range_end)
1321                         break;
1322
1323                 if (k.k->type != KEY_TYPE_inode)
1324                         continue;
1325
1326                 inode = bkey_s_c_to_inode(k);
1327                 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1328                         continue;
1329
1330                 BUG_ON(bch2_inode_unpack(inode, &u));
1331
1332                 if (!u.bi_nlink)
1333                         continue;
1334
1335                 while (link->inum < k.k->p.offset) {
1336                         link++;
1337                         BUG_ON(link >= links->d + links->nr);
1338                 }
1339
1340                 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
1341                                 "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)",
1342                                 u.bi_inum, mode_to_type(u.bi_mode),
1343                                 bch2_inode_nlink_get(&u), link->count)) {
1344                         bch2_inode_nlink_set(&u, link->count);
1345
1346                         ret = __bch2_trans_do(&trans, NULL, NULL,
1347                                               BTREE_INSERT_NOFAIL|
1348                                               BTREE_INSERT_LAZY_RW,
1349                                               bch2_btree_iter_traverse(iter) ?:
1350                                         bch2_inode_write(&trans, iter, &u));
1351                         if (ret)
1352                                 bch_err(c, "error in fsck: error %i updating inode", ret);
1353                 }
1354         }
1355 fsck_err:
1356         bch2_trans_iter_put(&trans, iter);
1357         bch2_trans_exit(&trans);
1358
1359         if (ret)
1360                 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1361
1362         return ret;
1363 }
1364
1365 noinline_for_stack
1366 static int check_nlinks(struct bch_fs *c)
1367 {
1368         struct nlink_table links = { 0 };
1369         u64 this_iter_range_start, next_iter_range_start = 0;
1370         int ret = 0;
1371
1372         bch_verbose(c, "checking inode nlinks");
1373
1374         do {
1375                 this_iter_range_start = next_iter_range_start;
1376                 next_iter_range_start = U64_MAX;
1377
1378                 ret = check_nlinks_find_hardlinks(c, &links,
1379                                                   this_iter_range_start,
1380                                                   &next_iter_range_start);
1381
1382                 ret = check_nlinks_walk_dirents(c, &links,
1383                                           this_iter_range_start,
1384                                           next_iter_range_start);
1385                 if (ret)
1386                         break;
1387
1388                 ret = check_nlinks_update_hardlinks(c, &links,
1389                                          this_iter_range_start,
1390                                          next_iter_range_start);
1391                 if (ret)
1392                         break;
1393
1394                 links.nr = 0;
1395         } while (next_iter_range_start != U64_MAX);
1396
1397         kvfree(links.d);
1398
1399         return ret;
1400 }
1401
1402 /*
1403  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1404  * Doesn't fix them yet, mainly because they haven't yet been observed:
1405  */
1406 int bch2_fsck_full(struct bch_fs *c)
1407 {
1408         struct bch_inode_unpacked root_inode;
1409
1410         return  check_inodes(c, true) ?:
1411                 check_extents(c) ?:
1412                 check_dirents(c) ?:
1413                 check_xattrs(c) ?:
1414                 check_root(c, &root_inode) ?:
1415                 check_directory_structure(c) ?:
1416                 check_nlinks(c);
1417 }
1418
1419 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1420 {
1421         return check_inodes(c, false);
1422 }