]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
Update bcachefs sources to d7dbddc450 bcachefs: revamp to_text methods
[bcachefs-tools-debian] / libbcachefs / fsck.c
1
2 #include "bcachefs.h"
3 #include "btree_update.h"
4 #include "dirent.h"
5 #include "error.h"
6 #include "fs.h"
7 #include "fsck.h"
8 #include "inode.h"
9 #include "keylist.h"
10 #include "super.h"
11 #include "xattr.h"
12
13 #include <linux/dcache.h> /* struct qstr */
14 #include <linux/generic-radix-tree.h>
15
/*
 * Brace initializer for a struct qstr describing the C string @str: the
 * length is taken with strlen() and .name points at the string itself.
 * Note that @str is expanded (and therefore evaluated) twice.
 */
#define QSTR(str)							\
	{ { { .len = strlen(str) } }, .name = str }
17
18 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
19                          struct bkey_s_c_dirent dirent)
20 {
21         struct qstr name;
22         struct bch_inode_unpacked dir_inode;
23         struct bch_hash_info dir_hash_info;
24         u64 dir_inum = dirent.k->p.inode;
25         int ret;
26         char *buf;
27
28         name.len = bch2_dirent_name_bytes(dirent);
29         buf = kmalloc(name.len + 1, GFP_KERNEL);
30         if (!buf)
31                 return -ENOMEM;
32
33         memcpy(buf, dirent.v->d_name, name.len);
34         buf[name.len] = '\0';
35         name.name = buf;
36
37         /* Unlock iter so we don't deadlock, after copying name: */
38         bch2_btree_iter_unlock(iter);
39
40         ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
41         if (ret) {
42                 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
43                 goto err;
44         }
45
46         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
47
48         ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
49         if (ret)
50                 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
51 err:
52         kfree(buf);
53         return ret;
54 }
55
56 static int reattach_inode(struct bch_fs *c,
57                           struct bch_inode_unpacked *lostfound_inode,
58                           u64 inum)
59 {
60         struct bch_hash_info lostfound_hash_info =
61                 bch2_hash_info_init(c, lostfound_inode);
62         struct bkey_inode_buf packed;
63         char name_buf[20];
64         struct qstr name;
65         int ret;
66
67         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
68         name = (struct qstr) QSTR(name_buf);
69
70         lostfound_inode->bi_nlink++;
71
72         bch2_inode_pack(&packed, lostfound_inode);
73
74         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
75                                 NULL, NULL, BTREE_INSERT_NOFAIL);
76         if (ret) {
77                 bch_err(c, "error %i reattaching inode %llu while updating lost+found",
78                         ret, inum);
79                 return ret;
80         }
81
82         ret = bch2_dirent_create(c, lostfound_inode->bi_inum,
83                                  &lostfound_hash_info,
84                                  DT_DIR, &name, inum, NULL,
85                                  BTREE_INSERT_NOFAIL);
86         if (ret) {
87                 bch_err(c, "error %i reattaching inode %llu while creating new dirent",
88                         ret, inum);
89                 return ret;
90         }
91         return ret;
92 }
93
94 struct inode_walker {
95         bool                    first_this_inode;
96         bool                    have_inode;
97         u64                     cur_inum;
98         struct bch_inode_unpacked inode;
99 };
100
101 static struct inode_walker inode_walker_init(void)
102 {
103         return (struct inode_walker) {
104                 .cur_inum       = -1,
105                 .have_inode     = false,
106         };
107 }
108
109 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
110 {
111         w->first_this_inode     = inum != w->cur_inum;
112         w->cur_inum             = inum;
113
114         if (w->first_this_inode) {
115                 int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
116
117                 if (ret && ret != -ENOENT)
118                         return ret;
119
120                 w->have_inode = !ret;
121         }
122
123         return 0;
124 }
125
126 struct hash_check {
127         struct bch_hash_info    info;
128         struct btree_trans      *trans;
129
130         /* start of current chain of hash collisions: */
131         struct btree_iter       *chain;
132
133         /* next offset in current chain of hash collisions: */
134         u64                     next;
135 };
136
137 static void hash_check_init(const struct bch_hash_desc desc,
138                             struct btree_trans *trans,
139                             struct hash_check *h)
140 {
141         h->trans = trans;
142         h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
143         h->next = -1;
144 }
145
146 static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
147                                  const struct bch_inode_unpacked *bi)
148 {
149         h->info = bch2_hash_info_init(c, bi);
150         h->next = -1;
151 }
152
153 static int hash_redo_key(const struct bch_hash_desc desc,
154                          struct hash_check *h, struct bch_fs *c,
155                          struct btree_iter *k_iter, struct bkey_s_c k,
156                          u64 hashed)
157 {
158         struct bkey_i *tmp;
159         int ret = 0;
160
161         tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
162         if (!tmp)
163                 return -ENOMEM;
164
165         bkey_reassemble(tmp, k);
166
167         ret = bch2_btree_delete_at(k_iter, 0);
168         if (ret)
169                 goto err;
170
171         bch2_btree_iter_unlock(k_iter);
172
173         bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp,
174                       BTREE_INSERT_NOFAIL|
175                       BCH_HASH_SET_MUST_CREATE);
176 err:
177         kfree(tmp);
178         return ret;
179 }
180
181 /* fsck hasn't been converted to new transactions yet: */
182 static int fsck_hash_delete_at(const struct bch_hash_desc desc,
183                                struct bch_hash_info *info,
184                                struct btree_iter *orig_iter)
185 {
186         struct btree_trans trans;
187         struct btree_iter *iter;
188         int ret;
189
190         bch2_btree_iter_unlock(orig_iter);
191
192         bch2_trans_init(&trans, orig_iter->c);
193 retry:
194         bch2_trans_begin(&trans);
195
196         iter = bch2_trans_copy_iter(&trans, orig_iter);
197         if (IS_ERR(iter)) {
198                 ret = PTR_ERR(iter);
199                 goto err;
200         }
201
202         ret   = bch2_hash_delete_at(&trans, desc, info, iter) ?:
203                 bch2_trans_commit(&trans, NULL, NULL,
204                                   BTREE_INSERT_ATOMIC|
205                                   BTREE_INSERT_NOFAIL);
206 err:
207         if (ret == -EINTR)
208                 goto retry;
209
210         bch2_trans_exit(&trans);
211         return ret;
212 }
213
214 static int hash_check_duplicates(const struct bch_hash_desc desc,
215                                  struct hash_check *h, struct bch_fs *c,
216                                  struct btree_iter *k_iter, struct bkey_s_c k)
217 {
218         struct btree_iter *iter;
219         struct bkey_s_c k2;
220         char buf[200];
221         int ret = 0;
222
223         if (!bkey_cmp(h->chain->pos, k_iter->pos))
224                 return 0;
225
226         iter = bch2_trans_copy_iter(h->trans, h->chain);
227         BUG_ON(IS_ERR(iter));
228
229         for_each_btree_key_continue(iter, 0, k2) {
230                 if (bkey_cmp(k2.k->p, k.k->p) >= 0)
231                         break;
232
233                 if (fsck_err_on(k2.k->type == desc.key_type &&
234                                 !desc.cmp_bkey(k, k2), c,
235                                 "duplicate hash table keys:\n%s",
236                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
237                                                        bkey_type(0, desc.btree_id),
238                                                        k), buf))) {
239                         ret = fsck_hash_delete_at(desc, &h->info, k_iter);
240                         if (ret)
241                                 return ret;
242                         ret = 1;
243                         break;
244                 }
245         }
246 fsck_err:
247         bch2_trans_iter_free(h->trans, iter);
248         return ret;
249 }
250
251 static bool key_has_correct_hash(const struct bch_hash_desc desc,
252                                  struct hash_check *h, struct bch_fs *c,
253                                  struct btree_iter *k_iter, struct bkey_s_c k)
254 {
255         u64 hash;
256
257         if (k.k->type != desc.whiteout_type &&
258             k.k->type != desc.key_type)
259                 return true;
260
261         if (k.k->p.offset != h->next)
262                 bch2_btree_iter_copy(h->chain, k_iter);
263         h->next = k.k->p.offset + 1;
264
265         if (k.k->type != desc.key_type)
266                 return true;
267
268         hash = desc.hash_bkey(&h->info, k);
269
270         return hash >= h->chain->pos.offset &&
271                 hash <= k.k->p.offset;
272 }
273
274 static int hash_check_key(const struct bch_hash_desc desc,
275                           struct hash_check *h, struct bch_fs *c,
276                           struct btree_iter *k_iter, struct bkey_s_c k)
277 {
278         char buf[200];
279         u64 hashed;
280         int ret = 0;
281
282         if (k.k->type != desc.whiteout_type &&
283             k.k->type != desc.key_type)
284                 return 0;
285
286         if (k.k->p.offset != h->next)
287                 bch2_btree_iter_copy(h->chain, k_iter);
288         h->next = k.k->p.offset + 1;
289
290         if (k.k->type != desc.key_type)
291                 return 0;
292
293         hashed = desc.hash_bkey(&h->info, k);
294
295         if (fsck_err_on(hashed < h->chain->pos.offset ||
296                         hashed > k.k->p.offset, c,
297                         "hash table key at wrong offset: btree %u, %llu, "
298                         "hashed to %llu chain starts at %llu\n%s",
299                         desc.btree_id, k.k->p.offset,
300                         hashed, h->chain->pos.offset,
301                         (bch2_bkey_val_to_text(&PBUF(buf), c,
302                                                bkey_type(0, desc.btree_id),
303                                                k), buf))) {
304                 ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
305                 if (ret) {
306                         bch_err(c, "hash_redo_key err %i", ret);
307                         return ret;
308                 }
309                 return 1;
310         }
311
312         ret = hash_check_duplicates(desc, h, c, k_iter, k);
313 fsck_err:
314         return ret;
315 }
316
317 static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
318                              struct btree_iter *iter, struct bkey_s_c *k)
319 {
320         struct bkey_i_dirent *d = NULL;
321         int ret = -EINVAL;
322         char buf[200];
323         unsigned len;
324         u64 hash;
325
326         if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k))
327                 return 0;
328
329         len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
330         BUG_ON(!len);
331
332         memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
333         buf[len] = '\0';
334
335         d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
336         if (!d) {
337                 bch_err(c, "memory allocation failure");
338                 return -ENOMEM;
339         }
340
341         bkey_reassemble(&d->k_i, *k);
342
343         do {
344                 --len;
345                 if (!len)
346                         goto err_redo;
347
348                 d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
349
350                 BUG_ON(bkey_val_bytes(&d->k) <
351                        offsetof(struct bch_dirent, d_name) + len);
352
353                 memset(d->v.d_name + len, 0,
354                        bkey_val_bytes(&d->k) -
355                        offsetof(struct bch_dirent, d_name) - len);
356
357                 hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
358                                                 bkey_i_to_s_c(&d->k_i));
359         } while (hash < h->chain->pos.offset ||
360                  hash > k->k->p.offset);
361
362         if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
363                      buf, strlen(buf), d->v.d_name, len)) {
364                 ret = bch2_btree_insert_at(c, NULL, NULL,
365                                            BTREE_INSERT_NOFAIL,
366                                            BTREE_INSERT_ENTRY(iter, &d->k_i));
367                 if (ret)
368                         goto err;
369
370                 *k = bch2_btree_iter_peek(iter);
371
372                 BUG_ON(k->k->type != BCH_DIRENT);
373         }
374 err:
375 fsck_err:
376         kfree(d);
377         return ret;
378 err_redo:
379         hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
380
381         if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
382                      "hash table key at wrong offset: btree %u, offset %llu, "
383                      "hashed to %llu chain starts at %llu\n%s",
384                      buf, strlen(buf), BTREE_ID_DIRENTS,
385                      k->k->p.offset, hash, h->chain->pos.offset,
386                      (bch2_bkey_val_to_text(&PBUF(buf), c,
387                                             bkey_type(0, BTREE_ID_DIRENTS),
388                                             *k), buf))) {
389                 ret = hash_redo_key(bch2_dirent_hash_desc,
390                                     h, c, iter, *k, hash);
391                 if (ret)
392                         bch_err(c, "hash_redo_key err %i", ret);
393                 else
394                         ret = 1;
395         }
396
397         goto err;
398 }
399
400 static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
401 {
402         return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
403                         POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9),
404                         POS(inode_nr + 1, 0), NULL);
405 }
406
407 /*
408  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
409  * that i_size an i_sectors are consistent
410  */
411 noinline_for_stack
412 static int check_extents(struct bch_fs *c)
413 {
414         struct inode_walker w = inode_walker_init();
415         struct btree_iter iter;
416         struct bkey_s_c k;
417         u64 i_sectors;
418         int ret = 0;
419
420         bch_verbose(c, "checking extents");
421
422         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
423                            POS(BCACHEFS_ROOT_INO, 0), 0, k) {
424                 ret = walk_inode(c, &w, k.k->p.inode);
425                 if (ret)
426                         break;
427
428                 if (fsck_err_on(!w.have_inode, c,
429                         "extent type %u for missing inode %llu",
430                         k.k->type, k.k->p.inode) ||
431                     fsck_err_on(w.have_inode &&
432                         !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
433                         "extent type %u for non regular file, inode %llu mode %o",
434                         k.k->type, k.k->p.inode, w.inode.bi_mode)) {
435                         bch2_btree_iter_unlock(&iter);
436
437                         ret = bch2_inode_truncate(c, k.k->p.inode, 0);
438                         if (ret)
439                                 goto err;
440                         continue;
441                 }
442
443                 if (fsck_err_on(w.first_this_inode &&
444                         w.have_inode &&
445                         !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
446                         w.inode.bi_sectors !=
447                         (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
448                         c, "i_sectors wrong: got %llu, should be %llu",
449                         w.inode.bi_sectors, i_sectors)) {
450                         struct bkey_inode_buf p;
451
452                         w.inode.bi_sectors = i_sectors;
453
454                         bch2_btree_iter_unlock(&iter);
455
456                         bch2_inode_pack(&p, &w.inode);
457
458                         ret = bch2_btree_insert(c, BTREE_ID_INODES,
459                                                 &p.inode.k_i, NULL, NULL,
460                                                 BTREE_INSERT_NOFAIL);
461                         if (ret) {
462                                 bch_err(c, "error in fs gc: error %i "
463                                         "updating inode", ret);
464                                 goto err;
465                         }
466
467                         /* revalidate iterator: */
468                         k = bch2_btree_iter_peek(&iter);
469                 }
470
471                 if (fsck_err_on(w.have_inode &&
472                         !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
473                         k.k->type != BCH_RESERVATION &&
474                         k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
475                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
476                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
477                         bch2_btree_iter_unlock(&iter);
478
479                         ret = bch2_inode_truncate(c, k.k->p.inode,
480                                                   w.inode.bi_size);
481                         if (ret)
482                                 goto err;
483                         continue;
484                 }
485         }
486 err:
487 fsck_err:
488         return bch2_btree_iter_unlock(&iter) ?: ret;
489 }
490
491 /*
492  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
493  * validate d_type
494  */
495 noinline_for_stack
496 static int check_dirents(struct bch_fs *c)
497 {
498         struct inode_walker w = inode_walker_init();
499         struct hash_check h;
500         struct btree_trans trans;
501         struct btree_iter *iter;
502         struct bkey_s_c k;
503         unsigned name_len;
504         char buf[200];
505         int ret = 0;
506
507         bch_verbose(c, "checking dirents");
508
509         bch2_trans_init(&trans, c);
510
511         bch2_trans_preload_iters(&trans);
512
513         iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
514                                    POS(BCACHEFS_ROOT_INO, 0), 0);
515
516         hash_check_init(bch2_dirent_hash_desc, &trans, &h);
517
518         for_each_btree_key_continue(iter, 0, k) {
519                 struct bkey_s_c_dirent d;
520                 struct bch_inode_unpacked target;
521                 bool have_target;
522                 u64 d_inum;
523
524                 ret = walk_inode(c, &w, k.k->p.inode);
525                 if (ret)
526                         break;
527
528                 if (fsck_err_on(!w.have_inode, c,
529                                 "dirent in nonexisting directory:\n%s",
530                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
531                                                        BTREE_ID_DIRENTS,
532                                                        k), buf)) ||
533                     fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
534                                 "dirent in non directory inode type %u:\n%s",
535                                 mode_to_type(w.inode.bi_mode),
536                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
537                                                        BTREE_ID_DIRENTS,
538                                                        k), buf))) {
539                         ret = bch2_btree_delete_at(iter, 0);
540                         if (ret)
541                                 goto err;
542                         continue;
543                 }
544
545                 if (w.first_this_inode && w.have_inode)
546                         hash_check_set_inode(&h, c, &w.inode);
547
548                 ret = check_dirent_hash(&h, c, iter, &k);
549                 if (ret > 0) {
550                         ret = 0;
551                         continue;
552                 }
553                 if (ret)
554                         goto fsck_err;
555
556                 if (ret)
557                         goto fsck_err;
558
559                 if (k.k->type != BCH_DIRENT)
560                         continue;
561
562                 d = bkey_s_c_to_dirent(k);
563                 d_inum = le64_to_cpu(d.v->d_inum);
564
565                 name_len = bch2_dirent_name_bytes(d);
566
567                 if (fsck_err_on(!name_len, c, "empty dirent") ||
568                     fsck_err_on(name_len == 1 &&
569                                 !memcmp(d.v->d_name, ".", 1), c,
570                                 ". dirent") ||
571                     fsck_err_on(name_len == 2 &&
572                                 !memcmp(d.v->d_name, "..", 2), c,
573                                 ".. dirent") ||
574                     fsck_err_on(name_len == 2 &&
575                                 !memcmp(d.v->d_name, "..", 2), c,
576                                 ".. dirent") ||
577                     fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
578                                 "dirent name has invalid chars")) {
579                         ret = remove_dirent(c, iter, d);
580                         if (ret)
581                                 goto err;
582                         continue;
583                 }
584
585                 if (fsck_err_on(d_inum == d.k->p.inode, c,
586                                 "dirent points to own directory:\n%s",
587                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
588                                                        BTREE_ID_DIRENTS,
589                                                        k), buf))) {
590                         ret = remove_dirent(c, iter, d);
591                         if (ret)
592                                 goto err;
593                         continue;
594                 }
595
596                 ret = bch2_inode_find_by_inum(c, d_inum, &target);
597                 if (ret && ret != -ENOENT)
598                         break;
599
600                 have_target = !ret;
601                 ret = 0;
602
603                 if (fsck_err_on(!have_target, c,
604                                 "dirent points to missing inode:\n%s",
605                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
606                                                        BTREE_ID_DIRENTS,
607                                                        k), buf))) {
608                         ret = remove_dirent(c, iter, d);
609                         if (ret)
610                                 goto err;
611                         continue;
612                 }
613
614                 if (fsck_err_on(have_target &&
615                                 d.v->d_type !=
616                                 mode_to_type(target.bi_mode), c,
617                                 "incorrect d_type: should be %u:\n%s",
618                                 mode_to_type(target.bi_mode),
619                                 (bch2_bkey_val_to_text(&PBUF(buf), c,
620                                                        BTREE_ID_DIRENTS,
621                                                        k), buf))) {
622                         struct bkey_i_dirent *n;
623
624                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
625                         if (!n) {
626                                 ret = -ENOMEM;
627                                 goto err;
628                         }
629
630                         bkey_reassemble(&n->k_i, d.s_c);
631                         n->v.d_type = mode_to_type(target.bi_mode);
632
633                         ret = bch2_btree_insert_at(c, NULL, NULL,
634                                         BTREE_INSERT_NOFAIL,
635                                         BTREE_INSERT_ENTRY(iter, &n->k_i));
636                         kfree(n);
637                         if (ret)
638                                 goto err;
639
640                 }
641         }
642 err:
643 fsck_err:
644         return bch2_trans_exit(&trans) ?: ret;
645 }
646
647 /*
648  * Walk xattrs: verify that they all have a corresponding inode
649  */
650 noinline_for_stack
651 static int check_xattrs(struct bch_fs *c)
652 {
653         struct inode_walker w = inode_walker_init();
654         struct hash_check h;
655         struct btree_trans trans;
656         struct btree_iter *iter;
657         struct bkey_s_c k;
658         int ret = 0;
659
660         bch_verbose(c, "checking xattrs");
661
662         bch2_trans_init(&trans, c);
663
664         bch2_trans_preload_iters(&trans);
665
666         iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
667                                    POS(BCACHEFS_ROOT_INO, 0), 0);
668
669         hash_check_init(bch2_xattr_hash_desc, &trans, &h);
670
671         for_each_btree_key_continue(iter, 0, k) {
672                 ret = walk_inode(c, &w, k.k->p.inode);
673                 if (ret)
674                         break;
675
676                 if (fsck_err_on(!w.have_inode, c,
677                                 "xattr for missing inode %llu",
678                                 k.k->p.inode)) {
679                         ret = bch2_btree_delete_at(iter, 0);
680                         if (ret)
681                                 goto err;
682                         continue;
683                 }
684
685                 if (w.first_this_inode && w.have_inode)
686                         hash_check_set_inode(&h, c, &w.inode);
687
688                 ret = hash_check_key(bch2_xattr_hash_desc, &h, c, iter, k);
689                 if (ret)
690                         goto fsck_err;
691         }
692 err:
693 fsck_err:
694         return bch2_trans_exit(&trans) ?: ret;
695 }
696
697 /* Get root directory, create if it doesn't exist: */
698 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
699 {
700         struct bkey_inode_buf packed;
701         int ret;
702
703         bch_verbose(c, "checking root directory");
704
705         ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
706         if (ret && ret != -ENOENT)
707                 return ret;
708
709         if (fsck_err_on(ret, c, "root directory missing"))
710                 goto create_root;
711
712         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
713                         "root inode not a directory"))
714                 goto create_root;
715
716         return 0;
717 fsck_err:
718         return ret;
719 create_root:
720         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
721                         0, NULL);
722         root_inode->bi_inum = BCACHEFS_ROOT_INO;
723
724         bch2_inode_pack(&packed, root_inode);
725
726         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
727                                  NULL, NULL, BTREE_INSERT_NOFAIL);
728 }
729
730 /* Get lost+found, create if it doesn't exist: */
731 static int check_lostfound(struct bch_fs *c,
732                            struct bch_inode_unpacked *root_inode,
733                            struct bch_inode_unpacked *lostfound_inode)
734 {
735         struct qstr lostfound = QSTR("lost+found");
736         struct bch_hash_info root_hash_info =
737                 bch2_hash_info_init(c, root_inode);
738         struct bkey_inode_buf packed;
739         u64 inum;
740         int ret;
741
742         bch_verbose(c, "checking lost+found");
743
744         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
745                                  &lostfound);
746         if (!inum) {
747                 bch_notice(c, "creating lost+found");
748                 goto create_lostfound;
749         }
750
751         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
752         if (ret && ret != -ENOENT)
753                 return ret;
754
755         if (fsck_err_on(ret, c, "lost+found missing"))
756                 goto create_lostfound;
757
758         if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
759                         "lost+found inode not a directory"))
760                 goto create_lostfound;
761
762         return 0;
763 fsck_err:
764         return ret;
765 create_lostfound:
766         root_inode->bi_nlink++;
767
768         bch2_inode_pack(&packed, root_inode);
769
770         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
771                                 NULL, NULL, BTREE_INSERT_NOFAIL);
772         if (ret)
773                 return ret;
774
775         bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
776                         0, root_inode);
777
778         ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0,
779                                &c->unused_inode_hint);
780         if (ret)
781                 return ret;
782
783         ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
784                                  &lostfound, lostfound_inode->bi_inum, NULL,
785                                  BTREE_INSERT_NOFAIL);
786         if (ret)
787                 return ret;
788
789         return 0;
790 }
791
/* Growable bitmap indexed by inode number: */
struct inode_bitmap {
	/* bit storage, (re)allocated by inode_bitmap_set() */
	unsigned long	*bits;
	/* number of bits currently available in @bits */
	size_t		size;
};
796
797 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
798 {
799         return nr < b->size ? test_bit(nr, b->bits) : false;
800 }
801
802 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
803 {
804         if (nr >= b->size) {
805                 size_t new_size = max_t(size_t, max_t(size_t,
806                                         PAGE_SIZE * 8,
807                                         b->size * 2),
808                                         nr + 1);
809                 void *n;
810
811                 new_size = roundup_pow_of_two(new_size);
812                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
813                 if (!n) {
814                         return -ENOMEM;
815                 }
816
817                 b->bits = n;
818                 b->size = new_size;
819         }
820
821         __set_bit(nr, b->bits);
822         return 0;
823 }
824
825 struct pathbuf {
826         size_t          nr;
827         size_t          size;
828
829         struct pathbuf_entry {
830                 u64     inum;
831                 u64     offset;
832         }               *entries;
833 };
834
835 static int path_down(struct pathbuf *p, u64 inum)
836 {
837         if (p->nr == p->size) {
838                 size_t new_size = max_t(size_t, 256UL, p->size * 2);
839                 void *n = krealloc(p->entries,
840                                    new_size * sizeof(p->entries[0]),
841                                    GFP_KERNEL);
842                 if (!n)
843                         return -ENOMEM;
844
845                 p->entries = n;
846                 p->size = new_size;
847         };
848
849         p->entries[p->nr++] = (struct pathbuf_entry) {
850                 .inum = inum,
851                 .offset = 0,
852         };
853         return 0;
854 }
855
856 noinline_for_stack
857 static int check_directory_structure(struct bch_fs *c,
858                                      struct bch_inode_unpacked *lostfound_inode)
859 {
860         struct inode_bitmap dirs_done = { NULL, 0 };
861         struct pathbuf path = { 0, 0, NULL };
862         struct pathbuf_entry *e;
863         struct btree_iter iter;
864         struct bkey_s_c k;
865         struct bkey_s_c_dirent dirent;
866         bool had_unreachable;
867         u64 d_inum;
868         int ret = 0;
869
870         bch_verbose(c, "checking directory structure");
871
872         /* DFS: */
873 restart_dfs:
874         had_unreachable = false;
875
876         ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
877         if (ret) {
878                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
879                 goto err;
880         }
881
882         ret = path_down(&path, BCACHEFS_ROOT_INO);
883         if (ret) {
884                 return ret;
885         }
886
887         while (path.nr) {
888 next:
889                 e = &path.entries[path.nr - 1];
890
891                 if (e->offset == U64_MAX)
892                         goto up;
893
894                 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
895                                    POS(e->inum, e->offset + 1), 0, k) {
896                         if (k.k->p.inode != e->inum)
897                                 break;
898
899                         e->offset = k.k->p.offset;
900
901                         if (k.k->type != BCH_DIRENT)
902                                 continue;
903
904                         dirent = bkey_s_c_to_dirent(k);
905
906                         if (dirent.v->d_type != DT_DIR)
907                                 continue;
908
909                         d_inum = le64_to_cpu(dirent.v->d_inum);
910
911                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
912                                         "directory %llu has multiple hardlinks",
913                                         d_inum)) {
914                                 ret = remove_dirent(c, &iter, dirent);
915                                 if (ret)
916                                         goto err;
917                                 continue;
918                         }
919
920                         ret = inode_bitmap_set(&dirs_done, d_inum);
921                         if (ret) {
922                                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
923                                 goto err;
924                         }
925
926                         ret = path_down(&path, d_inum);
927                         if (ret) {
928                                 goto err;
929                         }
930
931                         bch2_btree_iter_unlock(&iter);
932                         goto next;
933                 }
934                 ret = bch2_btree_iter_unlock(&iter);
935                 if (ret) {
936                         bch_err(c, "btree error %i in fsck", ret);
937                         goto err;
938                 }
939 up:
940                 path.nr--;
941         }
942
943         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
944                 if (k.k->type != BCH_INODE_FS)
945                         continue;
946
947                 if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
948                         continue;
949
950                 if (!bch2_empty_dir(c, k.k->p.inode))
951                         continue;
952
953                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
954                                 "unreachable directory found (inum %llu)",
955                                 k.k->p.inode)) {
956                         bch2_btree_iter_unlock(&iter);
957
958                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
959                         if (ret) {
960                                 goto err;
961                         }
962
963                         had_unreachable = true;
964                 }
965         }
966         ret = bch2_btree_iter_unlock(&iter);
967         if (ret)
968                 goto err;
969
970         if (had_unreachable) {
971                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
972                 kfree(dirs_done.bits);
973                 kfree(path.entries);
974                 memset(&dirs_done, 0, sizeof(dirs_done));
975                 memset(&path, 0, sizeof(path));
976                 goto restart_dfs;
977         }
978
979 out:
980         kfree(dirs_done.bits);
981         kfree(path.entries);
982         return ret;
983 err:
984 fsck_err:
985         ret = bch2_btree_iter_unlock(&iter) ?: ret;
986         goto out;
987 }
988
989 struct nlink {
990         u32     count;
991         u32     dir_count;
992 };
993
994 typedef GENRADIX(struct nlink) nlink_table;
995
996 static void inc_link(struct bch_fs *c, nlink_table *links,
997                      u64 range_start, u64 *range_end,
998                      u64 inum, bool dir)
999 {
1000         struct nlink *link;
1001
1002         if (inum < range_start || inum >= *range_end)
1003                 return;
1004
1005         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
1006         if (!link) {
1007                 bch_verbose(c, "allocation failed during fs gc - will need another pass");
1008                 *range_end = inum;
1009                 return;
1010         }
1011
1012         if (dir)
1013                 link->dir_count++;
1014         else
1015                 link->count++;
1016 }
1017
1018 noinline_for_stack
1019 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
1020                                u64 range_start, u64 *range_end)
1021 {
1022         struct btree_iter iter;
1023         struct bkey_s_c k;
1024         struct bkey_s_c_dirent d;
1025         u64 d_inum;
1026         int ret;
1027
1028         inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
1029
1030         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
1031                 switch (k.k->type) {
1032                 case BCH_DIRENT:
1033                         d = bkey_s_c_to_dirent(k);
1034                         d_inum = le64_to_cpu(d.v->d_inum);
1035
1036                         if (d.v->d_type == DT_DIR)
1037                                 inc_link(c, links, range_start, range_end,
1038                                          d.k->p.inode, true);
1039
1040                         inc_link(c, links, range_start, range_end,
1041                                  d_inum, false);
1042
1043                         break;
1044                 }
1045
1046                 bch2_btree_iter_cond_resched(&iter);
1047         }
1048         ret = bch2_btree_iter_unlock(&iter);
1049         if (ret)
1050                 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
1051
1052         return ret;
1053 }
1054
1055 s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
1056 {
1057         struct btree_iter iter;
1058         struct bkey_s_c k;
1059         u64 sectors = 0;
1060
1061         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
1062                 if (k.k->p.inode != inum)
1063                         break;
1064
1065                 if (bkey_extent_is_allocation(k.k))
1066                         sectors += k.k->size;
1067         }
1068
1069         return bch2_btree_iter_unlock(&iter) ?: sectors;
1070 }
1071
1072 static int check_inode_nlink(struct bch_fs *c,
1073                              struct bch_inode_unpacked *lostfound_inode,
1074                              struct bch_inode_unpacked *u,
1075                              struct nlink *link,
1076                              bool *do_update)
1077 {
1078         u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED
1079                 ? 0
1080                 : u->bi_nlink + nlink_bias(u->bi_mode);
1081         u32 real_i_nlink =
1082                 link->count * nlink_bias(u->bi_mode) +
1083                 link->dir_count;
1084         int ret = 0;
1085
1086         /*
1087          * These should have been caught/fixed by earlier passes, we don't
1088          * repair them here:
1089          */
1090         if (S_ISDIR(u->bi_mode) && link->count > 1) {
1091                 need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
1092                               u->bi_inum, link->count);
1093                 return 0;
1094         }
1095
1096         if (S_ISDIR(u->bi_mode) && !link->count) {
1097                 need_fsck_err(c, "unreachable directory found (inum %llu)",
1098                               u->bi_inum);
1099                 return 0;
1100         }
1101
1102         if (!S_ISDIR(u->bi_mode) && link->dir_count) {
1103                 need_fsck_err(c, "non directory with subdirectories",
1104                               u->bi_inum);
1105                 return 0;
1106         }
1107
1108         if (!link->count &&
1109             !(u->bi_flags & BCH_INODE_UNLINKED) &&
1110             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1111                 if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
1112                              u->bi_inum, mode_to_type(u->bi_mode)) ==
1113                     FSCK_ERR_IGNORE)
1114                         return 0;
1115
1116                 ret = reattach_inode(c, lostfound_inode, u->bi_inum);
1117                 if (ret)
1118                         return ret;
1119
1120                 link->count = 1;
1121                 real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
1122                 goto set_i_nlink;
1123         }
1124
1125         if (i_nlink < link->count) {
1126                 if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
1127                              u->bi_inum, i_nlink, link->count,
1128                              mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
1129                         return 0;
1130                 goto set_i_nlink;
1131         }
1132
1133         if (i_nlink != real_i_nlink &&
1134             c->sb.clean) {
1135                 if (fsck_err(c, "filesystem marked clean, "
1136                              "but inode %llu has wrong i_nlink "
1137                              "(type %u i_nlink %u, should be %u)",
1138                              u->bi_inum, mode_to_type(u->bi_mode),
1139                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1140                         return 0;
1141                 goto set_i_nlink;
1142         }
1143
1144         if (i_nlink != real_i_nlink &&
1145             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1146                 if (fsck_err(c, "inode %llu has wrong i_nlink "
1147                              "(type %u i_nlink %u, should be %u)",
1148                              u->bi_inum, mode_to_type(u->bi_mode),
1149                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1150                         return 0;
1151                 goto set_i_nlink;
1152         }
1153
1154         if (real_i_nlink && i_nlink != real_i_nlink)
1155                 bch_verbose(c, "setting inode %llu nlink from %u to %u",
1156                             u->bi_inum, i_nlink, real_i_nlink);
1157 set_i_nlink:
1158         if (i_nlink != real_i_nlink) {
1159                 if (real_i_nlink) {
1160                         u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode);
1161                         u->bi_flags &= ~BCH_INODE_UNLINKED;
1162                 } else {
1163                         u->bi_nlink = 0;
1164                         u->bi_flags |= BCH_INODE_UNLINKED;
1165                 }
1166
1167                 *do_update = true;
1168         }
1169 fsck_err:
1170         return ret;
1171 }
1172
1173 static int check_inode(struct bch_fs *c,
1174                        struct bch_inode_unpacked *lostfound_inode,
1175                        struct btree_iter *iter,
1176                        struct bkey_s_c_inode inode,
1177                        struct nlink *link)
1178 {
1179         struct bch_inode_unpacked u;
1180         bool do_update = false;
1181         int ret = 0;
1182
1183         ret = bch2_inode_unpack(inode, &u);
1184         if (bch2_fs_inconsistent_on(ret, c,
1185                          "error unpacking inode %llu in fsck",
1186                          inode.k->p.inode))
1187                 return ret;
1188
1189         if (link) {
1190                 ret = check_inode_nlink(c, lostfound_inode, &u, link,
1191                                         &do_update);
1192                 if (ret)
1193                         return ret;
1194         }
1195
1196         if (u.bi_flags & BCH_INODE_UNLINKED) {
1197                 bch_verbose(c, "deleting inode %llu", u.bi_inum);
1198
1199                 ret = bch2_inode_rm(c, u.bi_inum);
1200                 if (ret)
1201                         bch_err(c, "error in fs gc: error %i "
1202                                 "while deleting inode", ret);
1203                 return ret;
1204         }
1205
1206         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) {
1207                 fsck_err_on(c->sb.clean, c,
1208                             "filesystem marked clean, "
1209                             "but inode %llu has i_size dirty",
1210                             u.bi_inum);
1211
1212                 bch_verbose(c, "truncating inode %llu", u.bi_inum);
1213
1214                 /*
1215                  * XXX: need to truncate partial blocks too here - or ideally
1216                  * just switch units to bytes and that issue goes away
1217                  */
1218
1219                 ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
1220                 if (ret) {
1221                         bch_err(c, "error in fs gc: error %i "
1222                                 "truncating inode", ret);
1223                         return ret;
1224                 }
1225
1226                 /*
1227                  * We truncated without our normal sector accounting hook, just
1228                  * make sure we recalculate it:
1229                  */
1230                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
1231
1232                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
1233                 do_update = true;
1234         }
1235
1236         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) {
1237                 s64 sectors;
1238
1239                 fsck_err_on(c->sb.clean, c,
1240                             "filesystem marked clean, "
1241                             "but inode %llu has i_sectors dirty",
1242                             u.bi_inum);
1243
1244                 bch_verbose(c, "recounting sectors for inode %llu",
1245                             u.bi_inum);
1246
1247                 sectors = bch2_count_inode_sectors(c, u.bi_inum);
1248                 if (sectors < 0) {
1249                         bch_err(c, "error in fs gc: error %i "
1250                                 "recounting inode sectors",
1251                                 (int) sectors);
1252                         return sectors;
1253                 }
1254
1255                 u.bi_sectors = sectors;
1256                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
1257                 do_update = true;
1258         }
1259
1260         if (do_update) {
1261                 struct bkey_inode_buf p;
1262
1263                 bch2_inode_pack(&p, &u);
1264
1265                 ret = bch2_btree_insert_at(c, NULL, NULL,
1266                                           BTREE_INSERT_NOFAIL,
1267                                           BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
1268                 if (ret && ret != -EINTR)
1269                         bch_err(c, "error in fs gc: error %i "
1270                                 "updating inode", ret);
1271         }
1272 fsck_err:
1273         return ret;
1274 }
1275
1276 noinline_for_stack
1277 static int bch2_gc_walk_inodes(struct bch_fs *c,
1278                                struct bch_inode_unpacked *lostfound_inode,
1279                                nlink_table *links,
1280                                u64 range_start, u64 range_end)
1281 {
1282         struct btree_iter iter;
1283         struct bkey_s_c k;
1284         struct nlink *link, zero_links = { 0, 0 };
1285         struct genradix_iter nlinks_iter;
1286         int ret = 0, ret2 = 0;
1287         u64 nlinks_pos;
1288
1289         bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
1290         nlinks_iter = genradix_iter_init(links, 0);
1291
1292         while ((k = bch2_btree_iter_peek(&iter)).k &&
1293                !btree_iter_err(k)) {
1294 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
1295
1296                 if (!link && (!k.k || iter.pos.inode >= range_end))
1297                         break;
1298
1299                 nlinks_pos = range_start + nlinks_iter.pos;
1300                 if (iter.pos.inode > nlinks_pos) {
1301                         /* Should have been caught by dirents pass: */
1302                         need_fsck_err_on(link && link->count, c,
1303                                 "missing inode %llu (nlink %u)",
1304                                 nlinks_pos, link->count);
1305                         genradix_iter_advance(&nlinks_iter, links);
1306                         goto peek_nlinks;
1307                 }
1308
1309                 if (iter.pos.inode < nlinks_pos || !link)
1310                         link = &zero_links;
1311
1312                 if (k.k && k.k->type == BCH_INODE_FS) {
1313                         /*
1314                          * Avoid potential deadlocks with iter for
1315                          * truncate/rm/etc.:
1316                          */
1317                         bch2_btree_iter_unlock(&iter);
1318
1319                         ret = check_inode(c, lostfound_inode, &iter,
1320                                           bkey_s_c_to_inode(k), link);
1321                         BUG_ON(ret == -EINTR);
1322                         if (ret)
1323                                 break;
1324
1325                         if (link->count)
1326                                 atomic_long_inc(&c->nr_inodes);
1327                 } else {
1328                         /* Should have been caught by dirents pass: */
1329                         need_fsck_err_on(link->count, c,
1330                                 "missing inode %llu (nlink %u)",
1331                                 nlinks_pos, link->count);
1332                 }
1333
1334                 if (nlinks_pos == iter.pos.inode)
1335                         genradix_iter_advance(&nlinks_iter, links);
1336
1337                 bch2_btree_iter_next(&iter);
1338                 bch2_btree_iter_cond_resched(&iter);
1339         }
1340 fsck_err:
1341         ret2 = bch2_btree_iter_unlock(&iter);
1342         if (ret2)
1343                 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
1344
1345         return ret ?: ret2;
1346 }
1347
1348 noinline_for_stack
1349 static int check_inode_nlinks(struct bch_fs *c,
1350                               struct bch_inode_unpacked *lostfound_inode)
1351 {
1352         nlink_table links;
1353         u64 this_iter_range_start, next_iter_range_start = 0;
1354         int ret = 0;
1355
1356         bch_verbose(c, "checking inode nlinks");
1357
1358         genradix_init(&links);
1359
1360         do {
1361                 this_iter_range_start = next_iter_range_start;
1362                 next_iter_range_start = U64_MAX;
1363
1364                 ret = bch2_gc_walk_dirents(c, &links,
1365                                           this_iter_range_start,
1366                                           &next_iter_range_start);
1367                 if (ret)
1368                         break;
1369
1370                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1371                                          this_iter_range_start,
1372                                          next_iter_range_start);
1373                 if (ret)
1374                         break;
1375
1376                 genradix_free(&links);
1377         } while (next_iter_range_start != U64_MAX);
1378
1379         genradix_free(&links);
1380
1381         return ret;
1382 }
1383
1384 noinline_for_stack
1385 static int check_inodes_fast(struct bch_fs *c)
1386 {
1387         struct btree_iter iter;
1388         struct bkey_s_c k;
1389         struct bkey_s_c_inode inode;
1390         unsigned long nr_inodes = 0;
1391         int ret = 0;
1392
1393         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
1394                 if (k.k->type != BCH_INODE_FS)
1395                         continue;
1396
1397                 inode = bkey_s_c_to_inode(k);
1398
1399                 if (!(inode.v->bi_flags & BCH_INODE_UNLINKED))
1400                         nr_inodes++;
1401
1402                 if (inode.v->bi_flags &
1403                     (BCH_INODE_I_SIZE_DIRTY|
1404                      BCH_INODE_I_SECTORS_DIRTY|
1405                      BCH_INODE_UNLINKED)) {
1406                         fsck_err_on(c->sb.clean, c,
1407                                 "filesystem marked clean but found inode %llu with flags %x",
1408                                 inode.k->p.inode, inode.v->bi_flags);
1409                         ret = check_inode(c, NULL, &iter, inode, NULL);
1410                         BUG_ON(ret == -EINTR);
1411                         if (ret)
1412                                 break;
1413                 }
1414         }
1415         atomic_long_set(&c->nr_inodes, nr_inodes);
1416 fsck_err:
1417         return bch2_btree_iter_unlock(&iter) ?: ret;
1418 }
1419
1420 /*
1421  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1422  * Doesn't fix them yet, mainly because they haven't yet been observed:
1423  */
1424 static int bch2_fsck_full(struct bch_fs *c)
1425 {
1426         struct bch_inode_unpacked root_inode, lostfound_inode;
1427         int ret;
1428
1429         bch_verbose(c, "starting fsck:");
1430         ret =   check_extents(c) ?:
1431                 check_dirents(c) ?:
1432                 check_xattrs(c) ?:
1433                 check_root(c, &root_inode) ?:
1434                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1435                 check_directory_structure(c, &lostfound_inode) ?:
1436                 check_inode_nlinks(c, &lostfound_inode);
1437
1438         bch2_flush_fsck_errs(c);
1439         bch_verbose(c, "fsck done");
1440
1441         return ret;
1442 }
1443
1444 static int bch2_fsck_inode_nlink(struct bch_fs *c)
1445 {
1446         struct bch_inode_unpacked root_inode, lostfound_inode;
1447         int ret;
1448
1449         bch_verbose(c, "checking inode link counts:");
1450         ret =   check_root(c, &root_inode) ?:
1451                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1452                 check_inode_nlinks(c, &lostfound_inode);
1453
1454         bch2_flush_fsck_errs(c);
1455         bch_verbose(c, "done");
1456
1457         return ret;
1458 }
1459
/*
 * Cheapest fsck variant: a single check_inodes_fast() pass, followed by a
 * flush of accumulated fsck errors.
 *
 * Returns whatever check_inodes_fast() returned.
 */
static int bch2_fsck_walk_inodes_only(struct bch_fs *c)
{
	int ret;

	bch_verbose(c, "walking inodes:");

	ret = check_inodes_fast(c);
	bch2_flush_fsck_errs(c);

	bch_verbose(c, "done");

	return ret;
}
1472
1473 int bch2_fsck(struct bch_fs *c)
1474 {
1475         if (c->opts.fsck)
1476                 return bch2_fsck_full(c);
1477
1478         if (!c->sb.clean &&
1479             !(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)))
1480                 return bch2_fsck_inode_nlink(c);
1481
1482         return bch2_fsck_walk_inodes_only(c);
1483 }