]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
Update bcachefs sources to defaad6d47 bcachefs: Fix an assertion when rebuilding...
[bcachefs-tools-debian] / libbcachefs / fsck.c
1
2 #include "bcachefs.h"
3 #include "btree_update.h"
4 #include "dirent.h"
5 #include "error.h"
6 #include "fs.h"
7 #include "fsck.h"
8 #include "inode.h"
9 #include "keylist.h"
10 #include "super.h"
11 #include "xattr.h"
12
13 #include <linux/dcache.h> /* struct qstr */
14 #include <linux/generic-radix-tree.h>
15
/*
 * Brace initializer for a struct qstr describing the C string @n: the length
 * comes from strlen(), so @n is evaluated twice and must be NUL terminated.
 */
#define QSTR(n) { { { .len = strlen(n) } }, .name = (n) }
17
18 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
19                          struct bkey_s_c_dirent dirent)
20 {
21         struct qstr name;
22         struct bch_inode_unpacked dir_inode;
23         struct bch_hash_info dir_hash_info;
24         u64 dir_inum = dirent.k->p.inode;
25         int ret;
26         char *buf;
27
28         name.len = bch2_dirent_name_bytes(dirent);
29         buf = kmalloc(name.len + 1, GFP_KERNEL);
30         if (!buf)
31                 return -ENOMEM;
32
33         memcpy(buf, dirent.v->d_name, name.len);
34         buf[name.len] = '\0';
35         name.name = buf;
36
37         /* Unlock iter so we don't deadlock, after copying name: */
38         bch2_btree_iter_unlock(iter);
39
40         ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
41         if (ret) {
42                 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
43                 goto err;
44         }
45
46         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
47
48         ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
49         if (ret)
50                 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
51 err:
52         kfree(buf);
53         return ret;
54 }
55
56 static int reattach_inode(struct bch_fs *c,
57                           struct bch_inode_unpacked *lostfound_inode,
58                           u64 inum)
59 {
60         struct bch_hash_info lostfound_hash_info =
61                 bch2_hash_info_init(c, lostfound_inode);
62         struct bkey_inode_buf packed;
63         char name_buf[20];
64         struct qstr name;
65         int ret;
66
67         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
68         name = (struct qstr) QSTR(name_buf);
69
70         lostfound_inode->bi_nlink++;
71
72         bch2_inode_pack(&packed, lostfound_inode);
73
74         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
75                                 NULL, NULL, BTREE_INSERT_NOFAIL);
76         if (ret) {
77                 bch_err(c, "error %i reattaching inode %llu while updating lost+found",
78                         ret, inum);
79                 return ret;
80         }
81
82         ret = bch2_dirent_create(c, lostfound_inode->bi_inum,
83                                  &lostfound_hash_info,
84                                  DT_DIR, &name, inum, NULL,
85                                  BTREE_INSERT_NOFAIL);
86         if (ret) {
87                 bch_err(c, "error %i reattaching inode %llu while creating new dirent",
88                         ret, inum);
89                 return ret;
90         }
91         return ret;
92 }
93
94 struct inode_walker {
95         bool                    first_this_inode;
96         bool                    have_inode;
97         u64                     cur_inum;
98         struct bch_inode_unpacked inode;
99 };
100
101 static struct inode_walker inode_walker_init(void)
102 {
103         return (struct inode_walker) {
104                 .cur_inum       = -1,
105                 .have_inode     = false,
106         };
107 }
108
109 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
110 {
111         w->first_this_inode     = inum != w->cur_inum;
112         w->cur_inum             = inum;
113
114         if (w->first_this_inode) {
115                 int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
116
117                 if (ret && ret != -ENOENT)
118                         return ret;
119
120                 w->have_inode = !ret;
121         }
122
123         return 0;
124 }
125
126 struct hash_check {
127         struct bch_hash_info    info;
128         struct btree_trans      *trans;
129
130         /* start of current chain of hash collisions: */
131         struct btree_iter       *chain;
132
133         /* next offset in current chain of hash collisions: */
134         u64                     next;
135 };
136
137 static void hash_check_init(const struct bch_hash_desc desc,
138                             struct btree_trans *trans,
139                             struct hash_check *h)
140 {
141         h->trans = trans;
142         h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
143         h->next = -1;
144 }
145
146 static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
147                                  const struct bch_inode_unpacked *bi)
148 {
149         h->info = bch2_hash_info_init(c, bi);
150         h->next = -1;
151 }
152
153 static int hash_redo_key(const struct bch_hash_desc desc,
154                          struct hash_check *h, struct bch_fs *c,
155                          struct btree_iter *k_iter, struct bkey_s_c k,
156                          u64 hashed)
157 {
158         struct bkey_i *tmp;
159         int ret = 0;
160
161         tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
162         if (!tmp)
163                 return -ENOMEM;
164
165         bkey_reassemble(tmp, k);
166
167         ret = bch2_btree_delete_at(k_iter, 0);
168         if (ret)
169                 goto err;
170
171         bch2_btree_iter_unlock(k_iter);
172
173         bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp,
174                       BTREE_INSERT_NOFAIL|
175                       BCH_HASH_SET_MUST_CREATE);
176 err:
177         kfree(tmp);
178         return ret;
179 }
180
181 /* fsck hasn't been converted to new transactions yet: */
182 static int fsck_hash_delete_at(const struct bch_hash_desc desc,
183                                struct bch_hash_info *info,
184                                struct btree_iter *orig_iter)
185 {
186         struct btree_trans trans;
187         struct btree_iter *iter;
188         int ret;
189
190         bch2_btree_iter_unlock(orig_iter);
191
192         bch2_trans_init(&trans, orig_iter->c);
193 retry:
194         bch2_trans_begin(&trans);
195
196         iter = bch2_trans_copy_iter(&trans, orig_iter);
197         if (IS_ERR(iter)) {
198                 ret = PTR_ERR(iter);
199                 goto err;
200         }
201
202         ret   = bch2_hash_delete_at(&trans, desc, info, iter) ?:
203                 bch2_trans_commit(&trans, NULL, NULL,
204                                   BTREE_INSERT_ATOMIC|
205                                   BTREE_INSERT_NOFAIL);
206 err:
207         if (ret == -EINTR)
208                 goto retry;
209
210         bch2_trans_exit(&trans);
211         return ret;
212 }
213
214 static int hash_check_duplicates(const struct bch_hash_desc desc,
215                                  struct hash_check *h, struct bch_fs *c,
216                                  struct btree_iter *k_iter, struct bkey_s_c k)
217 {
218         struct btree_iter *iter;
219         struct bkey_s_c k2;
220         char buf[200];
221         int ret = 0;
222
223         if (!bkey_cmp(h->chain->pos, k_iter->pos))
224                 return 0;
225
226         iter = bch2_trans_copy_iter(h->trans, h->chain);
227         BUG_ON(IS_ERR(iter));
228
229         for_each_btree_key_continue(iter, 0, k2) {
230                 if (bkey_cmp(k2.k->p, k.k->p) >= 0)
231                         break;
232
233                 if (fsck_err_on(k2.k->type == desc.key_type &&
234                                 !desc.cmp_bkey(k, k2), c,
235                                 "duplicate hash table keys:\n%s",
236                                 (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
237                                                        buf, sizeof(buf), k), buf))) {
238                         ret = fsck_hash_delete_at(desc, &h->info, k_iter);
239                         if (ret)
240                                 return ret;
241                         ret = 1;
242                         break;
243                 }
244         }
245 fsck_err:
246         bch2_trans_iter_free(h->trans, iter);
247         return ret;
248 }
249
250 static bool key_has_correct_hash(const struct bch_hash_desc desc,
251                                  struct hash_check *h, struct bch_fs *c,
252                                  struct btree_iter *k_iter, struct bkey_s_c k)
253 {
254         u64 hash;
255
256         if (k.k->type != desc.whiteout_type &&
257             k.k->type != desc.key_type)
258                 return true;
259
260         if (k.k->p.offset != h->next)
261                 bch2_btree_iter_copy(h->chain, k_iter);
262         h->next = k.k->p.offset + 1;
263
264         if (k.k->type != desc.key_type)
265                 return true;
266
267         hash = desc.hash_bkey(&h->info, k);
268
269         return hash >= h->chain->pos.offset &&
270                 hash <= k.k->p.offset;
271 }
272
273 static int hash_check_key(const struct bch_hash_desc desc,
274                           struct hash_check *h, struct bch_fs *c,
275                           struct btree_iter *k_iter, struct bkey_s_c k)
276 {
277         char buf[200];
278         u64 hashed;
279         int ret = 0;
280
281         if (k.k->type != desc.whiteout_type &&
282             k.k->type != desc.key_type)
283                 return 0;
284
285         if (k.k->p.offset != h->next)
286                 bch2_btree_iter_copy(h->chain, k_iter);
287         h->next = k.k->p.offset + 1;
288
289         if (k.k->type != desc.key_type)
290                 return 0;
291
292         hashed = desc.hash_bkey(&h->info, k);
293
294         if (fsck_err_on(hashed < h->chain->pos.offset ||
295                         hashed > k.k->p.offset, c,
296                         "hash table key at wrong offset: btree %u, %llu, "
297                         "hashed to %llu chain starts at %llu\n%s",
298                         desc.btree_id, k.k->p.offset,
299                         hashed, h->chain->pos.offset,
300                         (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
301                                                buf, sizeof(buf), k), buf))) {
302                 ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
303                 if (ret) {
304                         bch_err(c, "hash_redo_key err %i", ret);
305                         return ret;
306                 }
307                 return 1;
308         }
309
310         ret = hash_check_duplicates(desc, h, c, k_iter, k);
311 fsck_err:
312         return ret;
313 }
314
315 static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
316                              struct btree_iter *iter, struct bkey_s_c *k)
317 {
318         struct bkey_i_dirent *d = NULL;
319         int ret = -EINVAL;
320         char buf[200];
321         unsigned len;
322         u64 hash;
323
324         if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k))
325                 return 0;
326
327         len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
328         BUG_ON(!len);
329
330         memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
331         buf[len] = '\0';
332
333         d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
334         if (!d) {
335                 bch_err(c, "memory allocation failure");
336                 return -ENOMEM;
337         }
338
339         bkey_reassemble(&d->k_i, *k);
340
341         do {
342                 --len;
343                 if (!len)
344                         goto err_redo;
345
346                 d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
347
348                 BUG_ON(bkey_val_bytes(&d->k) <
349                        offsetof(struct bch_dirent, d_name) + len);
350
351                 memset(d->v.d_name + len, 0,
352                        bkey_val_bytes(&d->k) -
353                        offsetof(struct bch_dirent, d_name) - len);
354
355                 hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
356                                                 bkey_i_to_s_c(&d->k_i));
357         } while (hash < h->chain->pos.offset ||
358                  hash > k->k->p.offset);
359
360         if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
361                      buf, strlen(buf), d->v.d_name, len)) {
362                 ret = bch2_btree_insert_at(c, NULL, NULL,
363                                            BTREE_INSERT_NOFAIL,
364                                            BTREE_INSERT_ENTRY(iter, &d->k_i));
365                 if (ret)
366                         goto err;
367
368                 *k = bch2_btree_iter_peek(iter);
369
370                 BUG_ON(k->k->type != BCH_DIRENT);
371         }
372 err:
373 fsck_err:
374         kfree(d);
375         return ret;
376 err_redo:
377         hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
378
379         if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
380                      "hash table key at wrong offset: btree %u, offset %llu, "
381                      "hashed to %llu chain starts at %llu\n%s",
382                      buf, strlen(buf), BTREE_ID_DIRENTS,
383                      k->k->p.offset, hash, h->chain->pos.offset,
384                      (bch2_bkey_val_to_text(c, bkey_type(0, BTREE_ID_DIRENTS),
385                                             buf, sizeof(buf), *k), buf))) {
386                 ret = hash_redo_key(bch2_dirent_hash_desc,
387                                     h, c, iter, *k, hash);
388                 if (ret)
389                         bch_err(c, "hash_redo_key err %i", ret);
390                 else
391                         ret = 1;
392         }
393
394         goto err;
395 }
396
397 static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
398 {
399         return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
400                         POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9),
401                         POS(inode_nr + 1, 0), NULL);
402 }
403
404 /*
405  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
406  * that i_size an i_sectors are consistent
407  */
408 noinline_for_stack
409 static int check_extents(struct bch_fs *c)
410 {
411         struct inode_walker w = inode_walker_init();
412         struct btree_iter iter;
413         struct bkey_s_c k;
414         u64 i_sectors;
415         int ret = 0;
416
417         bch_verbose(c, "checking extents");
418
419         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
420                            POS(BCACHEFS_ROOT_INO, 0), 0, k) {
421                 ret = walk_inode(c, &w, k.k->p.inode);
422                 if (ret)
423                         break;
424
425                 if (fsck_err_on(!w.have_inode, c,
426                         "extent type %u for missing inode %llu",
427                         k.k->type, k.k->p.inode) ||
428                     fsck_err_on(w.have_inode &&
429                         !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
430                         "extent type %u for non regular file, inode %llu mode %o",
431                         k.k->type, k.k->p.inode, w.inode.bi_mode)) {
432                         bch2_btree_iter_unlock(&iter);
433
434                         ret = bch2_inode_truncate(c, k.k->p.inode, 0);
435                         if (ret)
436                                 goto err;
437                         continue;
438                 }
439
440                 if (fsck_err_on(w.first_this_inode &&
441                         w.have_inode &&
442                         !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
443                         w.inode.bi_sectors !=
444                         (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
445                         c, "i_sectors wrong: got %llu, should be %llu",
446                         w.inode.bi_sectors, i_sectors)) {
447                         struct bkey_inode_buf p;
448
449                         w.inode.bi_sectors = i_sectors;
450
451                         bch2_btree_iter_unlock(&iter);
452
453                         bch2_inode_pack(&p, &w.inode);
454
455                         ret = bch2_btree_insert(c, BTREE_ID_INODES,
456                                                 &p.inode.k_i, NULL, NULL,
457                                                 BTREE_INSERT_NOFAIL);
458                         if (ret) {
459                                 bch_err(c, "error in fs gc: error %i "
460                                         "updating inode", ret);
461                                 goto err;
462                         }
463
464                         /* revalidate iterator: */
465                         k = bch2_btree_iter_peek(&iter);
466                 }
467
468                 if (fsck_err_on(w.have_inode &&
469                         !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
470                         k.k->type != BCH_RESERVATION &&
471                         k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
472                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
473                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
474                         bch2_btree_iter_unlock(&iter);
475
476                         ret = bch2_inode_truncate(c, k.k->p.inode,
477                                                   w.inode.bi_size);
478                         if (ret)
479                                 goto err;
480                         continue;
481                 }
482         }
483 err:
484 fsck_err:
485         return bch2_btree_iter_unlock(&iter) ?: ret;
486 }
487
488 /*
489  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
490  * validate d_type
491  */
492 noinline_for_stack
493 static int check_dirents(struct bch_fs *c)
494 {
495         struct inode_walker w = inode_walker_init();
496         struct hash_check h;
497         struct btree_trans trans;
498         struct btree_iter *iter;
499         struct bkey_s_c k;
500         unsigned name_len;
501         char buf[200];
502         int ret = 0;
503
504         bch_verbose(c, "checking dirents");
505
506         bch2_trans_init(&trans, c);
507
508         bch2_trans_preload_iters(&trans);
509
510         iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
511                                    POS(BCACHEFS_ROOT_INO, 0), 0);
512
513         hash_check_init(bch2_dirent_hash_desc, &trans, &h);
514
515         for_each_btree_key_continue(iter, 0, k) {
516                 struct bkey_s_c_dirent d;
517                 struct bch_inode_unpacked target;
518                 bool have_target;
519                 u64 d_inum;
520
521                 ret = walk_inode(c, &w, k.k->p.inode);
522                 if (ret)
523                         break;
524
525                 if (fsck_err_on(!w.have_inode, c,
526                                 "dirent in nonexisting directory:\n%s",
527                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
528                                                        buf, sizeof(buf), k), buf)) ||
529                     fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
530                                 "dirent in non directory inode type %u:\n%s",
531                                 mode_to_type(w.inode.bi_mode),
532                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
533                                                        buf, sizeof(buf), k), buf))) {
534                         ret = bch2_btree_delete_at(iter, 0);
535                         if (ret)
536                                 goto err;
537                         continue;
538                 }
539
540                 if (w.first_this_inode && w.have_inode)
541                         hash_check_set_inode(&h, c, &w.inode);
542
543                 ret = check_dirent_hash(&h, c, iter, &k);
544                 if (ret > 0) {
545                         ret = 0;
546                         continue;
547                 }
548                 if (ret)
549                         goto fsck_err;
550
551                 if (ret)
552                         goto fsck_err;
553
554                 if (k.k->type != BCH_DIRENT)
555                         continue;
556
557                 d = bkey_s_c_to_dirent(k);
558                 d_inum = le64_to_cpu(d.v->d_inum);
559
560                 name_len = bch2_dirent_name_bytes(d);
561
562                 if (fsck_err_on(!name_len, c, "empty dirent") ||
563                     fsck_err_on(name_len == 1 &&
564                                 !memcmp(d.v->d_name, ".", 1), c,
565                                 ". dirent") ||
566                     fsck_err_on(name_len == 2 &&
567                                 !memcmp(d.v->d_name, "..", 2), c,
568                                 ".. dirent") ||
569                     fsck_err_on(name_len == 2 &&
570                                 !memcmp(d.v->d_name, "..", 2), c,
571                                 ".. dirent") ||
572                     fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
573                                 "dirent name has invalid chars")) {
574                         ret = remove_dirent(c, iter, d);
575                         if (ret)
576                                 goto err;
577                         continue;
578                 }
579
580                 if (fsck_err_on(d_inum == d.k->p.inode, c,
581                                 "dirent points to own directory:\n%s",
582                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
583                                                        buf, sizeof(buf), k), buf))) {
584                         ret = remove_dirent(c, iter, d);
585                         if (ret)
586                                 goto err;
587                         continue;
588                 }
589
590                 ret = bch2_inode_find_by_inum(c, d_inum, &target);
591                 if (ret && ret != -ENOENT)
592                         break;
593
594                 have_target = !ret;
595                 ret = 0;
596
597                 if (fsck_err_on(!have_target, c,
598                                 "dirent points to missing inode:\n%s",
599                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
600                                                        buf, sizeof(buf), k), buf))) {
601                         ret = remove_dirent(c, iter, d);
602                         if (ret)
603                                 goto err;
604                         continue;
605                 }
606
607                 if (fsck_err_on(have_target &&
608                                 d.v->d_type !=
609                                 mode_to_type(target.bi_mode), c,
610                                 "incorrect d_type: should be %u:\n%s",
611                                 mode_to_type(target.bi_mode),
612                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
613                                                        buf, sizeof(buf), k), buf))) {
614                         struct bkey_i_dirent *n;
615
616                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
617                         if (!n) {
618                                 ret = -ENOMEM;
619                                 goto err;
620                         }
621
622                         bkey_reassemble(&n->k_i, d.s_c);
623                         n->v.d_type = mode_to_type(target.bi_mode);
624
625                         ret = bch2_btree_insert_at(c, NULL, NULL,
626                                         BTREE_INSERT_NOFAIL,
627                                         BTREE_INSERT_ENTRY(iter, &n->k_i));
628                         kfree(n);
629                         if (ret)
630                                 goto err;
631
632                 }
633         }
634 err:
635 fsck_err:
636         return bch2_trans_exit(&trans) ?: ret;
637 }
638
639 /*
640  * Walk xattrs: verify that they all have a corresponding inode
641  */
642 noinline_for_stack
643 static int check_xattrs(struct bch_fs *c)
644 {
645         struct inode_walker w = inode_walker_init();
646         struct hash_check h;
647         struct btree_trans trans;
648         struct btree_iter *iter;
649         struct bkey_s_c k;
650         int ret = 0;
651
652         bch_verbose(c, "checking xattrs");
653
654         bch2_trans_init(&trans, c);
655
656         bch2_trans_preload_iters(&trans);
657
658         iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
659                                    POS(BCACHEFS_ROOT_INO, 0), 0);
660
661         hash_check_init(bch2_xattr_hash_desc, &trans, &h);
662
663         for_each_btree_key_continue(iter, 0, k) {
664                 ret = walk_inode(c, &w, k.k->p.inode);
665                 if (ret)
666                         break;
667
668                 if (fsck_err_on(!w.have_inode, c,
669                                 "xattr for missing inode %llu",
670                                 k.k->p.inode)) {
671                         ret = bch2_btree_delete_at(iter, 0);
672                         if (ret)
673                                 goto err;
674                         continue;
675                 }
676
677                 if (w.first_this_inode && w.have_inode)
678                         hash_check_set_inode(&h, c, &w.inode);
679
680                 ret = hash_check_key(bch2_xattr_hash_desc, &h, c, iter, k);
681                 if (ret)
682                         goto fsck_err;
683         }
684 err:
685 fsck_err:
686         return bch2_trans_exit(&trans) ?: ret;
687 }
688
689 /* Get root directory, create if it doesn't exist: */
690 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
691 {
692         struct bkey_inode_buf packed;
693         int ret;
694
695         bch_verbose(c, "checking root directory");
696
697         ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
698         if (ret && ret != -ENOENT)
699                 return ret;
700
701         if (fsck_err_on(ret, c, "root directory missing"))
702                 goto create_root;
703
704         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
705                         "root inode not a directory"))
706                 goto create_root;
707
708         return 0;
709 fsck_err:
710         return ret;
711 create_root:
712         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
713                         0, NULL);
714         root_inode->bi_inum = BCACHEFS_ROOT_INO;
715
716         bch2_inode_pack(&packed, root_inode);
717
718         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
719                                  NULL, NULL, BTREE_INSERT_NOFAIL);
720 }
721
722 /* Get lost+found, create if it doesn't exist: */
723 static int check_lostfound(struct bch_fs *c,
724                            struct bch_inode_unpacked *root_inode,
725                            struct bch_inode_unpacked *lostfound_inode)
726 {
727         struct qstr lostfound = QSTR("lost+found");
728         struct bch_hash_info root_hash_info =
729                 bch2_hash_info_init(c, root_inode);
730         struct bkey_inode_buf packed;
731         u64 inum;
732         int ret;
733
734         bch_verbose(c, "checking lost+found");
735
736         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
737                                  &lostfound);
738         if (!inum) {
739                 bch_notice(c, "creating lost+found");
740                 goto create_lostfound;
741         }
742
743         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
744         if (ret && ret != -ENOENT)
745                 return ret;
746
747         if (fsck_err_on(ret, c, "lost+found missing"))
748                 goto create_lostfound;
749
750         if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
751                         "lost+found inode not a directory"))
752                 goto create_lostfound;
753
754         return 0;
755 fsck_err:
756         return ret;
757 create_lostfound:
758         root_inode->bi_nlink++;
759
760         bch2_inode_pack(&packed, root_inode);
761
762         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
763                                 NULL, NULL, BTREE_INSERT_NOFAIL);
764         if (ret)
765                 return ret;
766
767         bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
768                         0, root_inode);
769
770         ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0,
771                                &c->unused_inode_hint);
772         if (ret)
773                 return ret;
774
775         ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
776                                  &lostfound, lostfound_inode->bi_inum, NULL,
777                                  BTREE_INSERT_NOFAIL);
778         if (ret)
779                 return ret;
780
781         return 0;
782 }
783
/* Growable bitmap indexed by inode number (see inode_bitmap_set()): */
struct inode_bitmap {
        /* bit storage; NULL until the first bit is set */
        unsigned long   *bits;
        /* capacity of @bits, counted in bits */
        size_t          size;
};
788
789 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
790 {
791         return nr < b->size ? test_bit(nr, b->bits) : false;
792 }
793
794 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
795 {
796         if (nr >= b->size) {
797                 size_t new_size = max_t(size_t, max_t(size_t,
798                                         PAGE_SIZE * 8,
799                                         b->size * 2),
800                                         nr + 1);
801                 void *n;
802
803                 new_size = roundup_pow_of_two(new_size);
804                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
805                 if (!n) {
806                         return -ENOMEM;
807                 }
808
809                 b->bits = n;
810                 b->size = new_size;
811         }
812
813         __set_bit(nr, b->bits);
814         return 0;
815 }
816
817 struct pathbuf {
818         size_t          nr;
819         size_t          size;
820
821         struct pathbuf_entry {
822                 u64     inum;
823                 u64     offset;
824         }               *entries;
825 };
826
827 static int path_down(struct pathbuf *p, u64 inum)
828 {
829         if (p->nr == p->size) {
830                 size_t new_size = max_t(size_t, 256UL, p->size * 2);
831                 void *n = krealloc(p->entries,
832                                    new_size * sizeof(p->entries[0]),
833                                    GFP_KERNEL);
834                 if (!n)
835                         return -ENOMEM;
836
837                 p->entries = n;
838                 p->size = new_size;
839         };
840
841         p->entries[p->nr++] = (struct pathbuf_entry) {
842                 .inum = inum,
843                 .offset = 0,
844         };
845         return 0;
846 }
847
848 noinline_for_stack
849 static int check_directory_structure(struct bch_fs *c,
850                                      struct bch_inode_unpacked *lostfound_inode)
851 {
852         struct inode_bitmap dirs_done = { NULL, 0 };
853         struct pathbuf path = { 0, 0, NULL };
854         struct pathbuf_entry *e;
855         struct btree_iter iter;
856         struct bkey_s_c k;
857         struct bkey_s_c_dirent dirent;
858         bool had_unreachable;
859         u64 d_inum;
860         int ret = 0;
861
862         bch_verbose(c, "checking directory structure");
863
864         /* DFS: */
865 restart_dfs:
866         had_unreachable = false;
867
868         ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
869         if (ret) {
870                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
871                 goto err;
872         }
873
874         ret = path_down(&path, BCACHEFS_ROOT_INO);
875         if (ret) {
876                 return ret;
877         }
878
879         while (path.nr) {
880 next:
881                 e = &path.entries[path.nr - 1];
882
883                 if (e->offset == U64_MAX)
884                         goto up;
885
886                 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
887                                    POS(e->inum, e->offset + 1), 0, k) {
888                         if (k.k->p.inode != e->inum)
889                                 break;
890
891                         e->offset = k.k->p.offset;
892
893                         if (k.k->type != BCH_DIRENT)
894                                 continue;
895
896                         dirent = bkey_s_c_to_dirent(k);
897
898                         if (dirent.v->d_type != DT_DIR)
899                                 continue;
900
901                         d_inum = le64_to_cpu(dirent.v->d_inum);
902
903                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
904                                         "directory %llu has multiple hardlinks",
905                                         d_inum)) {
906                                 ret = remove_dirent(c, &iter, dirent);
907                                 if (ret)
908                                         goto err;
909                                 continue;
910                         }
911
912                         ret = inode_bitmap_set(&dirs_done, d_inum);
913                         if (ret) {
914                                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
915                                 goto err;
916                         }
917
918                         ret = path_down(&path, d_inum);
919                         if (ret) {
920                                 goto err;
921                         }
922
923                         bch2_btree_iter_unlock(&iter);
924                         goto next;
925                 }
926                 ret = bch2_btree_iter_unlock(&iter);
927                 if (ret) {
928                         bch_err(c, "btree error %i in fsck", ret);
929                         goto err;
930                 }
931 up:
932                 path.nr--;
933         }
934
935         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
936                 if (k.k->type != BCH_INODE_FS)
937                         continue;
938
939                 if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
940                         continue;
941
942                 if (!bch2_empty_dir(c, k.k->p.inode))
943                         continue;
944
945                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
946                                 "unreachable directory found (inum %llu)",
947                                 k.k->p.inode)) {
948                         bch2_btree_iter_unlock(&iter);
949
950                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
951                         if (ret) {
952                                 goto err;
953                         }
954
955                         had_unreachable = true;
956                 }
957         }
958         ret = bch2_btree_iter_unlock(&iter);
959         if (ret)
960                 goto err;
961
962         if (had_unreachable) {
963                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
964                 kfree(dirs_done.bits);
965                 kfree(path.entries);
966                 memset(&dirs_done, 0, sizeof(dirs_done));
967                 memset(&path, 0, sizeof(path));
968                 goto restart_dfs;
969         }
970
971 out:
972         kfree(dirs_done.bits);
973         kfree(path.entries);
974         return ret;
975 err:
976 fsck_err:
977         ret = bch2_btree_iter_unlock(&iter) ?: ret;
978         goto out;
979 }
980
981 struct nlink {
982         u32     count;
983         u32     dir_count;
984 };
985
986 typedef GENRADIX(struct nlink) nlink_table;
987
988 static void inc_link(struct bch_fs *c, nlink_table *links,
989                      u64 range_start, u64 *range_end,
990                      u64 inum, bool dir)
991 {
992         struct nlink *link;
993
994         if (inum < range_start || inum >= *range_end)
995                 return;
996
997         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
998         if (!link) {
999                 bch_verbose(c, "allocation failed during fs gc - will need another pass");
1000                 *range_end = inum;
1001                 return;
1002         }
1003
1004         if (dir)
1005                 link->dir_count++;
1006         else
1007                 link->count++;
1008 }
1009
1010 noinline_for_stack
1011 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
1012                                u64 range_start, u64 *range_end)
1013 {
1014         struct btree_iter iter;
1015         struct bkey_s_c k;
1016         struct bkey_s_c_dirent d;
1017         u64 d_inum;
1018         int ret;
1019
1020         inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
1021
1022         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
1023                 switch (k.k->type) {
1024                 case BCH_DIRENT:
1025                         d = bkey_s_c_to_dirent(k);
1026                         d_inum = le64_to_cpu(d.v->d_inum);
1027
1028                         if (d.v->d_type == DT_DIR)
1029                                 inc_link(c, links, range_start, range_end,
1030                                          d.k->p.inode, true);
1031
1032                         inc_link(c, links, range_start, range_end,
1033                                  d_inum, false);
1034
1035                         break;
1036                 }
1037
1038                 bch2_btree_iter_cond_resched(&iter);
1039         }
1040         ret = bch2_btree_iter_unlock(&iter);
1041         if (ret)
1042                 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
1043
1044         return ret;
1045 }
1046
1047 s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
1048 {
1049         struct btree_iter iter;
1050         struct bkey_s_c k;
1051         u64 sectors = 0;
1052
1053         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
1054                 if (k.k->p.inode != inum)
1055                         break;
1056
1057                 if (bkey_extent_is_allocation(k.k))
1058                         sectors += k.k->size;
1059         }
1060
1061         return bch2_btree_iter_unlock(&iter) ?: sectors;
1062 }
1063
1064 static int check_inode_nlink(struct bch_fs *c,
1065                              struct bch_inode_unpacked *lostfound_inode,
1066                              struct bch_inode_unpacked *u,
1067                              struct nlink *link,
1068                              bool *do_update)
1069 {
1070         u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED
1071                 ? 0
1072                 : u->bi_nlink + nlink_bias(u->bi_mode);
1073         u32 real_i_nlink =
1074                 link->count * nlink_bias(u->bi_mode) +
1075                 link->dir_count;
1076         int ret = 0;
1077
1078         /*
1079          * These should have been caught/fixed by earlier passes, we don't
1080          * repair them here:
1081          */
1082         if (S_ISDIR(u->bi_mode) && link->count > 1) {
1083                 need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
1084                               u->bi_inum, link->count);
1085                 return 0;
1086         }
1087
1088         if (S_ISDIR(u->bi_mode) && !link->count) {
1089                 need_fsck_err(c, "unreachable directory found (inum %llu)",
1090                               u->bi_inum);
1091                 return 0;
1092         }
1093
1094         if (!S_ISDIR(u->bi_mode) && link->dir_count) {
1095                 need_fsck_err(c, "non directory with subdirectories",
1096                               u->bi_inum);
1097                 return 0;
1098         }
1099
1100         if (!link->count &&
1101             !(u->bi_flags & BCH_INODE_UNLINKED) &&
1102             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1103                 if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
1104                              u->bi_inum, mode_to_type(u->bi_mode)) ==
1105                     FSCK_ERR_IGNORE)
1106                         return 0;
1107
1108                 ret = reattach_inode(c, lostfound_inode, u->bi_inum);
1109                 if (ret)
1110                         return ret;
1111
1112                 link->count = 1;
1113                 real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
1114                 goto set_i_nlink;
1115         }
1116
1117         if (i_nlink < link->count) {
1118                 if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
1119                              u->bi_inum, i_nlink, link->count,
1120                              mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
1121                         return 0;
1122                 goto set_i_nlink;
1123         }
1124
1125         if (i_nlink != real_i_nlink &&
1126             c->sb.clean) {
1127                 if (fsck_err(c, "filesystem marked clean, "
1128                              "but inode %llu has wrong i_nlink "
1129                              "(type %u i_nlink %u, should be %u)",
1130                              u->bi_inum, mode_to_type(u->bi_mode),
1131                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1132                         return 0;
1133                 goto set_i_nlink;
1134         }
1135
1136         if (i_nlink != real_i_nlink &&
1137             (c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
1138                 if (fsck_err(c, "inode %llu has wrong i_nlink "
1139                              "(type %u i_nlink %u, should be %u)",
1140                              u->bi_inum, mode_to_type(u->bi_mode),
1141                              i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1142                         return 0;
1143                 goto set_i_nlink;
1144         }
1145
1146         if (real_i_nlink && i_nlink != real_i_nlink)
1147                 bch_verbose(c, "setting inode %llu nlink from %u to %u",
1148                             u->bi_inum, i_nlink, real_i_nlink);
1149 set_i_nlink:
1150         if (i_nlink != real_i_nlink) {
1151                 if (real_i_nlink) {
1152                         u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode);
1153                         u->bi_flags &= ~BCH_INODE_UNLINKED;
1154                 } else {
1155                         u->bi_nlink = 0;
1156                         u->bi_flags |= BCH_INODE_UNLINKED;
1157                 }
1158
1159                 *do_update = true;
1160         }
1161 fsck_err:
1162         return ret;
1163 }
1164
1165 static int check_inode(struct bch_fs *c,
1166                        struct bch_inode_unpacked *lostfound_inode,
1167                        struct btree_iter *iter,
1168                        struct bkey_s_c_inode inode,
1169                        struct nlink *link)
1170 {
1171         struct bch_inode_unpacked u;
1172         bool do_update = false;
1173         int ret = 0;
1174
1175         ret = bch2_inode_unpack(inode, &u);
1176         if (bch2_fs_inconsistent_on(ret, c,
1177                          "error unpacking inode %llu in fsck",
1178                          inode.k->p.inode))
1179                 return ret;
1180
1181         if (link) {
1182                 ret = check_inode_nlink(c, lostfound_inode, &u, link,
1183                                         &do_update);
1184                 if (ret)
1185                         return ret;
1186         }
1187
1188         if (u.bi_flags & BCH_INODE_UNLINKED) {
1189                 bch_verbose(c, "deleting inode %llu", u.bi_inum);
1190
1191                 ret = bch2_inode_rm(c, u.bi_inum);
1192                 if (ret)
1193                         bch_err(c, "error in fs gc: error %i "
1194                                 "while deleting inode", ret);
1195                 return ret;
1196         }
1197
1198         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) {
1199                 fsck_err_on(c->sb.clean, c,
1200                             "filesystem marked clean, "
1201                             "but inode %llu has i_size dirty",
1202                             u.bi_inum);
1203
1204                 bch_verbose(c, "truncating inode %llu", u.bi_inum);
1205
1206                 /*
1207                  * XXX: need to truncate partial blocks too here - or ideally
1208                  * just switch units to bytes and that issue goes away
1209                  */
1210
1211                 ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
1212                 if (ret) {
1213                         bch_err(c, "error in fs gc: error %i "
1214                                 "truncating inode", ret);
1215                         return ret;
1216                 }
1217
1218                 /*
1219                  * We truncated without our normal sector accounting hook, just
1220                  * make sure we recalculate it:
1221                  */
1222                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
1223
1224                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
1225                 do_update = true;
1226         }
1227
1228         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) {
1229                 s64 sectors;
1230
1231                 fsck_err_on(c->sb.clean, c,
1232                             "filesystem marked clean, "
1233                             "but inode %llu has i_sectors dirty",
1234                             u.bi_inum);
1235
1236                 bch_verbose(c, "recounting sectors for inode %llu",
1237                             u.bi_inum);
1238
1239                 sectors = bch2_count_inode_sectors(c, u.bi_inum);
1240                 if (sectors < 0) {
1241                         bch_err(c, "error in fs gc: error %i "
1242                                 "recounting inode sectors",
1243                                 (int) sectors);
1244                         return sectors;
1245                 }
1246
1247                 u.bi_sectors = sectors;
1248                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
1249                 do_update = true;
1250         }
1251
1252         if (do_update) {
1253                 struct bkey_inode_buf p;
1254
1255                 bch2_inode_pack(&p, &u);
1256
1257                 ret = bch2_btree_insert_at(c, NULL, NULL,
1258                                           BTREE_INSERT_NOFAIL,
1259                                           BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
1260                 if (ret && ret != -EINTR)
1261                         bch_err(c, "error in fs gc: error %i "
1262                                 "updating inode", ret);
1263         }
1264 fsck_err:
1265         return ret;
1266 }
1267
1268 noinline_for_stack
1269 static int bch2_gc_walk_inodes(struct bch_fs *c,
1270                                struct bch_inode_unpacked *lostfound_inode,
1271                                nlink_table *links,
1272                                u64 range_start, u64 range_end)
1273 {
1274         struct btree_iter iter;
1275         struct bkey_s_c k;
1276         struct nlink *link, zero_links = { 0, 0 };
1277         struct genradix_iter nlinks_iter;
1278         int ret = 0, ret2 = 0;
1279         u64 nlinks_pos;
1280
1281         bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
1282         nlinks_iter = genradix_iter_init(links, 0);
1283
1284         while ((k = bch2_btree_iter_peek(&iter)).k &&
1285                !btree_iter_err(k)) {
1286 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
1287
1288                 if (!link && (!k.k || iter.pos.inode >= range_end))
1289                         break;
1290
1291                 nlinks_pos = range_start + nlinks_iter.pos;
1292                 if (iter.pos.inode > nlinks_pos) {
1293                         /* Should have been caught by dirents pass: */
1294                         need_fsck_err_on(link && link->count, c,
1295                                 "missing inode %llu (nlink %u)",
1296                                 nlinks_pos, link->count);
1297                         genradix_iter_advance(&nlinks_iter, links);
1298                         goto peek_nlinks;
1299                 }
1300
1301                 if (iter.pos.inode < nlinks_pos || !link)
1302                         link = &zero_links;
1303
1304                 if (k.k && k.k->type == BCH_INODE_FS) {
1305                         /*
1306                          * Avoid potential deadlocks with iter for
1307                          * truncate/rm/etc.:
1308                          */
1309                         bch2_btree_iter_unlock(&iter);
1310
1311                         ret = check_inode(c, lostfound_inode, &iter,
1312                                           bkey_s_c_to_inode(k), link);
1313                         BUG_ON(ret == -EINTR);
1314                         if (ret)
1315                                 break;
1316
1317                         if (link->count)
1318                                 atomic_long_inc(&c->nr_inodes);
1319                 } else {
1320                         /* Should have been caught by dirents pass: */
1321                         need_fsck_err_on(link->count, c,
1322                                 "missing inode %llu (nlink %u)",
1323                                 nlinks_pos, link->count);
1324                 }
1325
1326                 if (nlinks_pos == iter.pos.inode)
1327                         genradix_iter_advance(&nlinks_iter, links);
1328
1329                 bch2_btree_iter_next(&iter);
1330                 bch2_btree_iter_cond_resched(&iter);
1331         }
1332 fsck_err:
1333         ret2 = bch2_btree_iter_unlock(&iter);
1334         if (ret2)
1335                 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
1336
1337         return ret ?: ret2;
1338 }
1339
1340 noinline_for_stack
1341 static int check_inode_nlinks(struct bch_fs *c,
1342                               struct bch_inode_unpacked *lostfound_inode)
1343 {
1344         nlink_table links;
1345         u64 this_iter_range_start, next_iter_range_start = 0;
1346         int ret = 0;
1347
1348         bch_verbose(c, "checking inode nlinks");
1349
1350         genradix_init(&links);
1351
1352         do {
1353                 this_iter_range_start = next_iter_range_start;
1354                 next_iter_range_start = U64_MAX;
1355
1356                 ret = bch2_gc_walk_dirents(c, &links,
1357                                           this_iter_range_start,
1358                                           &next_iter_range_start);
1359                 if (ret)
1360                         break;
1361
1362                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1363                                          this_iter_range_start,
1364                                          next_iter_range_start);
1365                 if (ret)
1366                         break;
1367
1368                 genradix_free(&links);
1369         } while (next_iter_range_start != U64_MAX);
1370
1371         genradix_free(&links);
1372
1373         return ret;
1374 }
1375
1376 noinline_for_stack
1377 static int check_inodes_fast(struct bch_fs *c)
1378 {
1379         struct btree_iter iter;
1380         struct bkey_s_c k;
1381         struct bkey_s_c_inode inode;
1382         unsigned long nr_inodes = 0;
1383         int ret = 0;
1384
1385         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
1386                 if (k.k->type != BCH_INODE_FS)
1387                         continue;
1388
1389                 inode = bkey_s_c_to_inode(k);
1390
1391                 if (!(inode.v->bi_flags & BCH_INODE_UNLINKED))
1392                         nr_inodes++;
1393
1394                 if (inode.v->bi_flags &
1395                     (BCH_INODE_I_SIZE_DIRTY|
1396                      BCH_INODE_I_SECTORS_DIRTY|
1397                      BCH_INODE_UNLINKED)) {
1398                         fsck_err_on(c->sb.clean, c,
1399                                 "filesystem marked clean but found inode %llu with flags %x",
1400                                 inode.k->p.inode, inode.v->bi_flags);
1401                         ret = check_inode(c, NULL, &iter, inode, NULL);
1402                         BUG_ON(ret == -EINTR);
1403                         if (ret)
1404                                 break;
1405                 }
1406         }
1407         atomic_long_set(&c->nr_inodes, nr_inodes);
1408 fsck_err:
1409         return bch2_btree_iter_unlock(&iter) ?: ret;
1410 }
1411
1412 /*
1413  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1414  * Doesn't fix them yet, mainly because they haven't yet been observed:
1415  */
1416 static int bch2_fsck_full(struct bch_fs *c)
1417 {
1418         struct bch_inode_unpacked root_inode, lostfound_inode;
1419         int ret;
1420
1421         bch_verbose(c, "starting fsck:");
1422         ret =   check_extents(c) ?:
1423                 check_dirents(c) ?:
1424                 check_xattrs(c) ?:
1425                 check_root(c, &root_inode) ?:
1426                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1427                 check_directory_structure(c, &lostfound_inode) ?:
1428                 check_inode_nlinks(c, &lostfound_inode);
1429
1430         bch2_flush_fsck_errs(c);
1431         bch_verbose(c, "fsck done");
1432
1433         return ret;
1434 }
1435
1436 static int bch2_fsck_inode_nlink(struct bch_fs *c)
1437 {
1438         struct bch_inode_unpacked root_inode, lostfound_inode;
1439         int ret;
1440
1441         bch_verbose(c, "checking inode link counts:");
1442         ret =   check_root(c, &root_inode) ?:
1443                 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1444                 check_inode_nlinks(c, &lostfound_inode);
1445
1446         bch2_flush_fsck_errs(c);
1447         bch_verbose(c, "done");
1448
1449         return ret;
1450 }
1451
/*
 * bch2_fsck_walk_inodes_only - cheapest fsck variant: just the fast inode
 * walk from check_inodes_fast(), followed by flushing pending fsck errors.
 *
 * Returns the result of check_inodes_fast().
 */
static int bch2_fsck_walk_inodes_only(struct bch_fs *c)
{
	int ret;

	bch_verbose(c, "walking inodes:");

	ret = check_inodes_fast(c);
	bch2_flush_fsck_errs(c);

	bch_verbose(c, "done");

	return ret;
}
1464
1465 int bch2_fsck(struct bch_fs *c)
1466 {
1467         if (c->opts.fsck)
1468                 return bch2_fsck_full(c);
1469
1470         if (!c->sb.clean &&
1471             !(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)))
1472                 return bch2_fsck_inode_nlink(c);
1473
1474         return bch2_fsck_walk_inodes_only(c);
1475 }