]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fsck.c
2991a0dd3830bee2df923f9aabb56c1b460d3cca
[bcachefs-tools-debian] / libbcachefs / fsck.c
1
2 #include "bcachefs.h"
3 #include "btree_update.h"
4 #include "dirent.h"
5 #include "error.h"
6 #include "fs.h"
7 #include "fsck.h"
8 #include "inode.h"
9 #include "keylist.h"
10 #include "super.h"
11 #include "xattr.h"
12
13 #include <linux/dcache.h> /* struct qstr */
14 #include <linux/generic-radix-tree.h>
15
16 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
17
18 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
19                          struct bkey_s_c_dirent dirent)
20 {
21         struct qstr name;
22         struct bch_inode_unpacked dir_inode;
23         struct bch_hash_info dir_hash_info;
24         u64 dir_inum = dirent.k->p.inode;
25         int ret;
26         char *buf;
27
28         name.len = bch2_dirent_name_bytes(dirent);
29         buf = kmalloc(name.len + 1, GFP_KERNEL);
30         if (!buf)
31                 return -ENOMEM;
32
33         memcpy(buf, dirent.v->d_name, name.len);
34         buf[name.len] = '\0';
35         name.name = buf;
36
37         /* Unlock iter so we don't deadlock, after copying name: */
38         bch2_btree_iter_unlock(iter);
39
40         ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
41         if (ret) {
42                 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
43                 goto err;
44         }
45
46         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
47
48         ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
49         if (ret)
50                 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
51 err:
52         kfree(buf);
53         return ret;
54 }
55
56 static int reattach_inode(struct bch_fs *c,
57                           struct bch_inode_unpacked *lostfound_inode,
58                           u64 inum)
59 {
60         struct bch_hash_info lostfound_hash_info =
61                 bch2_hash_info_init(c, lostfound_inode);
62         struct bkey_inode_buf packed;
63         char name_buf[20];
64         struct qstr name;
65         int ret;
66
67         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
68         name = (struct qstr) QSTR(name_buf);
69
70         lostfound_inode->bi_nlink++;
71
72         bch2_inode_pack(&packed, lostfound_inode);
73
74         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
75                                NULL, NULL, NULL,
76                                BTREE_INSERT_NOFAIL);
77         if (ret) {
78                 bch_err(c, "error %i reattaching inode %llu while updating lost+found",
79                         ret, inum);
80                 return ret;
81         }
82
83         ret = bch2_dirent_create(c, lostfound_inode->bi_inum,
84                                  &lostfound_hash_info,
85                                  DT_DIR, &name, inum, NULL,
86                                  BTREE_INSERT_NOFAIL);
87         if (ret) {
88                 bch_err(c, "error %i reattaching inode %llu while creating new dirent",
89                         ret, inum);
90                 return ret;
91         }
92         return ret;
93 }
94
95 struct inode_walker {
96         bool                    first_this_inode;
97         bool                    have_inode;
98         u64                     cur_inum;
99         struct bch_inode_unpacked inode;
100 };
101
102 static struct inode_walker inode_walker_init(void)
103 {
104         return (struct inode_walker) {
105                 .cur_inum       = -1,
106                 .have_inode     = false,
107         };
108 }
109
110 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
111 {
112         w->first_this_inode     = inum != w->cur_inum;
113         w->cur_inum             = inum;
114
115         if (w->first_this_inode) {
116                 int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
117
118                 if (ret && ret != -ENOENT)
119                         return ret;
120
121                 w->have_inode = !ret;
122         }
123
124         return 0;
125 }
126
127 struct hash_check {
128         struct bch_hash_info    info;
129         struct btree_iter       chain;
130         struct btree_iter       iter;
131         u64                     next;
132 };
133
134 static void hash_check_init(const struct bch_hash_desc desc,
135                             struct hash_check *h, struct bch_fs *c)
136 {
137         bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN, 0);
138         bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN, 0);
139 }
140
141 static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
142                                  const struct bch_inode_unpacked *bi)
143 {
144         h->info = bch2_hash_info_init(c, bi);
145         h->next = -1;
146 }
147
148 static int hash_redo_key(const struct bch_hash_desc desc,
149                          struct hash_check *h, struct bch_fs *c,
150                          struct btree_iter *k_iter, struct bkey_s_c k,
151                          u64 hashed)
152 {
153         struct bkey_i *tmp;
154         int ret = 0;
155
156         tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
157         if (!tmp)
158                 return -ENOMEM;
159
160         bkey_reassemble(tmp, k);
161
162         ret = bch2_btree_delete_at(k_iter, 0);
163         if (ret)
164                 goto err;
165
166         bch2_btree_iter_unlock(k_iter);
167
168         bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp,
169                       BTREE_INSERT_NOFAIL|
170                       BCH_HASH_SET_MUST_CREATE);
171 err:
172         kfree(tmp);
173         return ret;
174 }
175
176 static int hash_check_key(const struct bch_hash_desc desc,
177                           struct hash_check *h, struct bch_fs *c,
178                           struct btree_iter *k_iter, struct bkey_s_c k)
179 {
180         char buf[200];
181         u64 hashed;
182         int ret = 0;
183
184         if (k.k->type != desc.whiteout_type &&
185             k.k->type != desc.key_type)
186                 return 0;
187
188         if (k.k->p.offset != h->next) {
189                 if (!btree_iter_linked(&h->chain)) {
190                         bch2_btree_iter_link(k_iter, &h->chain);
191                         bch2_btree_iter_link(k_iter, &h->iter);
192                 }
193                 bch2_btree_iter_copy(&h->chain, k_iter);
194         }
195         h->next = k.k->p.offset + 1;
196
197         if (k.k->type != desc.key_type)
198                 return 0;
199
200         hashed = desc.hash_bkey(&h->info, k);
201
202         if (fsck_err_on(hashed < h->chain.pos.offset ||
203                         hashed > k.k->p.offset, c,
204                         "hash table key at wrong offset: %llu, "
205                         "hashed to %llu chain starts at %llu\n%s",
206                         k.k->p.offset, hashed, h->chain.pos.offset,
207                         (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
208                                                buf, sizeof(buf), k), buf))) {
209                 ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
210                 if (ret) {
211                         bch_err(c, "hash_redo_key err %i", ret);
212                         return ret;
213                 }
214                 return 1;
215         }
216
217         if (!bkey_cmp(h->chain.pos, k_iter->pos))
218                 return 0;
219
220         bch2_btree_iter_copy(&h->iter, &h->chain);
221         while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) {
222                 struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter);
223
224                 if (fsck_err_on(k2.k->type == desc.key_type &&
225                                 !desc.cmp_bkey(k, k2), c,
226                                 "duplicate hash table keys:\n%s",
227                                 (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
228                                                        buf, sizeof(buf), k), buf))) {
229                         ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL);
230                         if (ret)
231                                 return ret;
232                         return 1;
233                 }
234                 bch2_btree_iter_next(&h->iter);
235         }
236 fsck_err:
237         return ret;
238 }
239
240 /*
241  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
242  * that i_size an i_sectors are consistent
243  */
244 noinline_for_stack
245 static int check_extents(struct bch_fs *c)
246 {
247         struct inode_walker w = inode_walker_init();
248         struct btree_iter iter;
249         struct bkey_s_c k;
250         u64 i_sectors;
251         int ret = 0;
252
253         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
254                            POS(BCACHEFS_ROOT_INO, 0), 0, k) {
255                 if (k.k->type == KEY_TYPE_DISCARD)
256                         continue;
257
258                 ret = walk_inode(c, &w, k.k->p.inode);
259                 if (ret)
260                         break;
261
262                 if (fsck_err_on(!w.have_inode, c,
263                         "extent type %u for missing inode %llu",
264                         k.k->type, k.k->p.inode) ||
265                     fsck_err_on(w.have_inode &&
266                         !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
267                         "extent type %u for non regular file, inode %llu mode %o",
268                         k.k->type, k.k->p.inode, w.inode.bi_mode)) {
269                         bch2_btree_iter_unlock(&iter);
270
271                         ret = bch2_inode_truncate(c, k.k->p.inode, 0, NULL, NULL);
272                         if (ret)
273                                 goto err;
274                         continue;
275                 }
276
277                 if (fsck_err_on(w.first_this_inode &&
278                         w.have_inode &&
279                         !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
280                         w.inode.bi_sectors !=
281                         (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
282                         c, "i_sectors wrong: got %llu, should be %llu",
283                         w.inode.bi_sectors, i_sectors)) {
284                         struct bkey_inode_buf p;
285
286                         w.inode.bi_sectors = i_sectors;
287
288                         bch2_btree_iter_unlock(&iter);
289
290                         bch2_inode_pack(&p, &w.inode);
291
292                         ret = bch2_btree_insert(c, BTREE_ID_INODES,
293                                                 &p.inode.k_i,
294                                                 NULL,
295                                                 NULL,
296                                                 NULL,
297                                                 BTREE_INSERT_NOFAIL);
298                         if (ret) {
299                                 bch_err(c, "error in fs gc: error %i "
300                                         "updating inode", ret);
301                                 goto err;
302                         }
303
304                         /* revalidate iterator: */
305                         k = bch2_btree_iter_peek(&iter);
306                 }
307
308                 if (fsck_err_on(w.have_inode &&
309                         !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
310                         k.k->type != BCH_RESERVATION &&
311                         k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
312                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
313                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
314                         bch2_btree_iter_unlock(&iter);
315
316                         ret = bch2_inode_truncate(c, k.k->p.inode,
317                                         round_up(w.inode.bi_size, PAGE_SIZE) >> 9,
318                                         NULL, NULL);
319                         if (ret)
320                                 goto err;
321                         continue;
322                 }
323         }
324 err:
325 fsck_err:
326         return bch2_btree_iter_unlock(&iter) ?: ret;
327 }
328
329 /*
330  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
331  * validate d_type
332  */
333 noinline_for_stack
334 static int check_dirents(struct bch_fs *c)
335 {
336         struct inode_walker w = inode_walker_init();
337         struct hash_check h;
338         struct btree_iter iter;
339         struct bkey_s_c k;
340         unsigned name_len;
341         char buf[200];
342         int ret = 0;
343
344         hash_check_init(bch2_dirent_hash_desc, &h, c);
345
346         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
347                            POS(BCACHEFS_ROOT_INO, 0), 0, k) {
348                 struct bkey_s_c_dirent d;
349                 struct bch_inode_unpacked target;
350                 bool have_target;
351                 u64 d_inum;
352
353                 ret = walk_inode(c, &w, k.k->p.inode);
354                 if (ret)
355                         break;
356
357                 if (fsck_err_on(!w.have_inode, c,
358                                 "dirent in nonexisting directory:\n%s",
359                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
360                                                        buf, sizeof(buf), k), buf)) ||
361                     fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
362                                 "dirent in non directory inode type %u:\n%s",
363                                 mode_to_type(w.inode.bi_mode),
364                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
365                                                        buf, sizeof(buf), k), buf))) {
366                         ret = bch2_btree_delete_at(&iter, 0);
367                         if (ret)
368                                 goto err;
369                         continue;
370                 }
371
372                 if (w.first_this_inode && w.have_inode)
373                         hash_check_set_inode(&h, c, &w.inode);
374
375                 ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k);
376                 if (ret > 0) {
377                         ret = 0;
378                         continue;
379                 }
380
381                 if (ret)
382                         goto fsck_err;
383
384                 if (k.k->type != BCH_DIRENT)
385                         continue;
386
387                 d = bkey_s_c_to_dirent(k);
388                 d_inum = le64_to_cpu(d.v->d_inum);
389
390                 name_len = bch2_dirent_name_bytes(d);
391
392                 if (fsck_err_on(!name_len, c, "empty dirent") ||
393                     fsck_err_on(name_len == 1 &&
394                                 !memcmp(d.v->d_name, ".", 1), c,
395                                 ". dirent") ||
396                     fsck_err_on(name_len == 2 &&
397                                 !memcmp(d.v->d_name, "..", 2), c,
398                                 ".. dirent")) {
399                         ret = remove_dirent(c, &iter, d);
400                         if (ret)
401                                 goto err;
402                         continue;
403                 }
404
405                 if (fsck_err_on(d_inum == d.k->p.inode, c,
406                                 "dirent points to own directory:\n%s",
407                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
408                                                        buf, sizeof(buf), k), buf))) {
409                         ret = remove_dirent(c, &iter, d);
410                         if (ret)
411                                 goto err;
412                         continue;
413                 }
414
415                 ret = bch2_inode_find_by_inum(c, d_inum, &target);
416                 if (ret && ret != -ENOENT)
417                         break;
418
419                 have_target = !ret;
420                 ret = 0;
421
422                 if (fsck_err_on(!have_target, c,
423                                 "dirent points to missing inode:\n%s",
424                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
425                                                        buf, sizeof(buf), k), buf))) {
426                         ret = remove_dirent(c, &iter, d);
427                         if (ret)
428                                 goto err;
429                         continue;
430                 }
431
432                 if (fsck_err_on(have_target &&
433                                 d.v->d_type !=
434                                 mode_to_type(target.bi_mode), c,
435                                 "incorrect d_type: should be %u:\n%s",
436                                 mode_to_type(target.bi_mode),
437                                 (bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
438                                                        buf, sizeof(buf), k), buf))) {
439                         struct bkey_i_dirent *n;
440
441                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
442                         if (!n) {
443                                 ret = -ENOMEM;
444                                 goto err;
445                         }
446
447                         bkey_reassemble(&n->k_i, d.s_c);
448                         n->v.d_type = mode_to_type(target.bi_mode);
449
450                         ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
451                                         BTREE_INSERT_NOFAIL,
452                                         BTREE_INSERT_ENTRY(&iter, &n->k_i));
453                         kfree(n);
454                         if (ret)
455                                 goto err;
456
457                 }
458         }
459 err:
460 fsck_err:
461         bch2_btree_iter_unlock(&h.chain);
462         bch2_btree_iter_unlock(&h.iter);
463         return bch2_btree_iter_unlock(&iter) ?: ret;
464 }
465
466 /*
467  * Walk xattrs: verify that they all have a corresponding inode
468  */
469 noinline_for_stack
470 static int check_xattrs(struct bch_fs *c)
471 {
472         struct inode_walker w = inode_walker_init();
473         struct hash_check h;
474         struct btree_iter iter;
475         struct bkey_s_c k;
476         int ret = 0;
477
478         hash_check_init(bch2_xattr_hash_desc, &h, c);
479
480         for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
481                            POS(BCACHEFS_ROOT_INO, 0), 0, k) {
482                 ret = walk_inode(c, &w, k.k->p.inode);
483                 if (ret)
484                         break;
485
486                 if (fsck_err_on(!w.have_inode, c,
487                                 "xattr for missing inode %llu",
488                                 k.k->p.inode)) {
489                         ret = bch2_btree_delete_at(&iter, 0);
490                         if (ret)
491                                 goto err;
492                         continue;
493                 }
494
495                 if (w.first_this_inode && w.have_inode)
496                         hash_check_set_inode(&h, c, &w.inode);
497
498                 ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k);
499                 if (ret)
500                         goto fsck_err;
501         }
502 err:
503 fsck_err:
504         bch2_btree_iter_unlock(&h.chain);
505         bch2_btree_iter_unlock(&h.iter);
506         return bch2_btree_iter_unlock(&iter) ?: ret;
507 }
508
509 /* Get root directory, create if it doesn't exist: */
510 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
511 {
512         struct bkey_inode_buf packed;
513         int ret;
514
515         ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
516         if (ret && ret != -ENOENT)
517                 return ret;
518
519         if (fsck_err_on(ret, c, "root directory missing"))
520                 goto create_root;
521
522         if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
523                         "root inode not a directory"))
524                 goto create_root;
525
526         return 0;
527 fsck_err:
528         return ret;
529 create_root:
530         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
531                         0, NULL);
532         root_inode->bi_inum = BCACHEFS_ROOT_INO;
533
534         bch2_inode_pack(&packed, root_inode);
535
536         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
537                                  NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
538 }
539
540 /* Get lost+found, create if it doesn't exist: */
541 static int check_lostfound(struct bch_fs *c,
542                            struct bch_inode_unpacked *root_inode,
543                            struct bch_inode_unpacked *lostfound_inode)
544 {
545         struct qstr lostfound = QSTR("lost+found");
546         struct bch_hash_info root_hash_info =
547                 bch2_hash_info_init(c, root_inode);
548         struct bkey_inode_buf packed;
549         u64 inum;
550         int ret;
551
552         inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
553                                  &lostfound);
554         if (!inum) {
555                 bch_notice(c, "creating lost+found");
556                 goto create_lostfound;
557         }
558
559         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
560         if (ret && ret != -ENOENT)
561                 return ret;
562
563         if (fsck_err_on(ret, c, "lost+found missing"))
564                 goto create_lostfound;
565
566         if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
567                         "lost+found inode not a directory"))
568                 goto create_lostfound;
569
570         return 0;
571 fsck_err:
572         return ret;
573 create_lostfound:
574         root_inode->bi_nlink++;
575
576         bch2_inode_pack(&packed, root_inode);
577
578         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
579                                 NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
580         if (ret)
581                 return ret;
582
583         bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
584                         0, root_inode);
585
586         ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0,
587                                &c->unused_inode_hint);
588         if (ret)
589                 return ret;
590
591         ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
592                                  &lostfound, lostfound_inode->bi_inum, NULL,
593                                  BTREE_INSERT_NOFAIL);
594         if (ret)
595                 return ret;
596
597         return 0;
598 }
599
600 struct inode_bitmap {
601         unsigned long   *bits;
602         size_t          size;
603 };
604
605 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
606 {
607         return nr < b->size ? test_bit(nr, b->bits) : false;
608 }
609
610 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
611 {
612         if (nr >= b->size) {
613                 size_t new_size = max(max(PAGE_SIZE * 8,
614                                           b->size * 2),
615                                           nr + 1);
616                 void *n;
617
618                 new_size = roundup_pow_of_two(new_size);
619                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
620                 if (!n) {
621                         return -ENOMEM;
622                 }
623
624                 b->bits = n;
625                 b->size = new_size;
626         }
627
628         __set_bit(nr, b->bits);
629         return 0;
630 }
631
632 struct pathbuf {
633         size_t          nr;
634         size_t          size;
635
636         struct pathbuf_entry {
637                 u64     inum;
638                 u64     offset;
639         }               *entries;
640 };
641
642 static int path_down(struct pathbuf *p, u64 inum)
643 {
644         if (p->nr == p->size) {
645                 size_t new_size = max(256UL, p->size * 2);
646                 void *n = krealloc(p->entries,
647                                    new_size * sizeof(p->entries[0]),
648                                    GFP_KERNEL);
649                 if (!n)
650                         return -ENOMEM;
651
652                 p->entries = n;
653                 p->size = new_size;
654         };
655
656         p->entries[p->nr++] = (struct pathbuf_entry) {
657                 .inum = inum,
658                 .offset = 0,
659         };
660         return 0;
661 }
662
663 noinline_for_stack
664 static int check_directory_structure(struct bch_fs *c,
665                                      struct bch_inode_unpacked *lostfound_inode)
666 {
667         struct inode_bitmap dirs_done = { NULL, 0 };
668         struct pathbuf path = { 0, 0, NULL };
669         struct pathbuf_entry *e;
670         struct btree_iter iter;
671         struct bkey_s_c k;
672         struct bkey_s_c_dirent dirent;
673         bool had_unreachable;
674         u64 d_inum;
675         int ret = 0;
676
677         /* DFS: */
678 restart_dfs:
679         had_unreachable = false;
680
681         ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
682         if (ret) {
683                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
684                 goto err;
685         }
686
687         ret = path_down(&path, BCACHEFS_ROOT_INO);
688         if (ret) {
689                 return ret;
690         }
691
692         while (path.nr) {
693 next:
694                 e = &path.entries[path.nr - 1];
695
696                 if (e->offset == U64_MAX)
697                         goto up;
698
699                 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
700                                    POS(e->inum, e->offset + 1), 0, k) {
701                         if (k.k->p.inode != e->inum)
702                                 break;
703
704                         e->offset = k.k->p.offset;
705
706                         if (k.k->type != BCH_DIRENT)
707                                 continue;
708
709                         dirent = bkey_s_c_to_dirent(k);
710
711                         if (dirent.v->d_type != DT_DIR)
712                                 continue;
713
714                         d_inum = le64_to_cpu(dirent.v->d_inum);
715
716                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
717                                         "directory %llu has multiple hardlinks",
718                                         d_inum)) {
719                                 ret = remove_dirent(c, &iter, dirent);
720                                 if (ret)
721                                         goto err;
722                                 continue;
723                         }
724
725                         ret = inode_bitmap_set(&dirs_done, d_inum);
726                         if (ret) {
727                                 bch_err(c, "memory allocation failure in inode_bitmap_set()");
728                                 goto err;
729                         }
730
731                         ret = path_down(&path, d_inum);
732                         if (ret) {
733                                 goto err;
734                         }
735
736                         bch2_btree_iter_unlock(&iter);
737                         goto next;
738                 }
739                 ret = bch2_btree_iter_unlock(&iter);
740                 if (ret) {
741                         bch_err(c, "btree error %i in fsck", ret);
742                         goto err;
743                 }
744 up:
745                 path.nr--;
746         }
747
748         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
749                 if (k.k->type != BCH_INODE_FS ||
750                     !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
751                         continue;
752
753                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
754                                 "unreachable directory found (inum %llu)",
755                                 k.k->p.inode)) {
756                         bch2_btree_iter_unlock(&iter);
757
758                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
759                         if (ret) {
760                                 goto err;
761                         }
762
763                         had_unreachable = true;
764                 }
765         }
766         ret = bch2_btree_iter_unlock(&iter);
767         if (ret)
768                 goto err;
769
770         if (had_unreachable) {
771                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
772                 kfree(dirs_done.bits);
773                 kfree(path.entries);
774                 memset(&dirs_done, 0, sizeof(dirs_done));
775                 memset(&path, 0, sizeof(path));
776                 goto restart_dfs;
777         }
778
779 out:
780         kfree(dirs_done.bits);
781         kfree(path.entries);
782         return ret;
783 err:
784 fsck_err:
785         ret = bch2_btree_iter_unlock(&iter) ?: ret;
786         goto out;
787 }
788
789 struct nlink {
790         u32     count;
791         u32     dir_count;
792 };
793
794 typedef GENRADIX(struct nlink) nlink_table;
795
796 static void inc_link(struct bch_fs *c, nlink_table *links,
797                      u64 range_start, u64 *range_end,
798                      u64 inum, bool dir)
799 {
800         struct nlink *link;
801
802         if (inum < range_start || inum >= *range_end)
803                 return;
804
805         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
806         if (!link) {
807                 bch_verbose(c, "allocation failed during fs gc - will need another pass");
808                 *range_end = inum;
809                 return;
810         }
811
812         if (dir)
813                 link->dir_count++;
814         else
815                 link->count++;
816 }
817
818 noinline_for_stack
819 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
820                                u64 range_start, u64 *range_end)
821 {
822         struct btree_iter iter;
823         struct bkey_s_c k;
824         struct bkey_s_c_dirent d;
825         u64 d_inum;
826         int ret;
827
828         inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
829
830         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
831                 switch (k.k->type) {
832                 case BCH_DIRENT:
833                         d = bkey_s_c_to_dirent(k);
834                         d_inum = le64_to_cpu(d.v->d_inum);
835
836                         if (d.v->d_type == DT_DIR)
837                                 inc_link(c, links, range_start, range_end,
838                                          d.k->p.inode, true);
839
840                         inc_link(c, links, range_start, range_end,
841                                  d_inum, false);
842
843                         break;
844                 }
845
846                 bch2_btree_iter_cond_resched(&iter);
847         }
848         ret = bch2_btree_iter_unlock(&iter);
849         if (ret)
850                 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
851
852         return ret;
853 }
854
855 s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
856 {
857         struct btree_iter iter;
858         struct bkey_s_c k;
859         u64 sectors = 0;
860
861         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
862                 if (k.k->p.inode != inum)
863                         break;
864
865                 if (bkey_extent_is_allocation(k.k))
866                         sectors += k.k->size;
867         }
868
869         return bch2_btree_iter_unlock(&iter) ?: sectors;
870 }
871
872 static int bch2_gc_do_inode(struct bch_fs *c,
873                            struct bch_inode_unpacked *lostfound_inode,
874                            struct btree_iter *iter,
875                            struct bkey_s_c_inode inode, struct nlink link)
876 {
877         struct bch_inode_unpacked u;
878         int ret = 0;
879         u32 i_nlink, real_i_nlink;
880         bool do_update = false;
881
882         ret = bch2_inode_unpack(inode, &u);
883         if (bch2_fs_inconsistent_on(ret, c,
884                          "error unpacking inode %llu in fsck",
885                          inode.k->p.inode))
886                 return ret;
887
888         i_nlink = u.bi_nlink + nlink_bias(u.bi_mode);
889
890         fsck_err_on(i_nlink < link.count, c,
891                     "inode %llu i_link too small (%u < %u, type %i)",
892                     inode.k->p.inode, i_nlink,
893                     link.count, mode_to_type(u.bi_mode));
894
895         /* These should have been caught/fixed by earlier passes: */
896         if (S_ISDIR(u.bi_mode)) {
897                 need_fsck_err_on(link.count > 1, c,
898                         "directory %llu with multiple hardlinks: %u",
899                         inode.k->p.inode, link.count);
900
901                 real_i_nlink = link.count * 2 + link.dir_count;
902         } else {
903                 need_fsck_err_on(link.dir_count, c,
904                         "found dirents for non directory %llu",
905                         inode.k->p.inode);
906
907                 real_i_nlink = link.count + link.dir_count;
908         }
909
910         if (!link.count) {
911                 fsck_err_on(c->sb.clean, c,
912                             "filesystem marked clean, "
913                             "but found orphaned inode %llu",
914                             inode.k->p.inode);
915
916                 if (fsck_err_on(S_ISDIR(u.bi_mode) &&
917                                 bch2_empty_dir(c, inode.k->p.inode), c,
918                                 "non empty directory with link count 0, "
919                                 "inode nlink %u, dir links found %u",
920                                 i_nlink, link.dir_count)) {
921                         ret = reattach_inode(c, lostfound_inode,
922                                              inode.k->p.inode);
923                         if (ret)
924                                 return ret;
925                 }
926
927                 bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
928
929                 ret = bch2_inode_rm(c, inode.k->p.inode);
930                 if (ret)
931                         bch_err(c, "error in fs gc: error %i "
932                                 "while deleting inode", ret);
933                 return ret;
934         }
935
936         if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) {
937                 fsck_err_on(c->sb.clean, c,
938                             "filesystem marked clean, "
939                             "but inode %llu has i_size dirty",
940                             inode.k->p.inode);
941
942                 bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
943
944                 /*
945                  * XXX: need to truncate partial blocks too here - or ideally
946                  * just switch units to bytes and that issue goes away
947                  */
948
949                 ret = bch2_inode_truncate(c, inode.k->p.inode,
950                                 round_up(u.bi_size, PAGE_SIZE) >> 9,
951                                 NULL, NULL);
952                 if (ret) {
953                         bch_err(c, "error in fs gc: error %i "
954                                 "truncating inode", ret);
955                         return ret;
956                 }
957
958                 /*
959                  * We truncated without our normal sector accounting hook, just
960                  * make sure we recalculate it:
961                  */
962                 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
963
964                 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
965                 do_update = true;
966         }
967
968         if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) {
969                 s64 sectors;
970
971                 fsck_err_on(c->sb.clean, c,
972                             "filesystem marked clean, "
973                             "but inode %llu has i_sectors dirty",
974                             inode.k->p.inode);
975
976                 bch_verbose(c, "recounting sectors for inode %llu",
977                             inode.k->p.inode);
978
979                 sectors = bch2_count_inode_sectors(c, inode.k->p.inode);
980                 if (sectors < 0) {
981                         bch_err(c, "error in fs gc: error %i "
982                                 "recounting inode sectors",
983                                 (int) sectors);
984                         return sectors;
985                 }
986
987                 u.bi_sectors = sectors;
988                 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
989                 do_update = true;
990         }
991
992         if (i_nlink != real_i_nlink) {
993                 fsck_err_on(c->sb.clean, c,
994                             "filesystem marked clean, "
995                             "but inode %llu has wrong i_nlink "
996                             "(type %u i_nlink %u, should be %u)",
997                             inode.k->p.inode, mode_to_type(u.bi_mode),
998                             i_nlink, real_i_nlink);
999
1000                 bch_verbose(c, "setting inode %llu nlinks from %u to %u",
1001                             inode.k->p.inode, i_nlink, real_i_nlink);
1002                 u.bi_nlink = real_i_nlink - nlink_bias(u.bi_mode);
1003                 do_update = true;
1004         }
1005
1006         if (do_update) {
1007                 struct bkey_inode_buf p;
1008
1009                 bch2_inode_pack(&p, &u);
1010
1011                 ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
1012                                           BTREE_INSERT_NOFAIL,
1013                                           BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
1014                 if (ret && ret != -EINTR)
1015                         bch_err(c, "error in fs gc: error %i "
1016                                 "updating inode", ret);
1017         }
1018 fsck_err:
1019         return ret;
1020 }
1021
1022 noinline_for_stack
1023 static int bch2_gc_walk_inodes(struct bch_fs *c,
1024                               struct bch_inode_unpacked *lostfound_inode,
1025                               nlink_table *links,
1026                               u64 range_start, u64 range_end)
1027 {
1028         struct btree_iter iter;
1029         struct bkey_s_c k;
1030         struct nlink *link, zero_links = { 0, 0 };
1031         struct genradix_iter nlinks_iter;
1032         int ret = 0, ret2 = 0;
1033         u64 nlinks_pos;
1034
1035         bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
1036         nlinks_iter = genradix_iter_init(links, 0);
1037
1038         while ((k = bch2_btree_iter_peek(&iter)).k &&
1039                !btree_iter_err(k)) {
1040 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
1041
1042                 if (!link && (!k.k || iter.pos.inode >= range_end))
1043                         break;
1044
1045                 nlinks_pos = range_start + nlinks_iter.pos;
1046                 if (iter.pos.inode > nlinks_pos) {
1047                         /* Should have been caught by dirents pass: */
1048                         need_fsck_err_on(link && link->count, c,
1049                                 "missing inode %llu (nlink %u)",
1050                                 nlinks_pos, link->count);
1051                         genradix_iter_advance(&nlinks_iter, links);
1052                         goto peek_nlinks;
1053                 }
1054
1055                 if (iter.pos.inode < nlinks_pos || !link)
1056                         link = &zero_links;
1057
1058                 if (k.k && k.k->type == BCH_INODE_FS) {
1059                         /*
1060                          * Avoid potential deadlocks with iter for
1061                          * truncate/rm/etc.:
1062                          */
1063                         bch2_btree_iter_unlock(&iter);
1064
1065                         ret = bch2_gc_do_inode(c, lostfound_inode, &iter,
1066                                               bkey_s_c_to_inode(k), *link);
1067                         if (ret == -EINTR)
1068                                 continue;
1069                         if (ret)
1070                                 break;
1071
1072                         if (link->count)
1073                                 atomic_long_inc(&c->nr_inodes);
1074                 } else {
1075                         /* Should have been caught by dirents pass: */
1076                         need_fsck_err_on(link->count, c,
1077                                 "missing inode %llu (nlink %u)",
1078                                 nlinks_pos, link->count);
1079                 }
1080
1081                 if (nlinks_pos == iter.pos.inode)
1082                         genradix_iter_advance(&nlinks_iter, links);
1083
1084                 bch2_btree_iter_next(&iter);
1085                 bch2_btree_iter_cond_resched(&iter);
1086         }
1087 fsck_err:
1088         ret2 = bch2_btree_iter_unlock(&iter);
1089         if (ret2)
1090                 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
1091
1092         return ret ?: ret2;
1093 }
1094
1095 noinline_for_stack
1096 static int check_inode_nlinks(struct bch_fs *c,
1097                               struct bch_inode_unpacked *lostfound_inode)
1098 {
1099         nlink_table links;
1100         u64 this_iter_range_start, next_iter_range_start = 0;
1101         int ret = 0;
1102
1103         genradix_init(&links);
1104
1105         do {
1106                 this_iter_range_start = next_iter_range_start;
1107                 next_iter_range_start = U64_MAX;
1108
1109                 ret = bch2_gc_walk_dirents(c, &links,
1110                                           this_iter_range_start,
1111                                           &next_iter_range_start);
1112                 if (ret)
1113                         break;
1114
1115                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1116                                          this_iter_range_start,
1117                                          next_iter_range_start);
1118                 if (ret)
1119                         break;
1120
1121                 genradix_free(&links);
1122         } while (next_iter_range_start != U64_MAX);
1123
1124         genradix_free(&links);
1125
1126         return ret;
1127 }
1128
1129 /*
1130  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1131  * Doesn't fix them yet, mainly because they haven't yet been observed:
1132  */
1133 int bch2_fsck(struct bch_fs *c, bool full_fsck)
1134 {
1135         struct bch_inode_unpacked root_inode, lostfound_inode;
1136         int ret;
1137
1138         if (full_fsck) {
1139                 bch_verbose(c, "checking extents");
1140                 ret = check_extents(c);
1141                 if (ret)
1142                         return ret;
1143
1144                 bch_verbose(c, "checking dirents");
1145                 ret = check_dirents(c);
1146                 if (ret)
1147                         return ret;
1148
1149                 bch_verbose(c, "checking xattrs");
1150                 ret = check_xattrs(c);
1151                 if (ret)
1152                         return ret;
1153
1154                 bch_verbose(c, "checking root directory");
1155                 ret = check_root(c, &root_inode);
1156                 if (ret)
1157                         return ret;
1158
1159                 bch_verbose(c, "checking lost+found");
1160                 ret = check_lostfound(c, &root_inode, &lostfound_inode);
1161                 if (ret)
1162                         return ret;
1163
1164                 bch_verbose(c, "checking directory structure");
1165                 ret = check_directory_structure(c, &lostfound_inode);
1166                 if (ret)
1167                         return ret;
1168
1169                 bch_verbose(c, "checking inode nlinks");
1170                 ret = check_inode_nlinks(c, &lostfound_inode);
1171                 if (ret)
1172                         return ret;
1173         } else {
1174                 bch_verbose(c, "checking root directory");
1175                 ret = check_root(c, &root_inode);
1176                 if (ret)
1177                         return ret;
1178
1179                 bch_verbose(c, "checking lost+found");
1180                 ret = check_lostfound(c, &root_inode, &lostfound_inode);
1181                 if (ret)
1182                         return ret;
1183
1184                 bch_verbose(c, "checking inode nlinks");
1185                 ret = check_inode_nlinks(c, &lostfound_inode);
1186                 if (ret)
1187                         return ret;
1188         }
1189
1190         bch2_flush_fsck_errs(c);
1191
1192         return 0;
1193 }