]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fs-gc.c
bcachefs-in-userspace improvements
[bcachefs-tools-debian] / libbcachefs / fs-gc.c
1
2 #include "bcachefs.h"
3 #include "btree_update.h"
4 #include "dirent.h"
5 #include "error.h"
6 #include "fs.h"
7 #include "fs-gc.h"
8 #include "inode.h"
9 #include "keylist.h"
10 #include "super.h"
11
12 #include <linux/dcache.h> /* struct qstr */
13 #include <linux/generic-radix-tree.h>
14
/*
 * Initializer for a struct qstr naming the C string @n.  The length comes from
 * strlen(), so @n must be NUL terminated; note that @n is expanded twice.
 */
#define QSTR(n) { { { .len = strlen(n) } }, .name = (n) }
16
17 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
18                          struct bkey_s_c_dirent dirent)
19 {
20         struct qstr name;
21         struct bch_inode_unpacked dir_inode;
22         struct bch_hash_info dir_hash_info;
23         u64 dir_inum = dirent.k->p.inode;
24         int ret;
25         char *buf;
26
27         name.len = bch2_dirent_name_bytes(dirent);
28         buf = kmalloc(name.len + 1, GFP_KERNEL);
29         if (!buf)
30                 return -ENOMEM;
31
32         memcpy(buf, dirent.v->d_name, name.len);
33         buf[name.len] = '\0';
34         name.name = buf;
35
36         /* Unlock iter so we don't deadlock, after copying name: */
37         bch2_btree_iter_unlock(iter);
38
39         ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
40         if (ret)
41                 goto err;
42
43         dir_hash_info = bch2_hash_info_init(c, &dir_inode);
44
45         ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
46 err:
47         kfree(buf);
48         return ret;
49 }
50
51 static int reattach_inode(struct bch_fs *c,
52                           struct bch_inode_unpacked *lostfound_inode,
53                           u64 inum)
54 {
55         struct bch_hash_info lostfound_hash_info =
56                 bch2_hash_info_init(c, lostfound_inode);
57         struct bkey_inode_buf packed;
58         char name_buf[20];
59         struct qstr name;
60         int ret;
61
62         snprintf(name_buf, sizeof(name_buf), "%llu", inum);
63         name = (struct qstr) QSTR(name_buf);
64
65         lostfound_inode->i_nlink++;
66
67         bch2_inode_pack(&packed, lostfound_inode);
68
69         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
70                                NULL, NULL, NULL, 0);
71         if (ret)
72                 return ret;
73
74         return bch2_dirent_create(c, lostfound_inode->inum,
75                                  &lostfound_hash_info,
76                                  DT_DIR, &name, inum, NULL, 0);
77 }
78
79 struct inode_walker {
80         bool                    first_this_inode;
81         bool                    have_inode;
82         u64                     cur_inum;
83         struct bch_inode_unpacked inode;
84 };
85
86 static struct inode_walker inode_walker_init(void)
87 {
88         return (struct inode_walker) {
89                 .cur_inum       = -1,
90                 .have_inode     = false,
91         };
92 }
93
94 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
95 {
96         w->first_this_inode     = inum != w->cur_inum;
97         w->cur_inum             = inum;
98
99         if (w->first_this_inode) {
100                 int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
101
102                 if (ret && ret != -ENOENT)
103                         return ret;
104
105                 w->have_inode = !ret;
106         }
107
108         return 0;
109 }
110
111 /*
112  * Walk extents: verify that extents have a corresponding S_ISREG inode, and
113  * that i_size an i_sectors are consistent
114  */
115 noinline_for_stack
116 static int check_extents(struct bch_fs *c)
117 {
118         struct inode_walker w = inode_walker_init();
119         struct btree_iter iter;
120         struct bkey_s_c k;
121         u64 i_sectors;
122         int ret = 0;
123
124         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
125                            POS(BCACHE_ROOT_INO, 0), k) {
126                 if (k.k->type == KEY_TYPE_DISCARD)
127                         continue;
128
129                 ret = walk_inode(c, &w, k.k->p.inode);
130                 if (ret)
131                         break;
132
133                 unfixable_fsck_err_on(!w.have_inode, c,
134                         "extent type %u for missing inode %llu",
135                         k.k->type, k.k->p.inode);
136
137                 unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
138                         w.inode.i_sectors !=
139                         (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
140                         c, "i_sectors wrong: got %llu, should be %llu",
141                         w.inode.i_sectors, i_sectors);
142
143                 unfixable_fsck_err_on(w.have_inode &&
144                         !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
145                         "extent type %u for non regular file, inode %llu mode %o",
146                         k.k->type, k.k->p.inode, w.inode.i_mode);
147
148                 unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
149                         k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c,
150                         "extent type %u offset %llu past end of inode %llu, i_size %llu",
151                         k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
152         }
153 fsck_err:
154         return bch2_btree_iter_unlock(&iter) ?: ret;
155 }
156
157 /*
158  * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
159  * validate d_type
160  */
161 noinline_for_stack
162 static int check_dirents(struct bch_fs *c)
163 {
164         struct inode_walker w = inode_walker_init();
165         struct btree_iter iter;
166         struct bkey_s_c k;
167         int ret = 0;
168
169         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
170                            POS(BCACHE_ROOT_INO, 0), k) {
171                 struct bkey_s_c_dirent d;
172                 struct bch_inode_unpacked target;
173                 bool have_target;
174                 u64 d_inum;
175
176                 ret = walk_inode(c, &w, k.k->p.inode);
177                 if (ret)
178                         break;
179
180                 unfixable_fsck_err_on(!w.have_inode, c,
181                                       "dirent in nonexisting directory %llu",
182                                       k.k->p.inode);
183
184                 unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
185                                       "dirent in non directory inode %llu, type %u",
186                                       k.k->p.inode, mode_to_type(w.inode.i_mode));
187
188                 if (k.k->type != BCH_DIRENT)
189                         continue;
190
191                 d = bkey_s_c_to_dirent(k);
192                 d_inum = le64_to_cpu(d.v->d_inum);
193
194                 if (fsck_err_on(d_inum == d.k->p.inode, c,
195                                 "dirent points to own directory")) {
196                         ret = remove_dirent(c, &iter, d);
197                         if (ret)
198                                 goto err;
199                         continue;
200                 }
201
202                 ret = bch2_inode_find_by_inum(c, d_inum, &target);
203                 if (ret && ret != -ENOENT)
204                         break;
205
206                 have_target = !ret;
207                 ret = 0;
208
209                 if (fsck_err_on(!have_target, c,
210                                 "dirent points to missing inode %llu, type %u filename %s",
211                                 d_inum, d.v->d_type, d.v->d_name)) {
212                         ret = remove_dirent(c, &iter, d);
213                         if (ret)
214                                 goto err;
215                         continue;
216                 }
217
218                 if (fsck_err_on(have_target &&
219                                 d.v->d_type !=
220                                 mode_to_type(le16_to_cpu(target.i_mode)), c,
221                                 "incorrect d_type: got %u should be %u, filename %s",
222                                 d.v->d_type,
223                                 mode_to_type(le16_to_cpu(target.i_mode)),
224                                 d.v->d_name)) {
225                         struct bkey_i_dirent *n;
226
227                         n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
228                         if (!n) {
229                                 ret = -ENOMEM;
230                                 goto err;
231                         }
232
233                         bkey_reassemble(&n->k_i, d.s_c);
234                         n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode));
235
236                         ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
237                                         BTREE_INSERT_NOFAIL,
238                                         BTREE_INSERT_ENTRY(&iter, &n->k_i));
239                         kfree(n);
240                         if (ret)
241                                 goto err;
242
243                 }
244         }
245 err:
246 fsck_err:
247         return bch2_btree_iter_unlock(&iter) ?: ret;
248 }
249
250 /*
251  * Walk xattrs: verify that they all have a corresponding inode
252  */
253 noinline_for_stack
254 static int check_xattrs(struct bch_fs *c)
255 {
256         struct inode_walker w = inode_walker_init();
257         struct btree_iter iter;
258         struct bkey_s_c k;
259         int ret = 0;
260
261         for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
262                            POS(BCACHE_ROOT_INO, 0), k) {
263                 ret = walk_inode(c, &w, k.k->p.inode);
264                 if (ret)
265                         break;
266
267                 unfixable_fsck_err_on(!w.have_inode, c,
268                         "xattr for missing inode %llu",
269                         k.k->p.inode);
270         }
271 fsck_err:
272         return bch2_btree_iter_unlock(&iter) ?: ret;
273 }
274
275 /* Get root directory, create if it doesn't exist: */
276 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
277 {
278         struct bkey_inode_buf packed;
279         int ret;
280
281         ret = bch2_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
282         if (ret && ret != -ENOENT)
283                 return ret;
284
285         if (fsck_err_on(ret, c, "root directory missing"))
286                 goto create_root;
287
288         if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c,
289                         "root inode not a directory"))
290                 goto create_root;
291
292         return 0;
293 fsck_err:
294         return ret;
295 create_root:
296         bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
297         root_inode->inum = BCACHE_ROOT_INO;
298
299         bch2_inode_pack(&packed, root_inode);
300
301         return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
302                                 NULL, NULL, NULL, 0);
303 }
304
305 /* Get lost+found, create if it doesn't exist: */
306 static int check_lostfound(struct bch_fs *c,
307                            struct bch_inode_unpacked *root_inode,
308                            struct bch_inode_unpacked *lostfound_inode)
309 {
310         struct qstr lostfound = QSTR("lost+found");
311         struct bch_hash_info root_hash_info =
312                 bch2_hash_info_init(c, root_inode);
313         struct bkey_inode_buf packed;
314         u64 inum;
315         int ret;
316
317         inum = bch2_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info,
318                                  &lostfound);
319         if (!inum) {
320                 bch_notice(c, "creating lost+found");
321                 goto create_lostfound;
322         }
323
324         ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
325         if (ret && ret != -ENOENT)
326                 return ret;
327
328         if (fsck_err_on(ret, c, "lost+found missing"))
329                 goto create_lostfound;
330
331         if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c,
332                         "lost+found inode not a directory"))
333                 goto create_lostfound;
334
335         return 0;
336 fsck_err:
337         return ret;
338 create_lostfound:
339         root_inode->i_nlink++;
340
341         bch2_inode_pack(&packed, root_inode);
342
343         ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
344                                NULL, NULL, NULL, 0);
345         if (ret)
346                 return ret;
347
348         bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
349         bch2_inode_pack(&packed, lostfound_inode);
350
351         ret = bch2_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
352                                &c->unused_inode_hint);
353         if (ret)
354                 return ret;
355
356         lostfound_inode->inum = packed.inode.k.p.inode;
357
358         ret = bch2_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
359                                 &lostfound, lostfound_inode->inum, NULL, 0);
360         if (ret)
361                 return ret;
362
363         return 0;
364 }
365
/* Growable bitmap indexed by inode number */
struct inode_bitmap {
	/* bit storage, (re)allocated by inode_bitmap_set() */
	unsigned long	*bits;
	/* capacity in bits, not bytes */
	size_t		size;
};
370
371 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
372 {
373         return nr < b->size ? test_bit(nr, b->bits) : false;
374 }
375
376 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
377 {
378         if (nr >= b->size) {
379                 size_t new_size = max(max(PAGE_SIZE * 8,
380                                           b->size * 2),
381                                           nr + 1);
382                 void *n;
383
384                 new_size = roundup_pow_of_two(new_size);
385                 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
386                 if (!n)
387                         return -ENOMEM;
388
389                 b->bits = n;
390                 b->size = new_size;
391         }
392
393         __set_bit(nr, b->bits);
394         return 0;
395 }
396
397 struct pathbuf {
398         size_t          nr;
399         size_t          size;
400
401         struct pathbuf_entry {
402                 u64     inum;
403                 u64     offset;
404         }               *entries;
405 };
406
407 static int path_down(struct pathbuf *p, u64 inum)
408 {
409         if (p->nr == p->size) {
410                 size_t new_size = max(256UL, p->size * 2);
411                 void *n = krealloc(p->entries,
412                                    new_size * sizeof(p->entries[0]),
413                                    GFP_KERNEL);
414                 if (!n)
415                         return -ENOMEM;
416
417                 p->entries = n;
418                 p->size = new_size;
419         };
420
421         p->entries[p->nr++] = (struct pathbuf_entry) {
422                 .inum = inum,
423                 .offset = 0,
424         };
425         return 0;
426 }
427
428 noinline_for_stack
429 static int check_directory_structure(struct bch_fs *c,
430                                      struct bch_inode_unpacked *lostfound_inode)
431 {
432         struct inode_bitmap dirs_done = { NULL, 0 };
433         struct pathbuf path = { 0, 0, NULL };
434         struct pathbuf_entry *e;
435         struct btree_iter iter;
436         struct bkey_s_c k;
437         struct bkey_s_c_dirent dirent;
438         bool had_unreachable;
439         u64 d_inum;
440         int ret = 0;
441
442         /* DFS: */
443 restart_dfs:
444         ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
445         if (ret)
446                 goto err;
447
448         ret = path_down(&path, BCACHE_ROOT_INO);
449         if (ret)
450                 return ret;
451
452         while (path.nr) {
453 next:
454                 e = &path.entries[path.nr - 1];
455
456                 if (e->offset == U64_MAX)
457                         goto up;
458
459                 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
460                                    POS(e->inum, e->offset + 1), k) {
461                         if (k.k->p.inode != e->inum)
462                                 break;
463
464                         e->offset = k.k->p.offset;
465
466                         if (k.k->type != BCH_DIRENT)
467                                 continue;
468
469                         dirent = bkey_s_c_to_dirent(k);
470
471                         if (dirent.v->d_type != DT_DIR)
472                                 continue;
473
474                         d_inum = le64_to_cpu(dirent.v->d_inum);
475
476                         if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
477                                         "directory with multiple hardlinks")) {
478                                 ret = remove_dirent(c, &iter, dirent);
479                                 if (ret)
480                                         goto err;
481                                 continue;
482                         }
483
484                         ret = inode_bitmap_set(&dirs_done, d_inum);
485                         if (ret)
486                                 goto err;
487
488                         ret = path_down(&path, d_inum);
489                         if (ret)
490                                 goto err;
491
492                         bch2_btree_iter_unlock(&iter);
493                         goto next;
494                 }
495                 ret = bch2_btree_iter_unlock(&iter);
496                 if (ret)
497                         goto err;
498 up:
499                 path.nr--;
500         }
501
502         had_unreachable = false;
503
504         for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
505                 if (k.k->type != BCH_INODE_FS ||
506                     !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
507                         continue;
508
509                 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
510                                 "unreachable directory found (inum %llu)",
511                                 k.k->p.inode)) {
512                         bch2_btree_iter_unlock(&iter);
513
514                         ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
515                         if (ret)
516                                 goto err;
517
518                         had_unreachable = true;
519                 }
520         }
521         ret = bch2_btree_iter_unlock(&iter);
522         if (ret)
523                 goto err;
524
525         if (had_unreachable) {
526                 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
527                 kfree(dirs_done.bits);
528                 kfree(path.entries);
529                 memset(&dirs_done, 0, sizeof(dirs_done));
530                 memset(&path, 0, sizeof(path));
531                 goto restart_dfs;
532         }
533
534 out:
535         kfree(dirs_done.bits);
536         kfree(path.entries);
537         return ret;
538 err:
539 fsck_err:
540         ret = bch2_btree_iter_unlock(&iter) ?: ret;
541         goto out;
542 }
543
544 struct nlink {
545         u32     count;
546         u32     dir_count;
547 };
548
549 typedef GENRADIX(struct nlink) nlink_table;
550
551 static void inc_link(struct bch_fs *c, nlink_table *links,
552                      u64 range_start, u64 *range_end,
553                      u64 inum, bool dir)
554 {
555         struct nlink *link;
556
557         if (inum < range_start || inum >= *range_end)
558                 return;
559
560         link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
561         if (!link) {
562                 bch_verbose(c, "allocation failed during fs gc - will need another pass");
563                 *range_end = inum;
564                 return;
565         }
566
567         if (dir)
568                 link->dir_count++;
569         else
570                 link->count++;
571 }
572
573 noinline_for_stack
574 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
575                                u64 range_start, u64 *range_end)
576 {
577         struct btree_iter iter;
578         struct bkey_s_c k;
579         struct bkey_s_c_dirent d;
580         u64 d_inum;
581         int ret;
582
583         inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false);
584
585         for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
586                 switch (k.k->type) {
587                 case BCH_DIRENT:
588                         d = bkey_s_c_to_dirent(k);
589                         d_inum = le64_to_cpu(d.v->d_inum);
590
591                         if (d.v->d_type == DT_DIR)
592                                 inc_link(c, links, range_start, range_end,
593                                          d.k->p.inode, true);
594
595                         inc_link(c, links, range_start, range_end,
596                                  d_inum, false);
597
598                         break;
599                 }
600
601                 bch2_btree_iter_cond_resched(&iter);
602         }
603         ret = bch2_btree_iter_unlock(&iter);
604         if (ret)
605                 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
606
607         return ret;
608 }
609
610 s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
611 {
612         struct btree_iter iter;
613         struct bkey_s_c k;
614         u64 sectors = 0;
615
616         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
617                 if (k.k->p.inode != inum)
618                         break;
619
620                 if (bkey_extent_is_allocation(k.k))
621                         sectors += k.k->size;
622         }
623
624         return bch2_btree_iter_unlock(&iter) ?: sectors;
625 }
626
627 static int bch2_gc_do_inode(struct bch_fs *c,
628                            struct bch_inode_unpacked *lostfound_inode,
629                            struct btree_iter *iter,
630                            struct bkey_s_c_inode inode, struct nlink link)
631 {
632         struct bch_inode_unpacked u;
633         int ret = 0;
634         u32 i_nlink, real_i_nlink;
635         bool do_update = false;
636
637         ret = bch2_inode_unpack(inode, &u);
638         if (bch2_fs_inconsistent_on(ret, c,
639                          "error unpacking inode %llu in fs-gc",
640                          inode.k->p.inode))
641                 return ret;
642
643         i_nlink = u.i_nlink + nlink_bias(u.i_mode);
644
645         fsck_err_on(i_nlink < link.count, c,
646                     "inode %llu i_link too small (%u < %u, type %i)",
647                     inode.k->p.inode, i_nlink,
648                     link.count, mode_to_type(u.i_mode));
649
650         /* These should have been caught/fixed by earlier passes: */
651         if (S_ISDIR(u.i_mode)) {
652                 need_fsck_err_on(link.count > 1, c,
653                         "directory %llu with multiple hardlinks: %u",
654                         inode.k->p.inode, link.count);
655
656                 real_i_nlink = link.count * 2 + link.dir_count;
657         } else {
658                 need_fsck_err_on(link.dir_count, c,
659                         "found dirents for non directory %llu",
660                         inode.k->p.inode);
661
662                 real_i_nlink = link.count + link.dir_count;
663         }
664
665         if (!link.count) {
666                 fsck_err_on(c->sb.clean, c,
667                             "filesystem marked clean, "
668                             "but found orphaned inode %llu",
669                             inode.k->p.inode);
670
671                 if (fsck_err_on(S_ISDIR(u.i_mode) &&
672                                 bch2_empty_dir(c, inode.k->p.inode), c,
673                                 "non empty directory with link count 0, "
674                                 "inode nlink %u, dir links found %u",
675                                 i_nlink, link.dir_count)) {
676                         ret = reattach_inode(c, lostfound_inode,
677                                              inode.k->p.inode);
678                         if (ret)
679                                 return ret;
680                 }
681
682                 bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
683
684                 ret = bch2_inode_rm(c, inode.k->p.inode);
685                 if (ret)
686                         bch_err(c, "error in fs gc: error %i "
687                                 "while deleting inode", ret);
688                 return ret;
689         }
690
691         if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) {
692                 fsck_err_on(c->sb.clean, c,
693                             "filesystem marked clean, "
694                             "but inode %llu has i_size dirty",
695                             inode.k->p.inode);
696
697                 bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
698
699                 /*
700                  * XXX: need to truncate partial blocks too here - or ideally
701                  * just switch units to bytes and that issue goes away
702                  */
703
704                 ret = bch2_inode_truncate(c, inode.k->p.inode,
705                                 round_up(u.i_size, PAGE_SIZE) >> 9,
706                                 NULL, NULL);
707                 if (ret) {
708                         bch_err(c, "error in fs gc: error %i "
709                                 "truncating inode", ret);
710                         return ret;
711                 }
712
713                 /*
714                  * We truncated without our normal sector accounting hook, just
715                  * make sure we recalculate it:
716                  */
717                 u.i_flags |= BCH_INODE_I_SECTORS_DIRTY;
718
719                 u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
720                 do_update = true;
721         }
722
723         if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) {
724                 s64 sectors;
725
726                 fsck_err_on(c->sb.clean, c,
727                             "filesystem marked clean, "
728                             "but inode %llu has i_sectors dirty",
729                             inode.k->p.inode);
730
731                 bch_verbose(c, "recounting sectors for inode %llu",
732                             inode.k->p.inode);
733
734                 sectors = bch2_count_inode_sectors(c, inode.k->p.inode);
735                 if (sectors < 0) {
736                         bch_err(c, "error in fs gc: error %i "
737                                 "recounting inode sectors",
738                                 (int) sectors);
739                         return sectors;
740                 }
741
742                 u.i_sectors = sectors;
743                 u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
744                 do_update = true;
745         }
746
747         if (i_nlink != real_i_nlink) {
748                 fsck_err_on(c->sb.clean, c,
749                             "filesystem marked clean, "
750                             "but inode %llu has wrong i_nlink "
751                             "(type %u i_nlink %u, should be %u)",
752                             inode.k->p.inode, mode_to_type(u.i_mode),
753                             i_nlink, real_i_nlink);
754
755                 bch_verbose(c, "setting inode %llu nlinks from %u to %u",
756                             inode.k->p.inode, i_nlink, real_i_nlink);
757                 u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);;
758                 do_update = true;
759         }
760
761         if (do_update) {
762                 struct bkey_inode_buf p;
763
764                 bch2_inode_pack(&p, &u);
765
766                 ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
767                                           BTREE_INSERT_NOFAIL,
768                                           BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
769                 if (ret && ret != -EINTR)
770                         bch_err(c, "error in fs gc: error %i "
771                                 "updating inode", ret);
772         }
773 fsck_err:
774         return ret;
775 }
776
777 noinline_for_stack
778 static int bch2_gc_walk_inodes(struct bch_fs *c,
779                               struct bch_inode_unpacked *lostfound_inode,
780                               nlink_table *links,
781                               u64 range_start, u64 range_end)
782 {
783         struct btree_iter iter;
784         struct bkey_s_c k;
785         struct nlink *link, zero_links = { 0, 0 };
786         struct genradix_iter nlinks_iter;
787         int ret = 0, ret2 = 0;
788         u64 nlinks_pos;
789
790         bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
791         genradix_iter_init(&nlinks_iter);
792
793         while ((k = bch2_btree_iter_peek(&iter)).k &&
794                !btree_iter_err(k)) {
795 peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
796
797                 if (!link && (!k.k || iter.pos.inode >= range_end))
798                         break;
799
800                 nlinks_pos = range_start + nlinks_iter.pos;
801                 if (iter.pos.inode > nlinks_pos) {
802                         /* Should have been caught by dirents pass: */
803                         need_fsck_err_on(link && link->count, c,
804                                 "missing inode %llu (nlink %u)",
805                                 nlinks_pos, link->count);
806                         genradix_iter_advance(&nlinks_iter, links);
807                         goto peek_nlinks;
808                 }
809
810                 if (iter.pos.inode < nlinks_pos || !link)
811                         link = &zero_links;
812
813                 if (k.k && k.k->type == BCH_INODE_FS) {
814                         /*
815                          * Avoid potential deadlocks with iter for
816                          * truncate/rm/etc.:
817                          */
818                         bch2_btree_iter_unlock(&iter);
819
820                         ret = bch2_gc_do_inode(c, lostfound_inode, &iter,
821                                               bkey_s_c_to_inode(k), *link);
822                         if (ret == -EINTR)
823                                 continue;
824                         if (ret)
825                                 break;
826
827                         if (link->count)
828                                 atomic_long_inc(&c->nr_inodes);
829                 } else {
830                         /* Should have been caught by dirents pass: */
831                         need_fsck_err_on(link->count, c,
832                                 "missing inode %llu (nlink %u)",
833                                 nlinks_pos, link->count);
834                 }
835
836                 if (nlinks_pos == iter.pos.inode)
837                         genradix_iter_advance(&nlinks_iter, links);
838
839                 bch2_btree_iter_advance_pos(&iter);
840                 bch2_btree_iter_cond_resched(&iter);
841         }
842 fsck_err:
843         ret2 = bch2_btree_iter_unlock(&iter);
844         if (ret2)
845                 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
846
847         return ret ?: ret2;
848 }
849
850 noinline_for_stack
851 static int check_inode_nlinks(struct bch_fs *c,
852                               struct bch_inode_unpacked *lostfound_inode)
853 {
854         nlink_table links;
855         u64 this_iter_range_start, next_iter_range_start = 0;
856         int ret = 0;
857
858         genradix_init(&links);
859
860         do {
861                 this_iter_range_start = next_iter_range_start;
862                 next_iter_range_start = U64_MAX;
863
864                 ret = bch2_gc_walk_dirents(c, &links,
865                                           this_iter_range_start,
866                                           &next_iter_range_start);
867                 if (ret)
868                         break;
869
870                 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
871                                          this_iter_range_start,
872                                          next_iter_range_start);
873                 if (ret)
874                         break;
875
876                 genradix_free(&links);
877         } while (next_iter_range_start != U64_MAX);
878
879         genradix_free(&links);
880
881         return ret;
882 }
883
884 /*
885  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
886  * Doesn't fix them yet, mainly because they haven't yet been observed:
887  */
888 int bch2_fsck(struct bch_fs *c, bool full_fsck)
889 {
890         struct bch_inode_unpacked root_inode, lostfound_inode;
891         int ret;
892
893         ret = check_root(c, &root_inode);
894         if (ret)
895                 return ret;
896
897         ret = check_lostfound(c, &root_inode, &lostfound_inode);
898         if (ret)
899                 return ret;
900
901         if (!full_fsck)
902                 goto check_nlinks;
903
904         ret = check_extents(c);
905         if (ret)
906                 return ret;
907
908         ret = check_dirents(c);
909         if (ret)
910                 return ret;
911
912         ret = check_xattrs(c);
913         if (ret)
914                 return ret;
915
916         ret = check_directory_structure(c, &lostfound_inode);
917         if (ret)
918                 return ret;
919 check_nlinks:
920         ret = check_inode_nlinks(c, &lostfound_inode);
921         if (ret)
922                 return ret;
923
924         return 0;
925 }