3 #include "btree_update.h"
12 #include <linux/dcache.h> /* struct qstr */
13 #include <linux/generic-radix-tree.h>
15 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
17 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
18 struct bkey_s_c_dirent dirent)
21 struct bch_inode_unpacked dir_inode;
22 struct bch_hash_info dir_hash_info;
23 u64 dir_inum = dirent.k->p.inode;
27 name.len = bch2_dirent_name_bytes(dirent);
28 buf = kmalloc(name.len + 1, GFP_KERNEL);
32 memcpy(buf, dirent.v->d_name, name.len);
36 /* Unlock iter so we don't deadlock, after copying name: */
37 bch2_btree_iter_unlock(iter);
39 ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
43 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
45 ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
51 static int reattach_inode(struct bch_fs *c,
52 struct bch_inode_unpacked *lostfound_inode,
55 struct bch_hash_info lostfound_hash_info =
56 bch2_hash_info_init(c, lostfound_inode);
57 struct bkey_inode_buf packed;
62 snprintf(name_buf, sizeof(name_buf), "%llu", inum);
63 name = (struct qstr) QSTR(name_buf);
65 lostfound_inode->i_nlink++;
67 bch2_inode_pack(&packed, lostfound_inode);
69 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
74 return bch2_dirent_create(c, lostfound_inode->inum,
76 DT_DIR, &name, inum, NULL, 0);
80 bool first_this_inode;
83 struct bch_inode_unpacked inode;
86 static struct inode_walker inode_walker_init(void)
88 return (struct inode_walker) {
94 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
96 w->first_this_inode = inum != w->cur_inum;
99 if (w->first_this_inode) {
100 int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
102 if (ret && ret != -ENOENT)
105 w->have_inode = !ret;
112 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
113 * that i_size an i_sectors are consistent
116 static int check_extents(struct bch_fs *c)
118 struct inode_walker w = inode_walker_init();
119 struct btree_iter iter;
124 for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
125 POS(BCACHE_ROOT_INO, 0), k) {
126 if (k.k->type == KEY_TYPE_DISCARD)
129 ret = walk_inode(c, &w, k.k->p.inode);
133 unfixable_fsck_err_on(!w.have_inode, c,
134 "extent type %u for missing inode %llu",
135 k.k->type, k.k->p.inode);
137 unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
139 (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
140 c, "i_sectors wrong: got %llu, should be %llu",
141 w.inode.i_sectors, i_sectors);
143 unfixable_fsck_err_on(w.have_inode &&
144 !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
145 "extent type %u for non regular file, inode %llu mode %o",
146 k.k->type, k.k->p.inode, w.inode.i_mode);
148 unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
149 k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c,
150 "extent type %u offset %llu past end of inode %llu, i_size %llu",
151 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
154 return bch2_btree_iter_unlock(&iter) ?: ret;
158 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
162 static int check_dirents(struct bch_fs *c)
164 struct inode_walker w = inode_walker_init();
165 struct btree_iter iter;
169 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
170 POS(BCACHE_ROOT_INO, 0), k) {
171 struct bkey_s_c_dirent d;
172 struct bch_inode_unpacked target;
176 ret = walk_inode(c, &w, k.k->p.inode);
180 unfixable_fsck_err_on(!w.have_inode, c,
181 "dirent in nonexisting directory %llu",
184 unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
185 "dirent in non directory inode %llu, type %u",
186 k.k->p.inode, mode_to_type(w.inode.i_mode));
188 if (k.k->type != BCH_DIRENT)
191 d = bkey_s_c_to_dirent(k);
192 d_inum = le64_to_cpu(d.v->d_inum);
194 if (fsck_err_on(d_inum == d.k->p.inode, c,
195 "dirent points to own directory")) {
196 ret = remove_dirent(c, &iter, d);
202 ret = bch2_inode_find_by_inum(c, d_inum, &target);
203 if (ret && ret != -ENOENT)
209 if (fsck_err_on(!have_target, c,
210 "dirent points to missing inode %llu, type %u filename %s",
211 d_inum, d.v->d_type, d.v->d_name)) {
212 ret = remove_dirent(c, &iter, d);
218 if (fsck_err_on(have_target &&
220 mode_to_type(le16_to_cpu(target.i_mode)), c,
221 "incorrect d_type: got %u should be %u, filename %s",
223 mode_to_type(le16_to_cpu(target.i_mode)),
225 struct bkey_i_dirent *n;
227 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
233 bkey_reassemble(&n->k_i, d.s_c);
234 n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode));
236 ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
238 BTREE_INSERT_ENTRY(&iter, &n->k_i));
247 return bch2_btree_iter_unlock(&iter) ?: ret;
251 * Walk xattrs: verify that they all have a corresponding inode
254 static int check_xattrs(struct bch_fs *c)
256 struct inode_walker w = inode_walker_init();
257 struct btree_iter iter;
261 for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
262 POS(BCACHE_ROOT_INO, 0), k) {
263 ret = walk_inode(c, &w, k.k->p.inode);
267 unfixable_fsck_err_on(!w.have_inode, c,
268 "xattr for missing inode %llu",
272 return bch2_btree_iter_unlock(&iter) ?: ret;
275 /* Get root directory, create if it doesn't exist: */
276 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
278 struct bkey_inode_buf packed;
281 ret = bch2_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
282 if (ret && ret != -ENOENT)
285 if (fsck_err_on(ret, c, "root directory missing"))
288 if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c,
289 "root inode not a directory"))
296 bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
297 root_inode->inum = BCACHE_ROOT_INO;
299 bch2_inode_pack(&packed, root_inode);
301 return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
302 NULL, NULL, NULL, 0);
305 /* Get lost+found, create if it doesn't exist: */
306 static int check_lostfound(struct bch_fs *c,
307 struct bch_inode_unpacked *root_inode,
308 struct bch_inode_unpacked *lostfound_inode)
310 struct qstr lostfound = QSTR("lost+found");
311 struct bch_hash_info root_hash_info =
312 bch2_hash_info_init(c, root_inode);
313 struct bkey_inode_buf packed;
317 inum = bch2_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info,
320 bch_notice(c, "creating lost+found");
321 goto create_lostfound;
324 ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
325 if (ret && ret != -ENOENT)
328 if (fsck_err_on(ret, c, "lost+found missing"))
329 goto create_lostfound;
331 if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c,
332 "lost+found inode not a directory"))
333 goto create_lostfound;
339 root_inode->i_nlink++;
341 bch2_inode_pack(&packed, root_inode);
343 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
344 NULL, NULL, NULL, 0);
348 bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
349 bch2_inode_pack(&packed, lostfound_inode);
351 ret = bch2_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
352 &c->unused_inode_hint);
356 lostfound_inode->inum = packed.inode.k.p.inode;
358 ret = bch2_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
359 &lostfound, lostfound_inode->inum, NULL, 0);
366 struct inode_bitmap {
371 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
373 return nr < b->size ? test_bit(nr, b->bits) : false;
376 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
379 size_t new_size = max(max(PAGE_SIZE * 8,
384 new_size = roundup_pow_of_two(new_size);
385 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
393 __set_bit(nr, b->bits);
401 struct pathbuf_entry {
407 static int path_down(struct pathbuf *p, u64 inum)
409 if (p->nr == p->size) {
410 size_t new_size = max(256UL, p->size * 2);
411 void *n = krealloc(p->entries,
412 new_size * sizeof(p->entries[0]),
421 p->entries[p->nr++] = (struct pathbuf_entry) {
429 static int check_directory_structure(struct bch_fs *c,
430 struct bch_inode_unpacked *lostfound_inode)
432 struct inode_bitmap dirs_done = { NULL, 0 };
433 struct pathbuf path = { 0, 0, NULL };
434 struct pathbuf_entry *e;
435 struct btree_iter iter;
437 struct bkey_s_c_dirent dirent;
438 bool had_unreachable;
444 ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
448 ret = path_down(&path, BCACHE_ROOT_INO);
454 e = &path.entries[path.nr - 1];
456 if (e->offset == U64_MAX)
459 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
460 POS(e->inum, e->offset + 1), k) {
461 if (k.k->p.inode != e->inum)
464 e->offset = k.k->p.offset;
466 if (k.k->type != BCH_DIRENT)
469 dirent = bkey_s_c_to_dirent(k);
471 if (dirent.v->d_type != DT_DIR)
474 d_inum = le64_to_cpu(dirent.v->d_inum);
476 if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
477 "directory with multiple hardlinks")) {
478 ret = remove_dirent(c, &iter, dirent);
484 ret = inode_bitmap_set(&dirs_done, d_inum);
488 ret = path_down(&path, d_inum);
492 bch2_btree_iter_unlock(&iter);
495 ret = bch2_btree_iter_unlock(&iter);
502 had_unreachable = false;
504 for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
505 if (k.k->type != BCH_INODE_FS ||
506 !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
509 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
510 "unreachable directory found (inum %llu)",
512 bch2_btree_iter_unlock(&iter);
514 ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
518 had_unreachable = true;
521 ret = bch2_btree_iter_unlock(&iter);
525 if (had_unreachable) {
526 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
527 kfree(dirs_done.bits);
529 memset(&dirs_done, 0, sizeof(dirs_done));
530 memset(&path, 0, sizeof(path));
535 kfree(dirs_done.bits);
540 ret = bch2_btree_iter_unlock(&iter) ?: ret;
549 typedef GENRADIX(struct nlink) nlink_table;
551 static void inc_link(struct bch_fs *c, nlink_table *links,
552 u64 range_start, u64 *range_end,
557 if (inum < range_start || inum >= *range_end)
560 link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
562 bch_verbose(c, "allocation failed during fs gc - will need another pass");
574 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
575 u64 range_start, u64 *range_end)
577 struct btree_iter iter;
579 struct bkey_s_c_dirent d;
583 inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false);
585 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
588 d = bkey_s_c_to_dirent(k);
589 d_inum = le64_to_cpu(d.v->d_inum);
591 if (d.v->d_type == DT_DIR)
592 inc_link(c, links, range_start, range_end,
595 inc_link(c, links, range_start, range_end,
601 bch2_btree_iter_cond_resched(&iter);
603 ret = bch2_btree_iter_unlock(&iter);
605 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
610 s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
612 struct btree_iter iter;
616 for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
617 if (k.k->p.inode != inum)
620 if (bkey_extent_is_allocation(k.k))
621 sectors += k.k->size;
624 return bch2_btree_iter_unlock(&iter) ?: sectors;
627 static int bch2_gc_do_inode(struct bch_fs *c,
628 struct bch_inode_unpacked *lostfound_inode,
629 struct btree_iter *iter,
630 struct bkey_s_c_inode inode, struct nlink link)
632 struct bch_inode_unpacked u;
634 u32 i_nlink, real_i_nlink;
635 bool do_update = false;
637 ret = bch2_inode_unpack(inode, &u);
638 if (bch2_fs_inconsistent_on(ret, c,
639 "error unpacking inode %llu in fs-gc",
643 i_nlink = u.i_nlink + nlink_bias(u.i_mode);
645 fsck_err_on(i_nlink < link.count, c,
646 "inode %llu i_link too small (%u < %u, type %i)",
647 inode.k->p.inode, i_nlink,
648 link.count, mode_to_type(u.i_mode));
650 /* These should have been caught/fixed by earlier passes: */
651 if (S_ISDIR(u.i_mode)) {
652 need_fsck_err_on(link.count > 1, c,
653 "directory %llu with multiple hardlinks: %u",
654 inode.k->p.inode, link.count);
656 real_i_nlink = link.count * 2 + link.dir_count;
658 need_fsck_err_on(link.dir_count, c,
659 "found dirents for non directory %llu",
662 real_i_nlink = link.count + link.dir_count;
666 fsck_err_on(c->sb.clean, c,
667 "filesystem marked clean, "
668 "but found orphaned inode %llu",
671 if (fsck_err_on(S_ISDIR(u.i_mode) &&
672 bch2_empty_dir(c, inode.k->p.inode), c,
673 "non empty directory with link count 0, "
674 "inode nlink %u, dir links found %u",
675 i_nlink, link.dir_count)) {
676 ret = reattach_inode(c, lostfound_inode,
682 bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
684 ret = bch2_inode_rm(c, inode.k->p.inode);
686 bch_err(c, "error in fs gc: error %i "
687 "while deleting inode", ret);
691 if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) {
692 fsck_err_on(c->sb.clean, c,
693 "filesystem marked clean, "
694 "but inode %llu has i_size dirty",
697 bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
700 * XXX: need to truncate partial blocks too here - or ideally
701 * just switch units to bytes and that issue goes away
704 ret = bch2_inode_truncate(c, inode.k->p.inode,
705 round_up(u.i_size, PAGE_SIZE) >> 9,
708 bch_err(c, "error in fs gc: error %i "
709 "truncating inode", ret);
714 * We truncated without our normal sector accounting hook, just
715 * make sure we recalculate it:
717 u.i_flags |= BCH_INODE_I_SECTORS_DIRTY;
719 u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
723 if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) {
726 fsck_err_on(c->sb.clean, c,
727 "filesystem marked clean, "
728 "but inode %llu has i_sectors dirty",
731 bch_verbose(c, "recounting sectors for inode %llu",
734 sectors = bch2_count_inode_sectors(c, inode.k->p.inode);
736 bch_err(c, "error in fs gc: error %i "
737 "recounting inode sectors",
742 u.i_sectors = sectors;
743 u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
747 if (i_nlink != real_i_nlink) {
748 fsck_err_on(c->sb.clean, c,
749 "filesystem marked clean, "
750 "but inode %llu has wrong i_nlink "
751 "(type %u i_nlink %u, should be %u)",
752 inode.k->p.inode, mode_to_type(u.i_mode),
753 i_nlink, real_i_nlink);
755 bch_verbose(c, "setting inode %llu nlinks from %u to %u",
756 inode.k->p.inode, i_nlink, real_i_nlink);
757 u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);;
762 struct bkey_inode_buf p;
764 bch2_inode_pack(&p, &u);
766 ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
768 BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
769 if (ret && ret != -EINTR)
770 bch_err(c, "error in fs gc: error %i "
771 "updating inode", ret);
778 static int bch2_gc_walk_inodes(struct bch_fs *c,
779 struct bch_inode_unpacked *lostfound_inode,
781 u64 range_start, u64 range_end)
783 struct btree_iter iter;
785 struct nlink *link, zero_links = { 0, 0 };
786 struct genradix_iter nlinks_iter;
787 int ret = 0, ret2 = 0;
790 bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
791 genradix_iter_init(&nlinks_iter);
793 while ((k = bch2_btree_iter_peek(&iter)).k &&
794 !btree_iter_err(k)) {
795 peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
797 if (!link && (!k.k || iter.pos.inode >= range_end))
800 nlinks_pos = range_start + nlinks_iter.pos;
801 if (iter.pos.inode > nlinks_pos) {
802 /* Should have been caught by dirents pass: */
803 need_fsck_err_on(link && link->count, c,
804 "missing inode %llu (nlink %u)",
805 nlinks_pos, link->count);
806 genradix_iter_advance(&nlinks_iter, links);
810 if (iter.pos.inode < nlinks_pos || !link)
813 if (k.k && k.k->type == BCH_INODE_FS) {
815 * Avoid potential deadlocks with iter for
818 bch2_btree_iter_unlock(&iter);
820 ret = bch2_gc_do_inode(c, lostfound_inode, &iter,
821 bkey_s_c_to_inode(k), *link);
828 atomic_long_inc(&c->nr_inodes);
830 /* Should have been caught by dirents pass: */
831 need_fsck_err_on(link->count, c,
832 "missing inode %llu (nlink %u)",
833 nlinks_pos, link->count);
836 if (nlinks_pos == iter.pos.inode)
837 genradix_iter_advance(&nlinks_iter, links);
839 bch2_btree_iter_advance_pos(&iter);
840 bch2_btree_iter_cond_resched(&iter);
843 ret2 = bch2_btree_iter_unlock(&iter);
845 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
851 static int check_inode_nlinks(struct bch_fs *c,
852 struct bch_inode_unpacked *lostfound_inode)
855 u64 this_iter_range_start, next_iter_range_start = 0;
858 genradix_init(&links);
861 this_iter_range_start = next_iter_range_start;
862 next_iter_range_start = U64_MAX;
864 ret = bch2_gc_walk_dirents(c, &links,
865 this_iter_range_start,
866 &next_iter_range_start);
870 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
871 this_iter_range_start,
872 next_iter_range_start);
876 genradix_free(&links);
877 } while (next_iter_range_start != U64_MAX);
879 genradix_free(&links);
885 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
886 * Doesn't fix them yet, mainly because they haven't yet been observed:
888 int bch2_fsck(struct bch_fs *c, bool full_fsck)
890 struct bch_inode_unpacked root_inode, lostfound_inode;
893 ret = check_root(c, &root_inode);
897 ret = check_lostfound(c, &root_inode, &lostfound_inode);
904 ret = check_extents(c);
908 ret = check_dirents(c);
912 ret = check_xattrs(c);
916 ret = check_directory_structure(c, &lostfound_inode);
920 ret = check_inode_nlinks(c, &lostfound_inode);