3 #include "btree_update.h"
12 #include <linux/generic-radix-tree.h>
/*
 * QSTR(n): brace initializer for a struct qstr describing the C string n.
 * .len is computed with strlen(n) and .name points at n itself (nothing is
 * copied), so n must outlive the qstr.  n is expanded twice; pass an
 * expression without side effects.
 */
14 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * remove_dirent() - delete the directory entry @dirent from its directory.
 *
 * @c:      filesystem being checked
 * @iter:   btree iterator currently positioned on @dirent; it is unlocked here
 * @dirent: the entry to delete
 *
 * The entry's name is copied to a heap buffer before @iter is unlocked; the
 * statements shown after the unlock use only that copy and dir_inum.
 *
 * NOTE(review): this listing omits lines (allocation-failure check, error
 * checks, cleanup, return) - presumably buf is freed and ret returned on
 * every path; confirm against the full source.
 */
16 static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
17 struct bkey_s_c_dirent dirent)
20 struct bch_inode_unpacked dir_inode;
21 struct bch_hash_info dir_hash_info;
/* The owning directory is the inode field of the dirent key's position. */
22 u64 dir_inum = dirent.k->p.inode;
/* One extra byte is allocated beyond the name - presumably for a NUL written on an omitted line. */
26 name.len = bch_dirent_name_bytes(dirent);
27 buf = kmalloc(name.len + 1, GFP_KERNEL);
31 memcpy(buf, dirent.v->d_name, name.len);
35 /* Unlock iter so we don't deadlock, after copying name: */
36 bch_btree_iter_unlock(iter);
/* The directory's inode is looked up to build the hash info passed to the delete. */
38 ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode);
42 dir_hash_info = bch_hash_info_init(&dir_inode);
44 ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
/*
 * reattach_inode() - link inode number inum into lost+found.
 *
 * The new entry is named after the inode number in decimal.  lost+found's
 * link count is incremented in the caller's unpacked copy and the inode is
 * written back before the dirent is created.
 *
 * The dirent is always created with type DT_DIR; both callers visible in this
 * file pass directories.
 *
 * Returns the result of bch_dirent_create(); the handling of a failed inode
 * insert is on lines not shown in this listing.
 */
50 static int reattach_inode(struct bch_fs *c,
51 struct bch_inode_unpacked *lostfound_inode,
54 struct bch_hash_info lostfound_hash_info =
55 bch_hash_info_init(lostfound_inode);
56 struct bkey_inode_buf packed;
/* Entry name is the decimal inode number. */
61 snprintf(name_buf, sizeof(name_buf), "%llu", inum);
62 name = (struct qstr) QSTR(name_buf);
/* Account for the new entry in lost+found's link count, then persist it. */
64 lostfound_inode->i_nlink++;
66 bch_inode_pack(&packed, lostfound_inode);
68 ret = bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
73 return bch_dirent_create(c, lostfound_inode->inum,
75 DT_DIR, &name, inum, NULL, 0);
/* Set by walk_inode(): true when the inum passed in differs from cur_inum. */
79 bool first_this_inode;
/* Filled by bch_inode_find_by_inum() in walk_inode(); trust it only when have_inode is true. */
82 struct bch_inode_unpacked inode;
/*
 * inode_walker_init() - return a struct inode_walker by value, built from a
 * compound literal.  The initial field values are on lines not shown here.
 */
85 static struct inode_walker inode_walker_init(void)
87 return (struct inode_walker) {
/*
 * walk_inode() - make the walker's cached inode correspond to inum.
 *
 * Lets a scan over keys look up each inode once per run of keys sharing an
 * inode number: the lookup is only repeated when inum differs from the one
 * seen on the previous call.
 *
 * After the call w->first_this_inode says whether inum changed, and
 * w->have_inode says whether the lookup succeeded.  -ENOENT is not treated as
 * an error here; it only clears have_inode.  Other lookup errors take the
 * branch at the ret check (presumably returned to the caller - the statement
 * itself is not shown).
 */
93 static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
95 w->first_this_inode = inum != w->cur_inum;
98 if (w->first_this_inode) {
99 int ret = bch_inode_find_by_inum(c, inum, &w->inode);
101 if (ret && ret != -ENOENT)
104 w->have_inode = !ret;
111 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
112 * that i_size and i_sectors are consistent
/*
 * check_extents() - scan the extents btree from the root inode onwards and
 * report extents that do not match their owning inode.
 *
 * All findings are reported with unfixable_fsck_err_on(); nothing is repaired
 * in the lines shown.  Checks, per extent:
 *   - the owning inode exists;
 *   - on the first extent of each inode, the sector count from
 *     bch_count_inode_sectors() is compared (part of the condition is on a
 *     line not shown);
 *   - the inode is a regular file or a symlink;
 *   - non-reservation extents do not end past i_size rounded up to PAGE_SIZE,
 *     expressed in 512-byte sectors (>> 9).
 *
 * Returns the iterator unlock error if there is one, otherwise ret.
 */
115 static int check_extents(struct bch_fs *c)
117 struct inode_walker w = inode_walker_init();
118 struct btree_iter iter;
123 for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
124 POS(BCACHE_ROOT_INO, 0), k) {
125 if (k.k->type == KEY_TYPE_DISCARD)
128 ret = walk_inode(c, &w, k.k->p.inode);
132 unfixable_fsck_err_on(!w.have_inode, c,
133 "extent type %u for missing inode %llu",
134 k.k->type, k.k->p.inode);
/*
 * NOTE(review): bch_count_inode_sectors() returns s64 and can return a btree
 * error instead of a count; that value is used here as a sector count - confirm
 * this is intended.
 */
136 unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
138 (i_sectors = bch_count_inode_sectors(c, w.cur_inum)),
139 c, "i_sectors wrong: got %llu, should be %llu",
140 w.inode.i_sectors, i_sectors);
142 unfixable_fsck_err_on(w.have_inode &&
143 !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
144 "extent type %u for non regular file, inode %llu mode %o",
145 k.k->type, k.k->p.inode, w.inode.i_mode);
/*
 * NOTE(review): unlike the check above, this one does not test w.have_inode
 * before reading w.inode.i_size - presumably the missing-inode report above
 * leaves the loop first; confirm.
 */
147 unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
148 k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c,
149 "extent type %u offset %llu past end of inode %llu, i_size %llu",
150 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
153 return bch_btree_iter_unlock(&iter) ?: ret;
157 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
/*
 * check_dirents() - scan the dirents btree from the root inode onwards.
 *
 * For every key: the owning inode must exist and be a directory (reported as
 * unfixable).  For keys of type BCH_DIRENT the target is then checked:
 *   - an entry pointing at its own directory is removed;
 *   - an entry whose target inode does not exist is removed;
 *   - an entry whose d_type disagrees with the target's mode is rewritten
 *     in place with the corrected type.
 *
 * Returns the iterator unlock error if there is one, otherwise ret.
 */
161 static int check_dirents(struct bch_fs *c)
163 struct inode_walker w = inode_walker_init();
164 struct btree_iter iter;
168 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
169 POS(BCACHE_ROOT_INO, 0), k) {
170 struct bkey_s_c_dirent d;
171 struct bch_inode_unpacked target;
175 ret = walk_inode(c, &w, k.k->p.inode);
179 unfixable_fsck_err_on(!w.have_inode, c,
180 "dirent in nonexisting directory %llu",
183 unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
184 "dirent in non directory inode %llu, type %u",
185 k.k->p.inode, mode_to_type(w.inode.i_mode));
/* Only real dirent keys carry a target to validate. */
187 if (k.k->type != BCH_DIRENT)
190 d = bkey_s_c_to_dirent(k);
191 d_inum = le64_to_cpu(d.v->d_inum);
193 if (fsck_err_on(d_inum == d.k->p.inode, c,
194 "dirent points to own directory")) {
195 ret = remove_dirent(c, &iter, d);
/* -ENOENT is an expected outcome here (dangling entry); other errors are real failures. */
201 ret = bch_inode_find_by_inum(c, d_inum, &target);
202 if (ret && ret != -ENOENT)
/*
 * NOTE(review): d_name is printed with %s, but remove_dirent() copies it using
 * an explicit length from bch_dirent_name_bytes() - if the on-disk name is not
 * NUL-terminated this reads past it; confirm (a "%.*s" form would be safe).
 */
208 if (fsck_err_on(!have_target, c,
209 "dirent points to missing inode %llu, type %u filename %s",
210 d_inum, d.v->d_type, d.v->d_name)) {
211 ret = remove_dirent(c, &iter, d);
/*
 * NOTE(review): target is a struct bch_inode_unpacked; the same type's i_mode
 * is passed to mode_to_type() without le16_to_cpu() a few lines above
 * (w.inode.i_mode).  The conversion here looks inconsistent and would
 * byte-swap on big-endian hosts - confirm.
 */
217 if (fsck_err_on(have_target &&
219 mode_to_type(le16_to_cpu(target.i_mode)), c,
220 "incorrect d_type: got %u should be %u, filename %s",
222 mode_to_type(le16_to_cpu(target.i_mode)),
224 struct bkey_i_dirent *n;
/* Build a mutable copy of the key, patch d_type, and insert it at the iterator's position. */
226 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
232 bkey_reassemble(&n->k_i, d.s_c);
233 n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode));
235 ret = bch_btree_insert_at(c, NULL, NULL, NULL,
237 BTREE_INSERT_ENTRY(&iter, &n->k_i));
246 return bch_btree_iter_unlock(&iter) ?: ret;
250 * Walk xattrs: verify that they all have a corresponding inode
/*
 * check_xattrs() - scan the xattrs btree from the root inode onwards and
 * report (as unfixable) any xattr whose owning inode does not exist.
 *
 * Returns the iterator unlock error if there is one, otherwise ret.
 */
253 static int check_xattrs(struct bch_fs *c)
255 struct inode_walker w = inode_walker_init();
256 struct btree_iter iter;
260 for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
261 POS(BCACHE_ROOT_INO, 0), k) {
262 ret = walk_inode(c, &w, k.k->p.inode);
266 unfixable_fsck_err_on(!w.have_inode, c,
267 "xattr for missing inode %llu",
271 return bch_btree_iter_unlock(&iter) ?: ret;
274 /* Get root directory, create if it doesn't exist: */
/*
 * check_root() - load the root directory inode into *root_inode, recreating
 * it if needed.
 *
 * If the inode is missing (-ENOENT) or is not a directory, a fresh directory
 * inode is initialised with mode S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, given inum
 * BCACHE_ROOT_INO and inserted into the inodes btree.  Any other lookup error
 * takes the branch at the first ret check (statement not shown).
 *
 * Returns the result of the inode insert on the recreate path; the return for
 * a healthy root is on a line not shown.
 */
275 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
277 struct bkey_inode_buf packed;
280 ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
281 if (ret && ret != -ENOENT)
284 if (fsck_err_on(ret, c, "root directory missing"))
287 if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c,
288 "root inode not a directory"))
/* Recreate path: build a new root directory inode and write it. */
295 bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
296 root_inode->inum = BCACHE_ROOT_INO;
298 bch_inode_pack(&packed, root_inode);
300 return bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
301 NULL, NULL, NULL, 0);
304 /* Get lost+found, create if it doesn't exist: */
/*
 * check_lostfound() - load the "lost+found" directory of the root into
 * *lostfound_inode, creating it if needed.
 *
 * lost+found is looked up by name in the root directory.  It is (re)created
 * when the lookup fails, when the inode it names is missing, or when that
 * inode is not a directory.
 *
 * Creation, in order: bump the root's link count and write the root inode,
 * allocate a new directory inode, then add the "lost+found" dirent to the
 * root.  Error checks between these steps are on lines not shown.
 */
305 static int check_lostfound(struct bch_fs *c,
306 struct bch_inode_unpacked *root_inode,
307 struct bch_inode_unpacked *lostfound_inode)
309 struct qstr lostfound = QSTR("lost+found");
310 struct bch_hash_info root_hash_info =
311 bch_hash_info_init(root_inode);
312 struct bkey_inode_buf packed;
316 inum = bch_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info,
319 bch_notice(c, "creating lost+found");
320 goto create_lostfound;
323 ret = bch_inode_find_by_inum(c, inum, lostfound_inode);
324 if (ret && ret != -ENOENT)
327 if (fsck_err_on(ret, c, "lost+found missing"))
328 goto create_lostfound;
330 if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c,
331 "lost+found inode not a directory"))
332 goto create_lostfound;
/* Creation path: the root gains a link for the new subdirectory. */
338 root_inode->i_nlink++;
340 bch_inode_pack(&packed, root_inode);
342 ret = bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
343 NULL, NULL, NULL, 0);
347 bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
348 bch_inode_pack(&packed, lostfound_inode);
350 ret = bch_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
351 &c->unused_inode_hint);
/* The allocated inode number is read back from the packed key's position. */
355 lostfound_inode->inum = packed.inode.k.p.inode;
357 ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
358 &lostfound, lostfound_inode->inum, NULL, 0);
/*
 * Growable bitmap indexed by inode number.  The helpers below use its bits
 * (bit array) and size (capacity in bits) members; the member declarations
 * are on lines not shown.
 */
365 struct inode_bitmap {
/*
 * inode_bitmap_test() - return whether bit nr is set.  Indices at or beyond
 * the current capacity read as false rather than touching memory.
 */
370 static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
372 return nr < b->size ? test_bit(nr, b->bits) : false;
/*
 * inode_bitmap_set() - set bit nr, growing the bitmap when needed.
 *
 * Growth picks a size of at least PAGE_SIZE * 8 bits, rounded up to a power
 * of two, and reallocates the bit array to new_size / 8 bytes.  The growth
 * condition, failure handling and return value are on lines not shown.
 */
375 static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
378 size_t new_size = max(max(PAGE_SIZE * 8,
383 new_size = roundup_pow_of_two(new_size);
/*
 * NOTE(review): inode_bitmap_test() relies on the newly added tail being
 * zero; verify that krealloc() with __GFP_ZERO zeroes the extension on the
 * kernel version this targets.
 */
384 n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
392 __set_bit(nr, b->bits);
/*
 * One element of the directory path stack (the entries array of struct
 * pathbuf).  check_directory_structure() reads and updates its inum and
 * offset members; the member declarations are on lines not shown.
 */
400 struct pathbuf_entry {
/*
 * path_down() - push a new entry for directory inum onto the path stack.
 *
 * When the stack is full its capacity is doubled, with a minimum of 256
 * entries.  Allocation-failure handling, the fields of the pushed entry and
 * the return value are on lines not shown.
 */
406 static int path_down(struct pathbuf *p, u64 inum)
408 if (p->nr == p->size) {
409 size_t new_size = max(256UL, p->size * 2);
410 void *n = krealloc(p->entries,
411 new_size * sizeof(p->entries[0]),
420 p->entries[p->nr++] = (struct pathbuf_entry) {
/*
 * check_directory_structure() - verify that the directories form a tree
 * rooted at BCACHE_ROOT_INO.
 *
 * Phase 1 walks directories depth-first using an explicit stack (path) and a
 * bitmap of directories already reached (dirs_done).  Each stack entry
 * remembers the offset of the last dirent processed, so the scan of a
 * directory resumes at offset + 1.  A directory dirent whose target is
 * already in dirs_done is reported as "directory with multiple hardlinks" and
 * removed.
 *
 * Phase 2 scans the inodes btree; any directory inode not in dirs_done is
 * unreachable and is reattached under lost+found.  If anything was
 * reattached, the bitmap and path are reset - presumably to rerun the whole
 * check, as the logged message says (the jump itself is not shown).
 */
428 static int check_directory_structure(struct bch_fs *c,
429 struct bch_inode_unpacked *lostfound_inode)
431 struct inode_bitmap dirs_done = { NULL, 0 };
432 struct pathbuf path = { 0, 0, NULL };
433 struct pathbuf_entry *e;
434 struct btree_iter iter;
436 struct bkey_s_c_dirent dirent;
437 bool had_unreachable;
/* Seed the traversal with the root directory. */
443 ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
447 ret = path_down(&path, BCACHE_ROOT_INO);
/* Work on the directory at the top of the stack. */
453 e = &path.entries[path.nr - 1];
/* offset == U64_MAX is tested first: e->offset + 1 below would wrap to 0. */
455 if (e->offset == U64_MAX)
458 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
459 POS(e->inum, e->offset + 1), k) {
/* A key belonging to a different inode means this directory is exhausted. */
460 if (k.k->p.inode != e->inum)
463 e->offset = k.k->p.offset;
465 if (k.k->type != BCH_DIRENT)
468 dirent = bkey_s_c_to_dirent(k);
/* Only subdirectories are descended into. */
470 if (dirent.v->d_type != DT_DIR)
473 d_inum = le64_to_cpu(dirent.v->d_inum);
475 if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
476 "directory with multiple hardlinks")) {
477 ret = remove_dirent(c, &iter, dirent);
483 ret = inode_bitmap_set(&dirs_done, d_inum);
487 ret = path_down(&path, d_inum);
491 bch_btree_iter_unlock(&iter);
494 ret = bch_btree_iter_unlock(&iter);
501 had_unreachable = false;
503 for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
504 if (k.k->type != BCH_INODE_FS ||
505 !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
508 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
509 "unreachable directory found (inum %llu)",
/*
 * NOTE(review): k.k->p.inode is read after the iterator is unlocked, whereas
 * remove_dirent() copies what it needs before unlocking - confirm k is still
 * valid at this point.
 */
511 bch_btree_iter_unlock(&iter);
513 ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
517 had_unreachable = true;
520 ret = bch_btree_iter_unlock(&iter);
524 if (had_unreachable) {
525 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
526 kfree(dirs_done.bits);
528 memset(&dirs_done, 0, sizeof(dirs_done));
529 memset(&path, 0, sizeof(path));
534 kfree(dirs_done.bits);
539 ret = bch_btree_iter_unlock(&iter) ?: ret;
/* Generic radix tree of struct nlink; inc_link() indexes it by (inum - range_start). */
548 typedef GENRADIX(struct nlink) nlink_table;
/*
 * inc_link() - record one reference to inode inum in the link table.
 *
 * Inodes outside [range_start, *range_end) are ignored.  The table slot for
 * inum is allocated on demand.  If that allocation fails, a verbose message
 * says another pass will be needed - presumably *range_end is lowered on an
 * omitted line so the remaining inodes are handled by a later pass.  The
 * counter updates are also on lines not shown.
 */
550 static void inc_link(struct bch_fs *c, nlink_table *links,
551 u64 range_start, u64 *range_end,
556 if (inum < range_start || inum >= *range_end)
559 link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
561 bch_verbose(c, "allocation failed during fs gc - will need another pass");
/*
 * bch_gc_walk_dirents() - count references to inodes by scanning every
 * dirent, accumulating into links via inc_link().
 *
 * The root directory gets one reference up front.  Dirents of type DT_DIR
 * issue an extra inc_link() call before the one made for every dirent; the
 * arguments that distinguish the two calls are on lines not shown.
 *
 * range_end is passed by pointer through to inc_link().
 */
573 static int bch_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
574 u64 range_start, u64 *range_end)
576 struct btree_iter iter;
578 struct bkey_s_c_dirent d;
582 inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false);
584 for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
587 d = bkey_s_c_to_dirent(k);
588 d_inum = le64_to_cpu(d.v->d_inum);
590 if (d.v->d_type == DT_DIR)
591 inc_link(c, links, range_start, range_end,
594 inc_link(c, links, range_start, range_end,
600 bch_btree_iter_cond_resched(&iter);
602 ret = bch_btree_iter_unlock(&iter);
604 bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
/*
 * bch_count_inode_sectors() - sum the sizes of inode inum's extents for which
 * bkey_extent_is_allocation() is true.
 *
 * The scan starts at POS(inum, 0); a key belonging to a different inode ends
 * it (the statement after the test is not shown, presumably a break).
 *
 * Returns the iterator unlock result if it is non-zero (an error), otherwise
 * the sector count.  Callers must treat the s64 as "count or error".
 */
609 s64 bch_count_inode_sectors(struct bch_fs *c, u64 inum)
611 struct btree_iter iter;
615 for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
616 if (k.k->p.inode != inum)
619 if (bkey_extent_is_allocation(k.k))
620 sectors += k.k->size;
623 return bch_btree_iter_unlock(&iter) ?: sectors;
/*
 * bch_gc_do_inode() - reconcile one inode with the link counts gathered from
 * the dirents (link), and finish any interrupted size/sector updates.
 *
 * Steps visible in this listing:
 *   - unpack the inode; compare the stored link count (plus nlink_bias) with
 *     the number of dirents found;
 *   - compute the expected link count: count * 2 + dir_count for
 *     directories, count + dir_count otherwise;
 *   - orphan handling: a directory for which bch_empty_dir() returns non-zero
 *     is reattached to lost+found, otherwise the inode is deleted (the
 *     condition guarding this section is on a line not shown);
 *   - BCH_INODE_I_SIZE_DIRTY: truncate to i_size rounded up to PAGE_SIZE (in
 *     512-byte sectors), then force a sector recount;
 *   - BCH_INODE_I_SECTORS_DIRTY: recount sectors from the extents;
 *   - if anything changed, repack and write the inode at iter.
 *
 * "filesystem marked clean" reports are raised when such repairs are needed
 * on a filesystem whose superblock says it was shut down cleanly.
 */
626 static int bch_gc_do_inode(struct bch_fs *c,
627 struct bch_inode_unpacked *lostfound_inode,
628 struct btree_iter *iter,
629 struct bkey_s_c_inode inode, struct nlink link)
631 struct bch_inode_unpacked u;
633 u32 i_nlink, real_i_nlink;
634 bool do_update = false;
636 ret = bch_inode_unpack(inode, &u);
637 if (bch_fs_inconsistent_on(ret, c,
638 "error unpacking inode %llu in fs-gc",
/* i_nlink is stored with a per-mode bias; add it back to compare with dirent counts. */
642 i_nlink = u.i_nlink + nlink_bias(u.i_mode);
644 fsck_err_on(i_nlink < link.count, c,
645 "inode %llu i_link too small (%u < %u, type %i)",
646 inode.k->p.inode, i_nlink,
647 link.count, mode_to_type(u.i_mode));
649 /* These should have been caught/fixed by earlier passes: */
650 if (S_ISDIR(u.i_mode)) {
651 need_fsck_err_on(link.count > 1, c,
652 "directory %llu with multiple hardlinks: %u",
653 inode.k->p.inode, link.count);
655 real_i_nlink = link.count * 2 + link.dir_count;
657 need_fsck_err_on(link.dir_count, c,
658 "found dirents for non directory %llu",
661 real_i_nlink = link.count + link.dir_count;
665 fsck_err_on(c->sb.clean, c,
666 "filesystem marked clean, "
667 "but found orphaned inode %llu",
/*
 * NOTE(review): the message says "non empty", so bch_empty_dir() presumably
 * returns non-zero for a directory that still has entries - confirm.
 */
670 if (fsck_err_on(S_ISDIR(u.i_mode) &&
671 bch_empty_dir(c, inode.k->p.inode), c,
672 "non empty directory with link count 0, "
673 "inode nlink %u, dir links found %u",
674 i_nlink, link.dir_count)) {
675 ret = reattach_inode(c, lostfound_inode,
681 bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
683 ret = bch_inode_rm(c, inode.k->p.inode);
685 bch_err(c, "error in fs gc: error %i "
686 "while deleting inode", ret);
690 if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) {
691 fsck_err_on(c->sb.clean, c,
692 "filesystem marked clean, "
693 "but inode %llu has i_size dirty",
696 bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
699 * XXX: need to truncate partial blocks too here - or ideally
700 * just switch units to bytes and that issue goes away
703 ret = bch_inode_truncate(c, inode.k->p.inode,
704 round_up(u.i_size, PAGE_SIZE) >> 9,
707 bch_err(c, "error in fs gc: error %i "
708 "truncating inode", ret);
713 * We truncated without our normal sector accounting hook, just
714 * make sure we recalculate it:
716 u.i_flags |= BCH_INODE_I_SECTORS_DIRTY;
718 u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
722 if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) {
725 fsck_err_on(c->sb.clean, c,
726 "filesystem marked clean, "
727 "but inode %llu has i_sectors dirty",
730 bch_verbose(c, "recounting sectors for inode %llu",
733 sectors = bch_count_inode_sectors(c, inode.k->p.inode);
735 bch_err(c, "error in fs gc: error %i "
736 "recounting inode sectors",
741 u.i_sectors = sectors;
742 u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
746 if (i_nlink != real_i_nlink) {
747 fsck_err_on(c->sb.clean, c,
748 "filesystem marked clean, "
749 "but inode %llu has wrong i_nlink "
750 "(type %u i_nlink %u, should be %u)",
751 inode.k->p.inode, mode_to_type(u.i_mode),
752 i_nlink, real_i_nlink);
754 bch_verbose(c, "setting inode %llu nlinks from %u to %u",
755 inode.k->p.inode, i_nlink, real_i_nlink);
/* Store the count with the bias removed again.  NOTE(review): stray second ';' (harmless empty statement). */
756 u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);;
761 struct bkey_inode_buf p;
763 bch_inode_pack(&p, &u);
765 ret = bch_btree_insert_at(c, NULL, NULL, NULL,
767 BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
/* -EINTR is not logged - presumably the caller retries on it; confirm. */
768 if (ret && ret != -EINTR)
769 bch_err(c, "error in fs gc: error %i "
770 "updating inode", ret);
/*
 * bch_gc_walk_inodes() - walk the inodes btree and the link table in step
 * (both are ordered by inode number) and run bch_gc_do_inode() on each inode.
 *
 * nlinks_pos is the inode number of the current link-table slot
 * (range_start + radix-tree position).  Three cases per iteration:
 *   - the btree is ahead of the table: the slot has no inode; a non-zero
 *     count there is reported as a missing inode, and the table advances;
 *   - the btree is behind the table, or the table is exhausted: the branch
 *     taken is on lines not shown (presumably the inode is processed with
 *     zero_links);
 *   - both positions match: the inode is processed with the slot's counts.
 */
777 static int bch_gc_walk_inodes(struct bch_fs *c,
778 struct bch_inode_unpacked *lostfound_inode,
780 u64 range_start, u64 range_end)
782 struct btree_iter iter;
784 struct nlink *link, zero_links = { 0, 0 };
785 struct genradix_iter nlinks_iter;
786 int ret = 0, ret2 = 0;
789 bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
790 genradix_iter_init(&nlinks_iter);
792 while ((k = bch_btree_iter_peek(&iter)).k &&
793 !btree_iter_err(k)) {
794 peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
/* Both sources exhausted for this range. */
796 if (!link && (!k.k || iter.pos.inode >= range_end))
799 nlinks_pos = range_start + nlinks_iter.pos;
800 if (iter.pos.inode > nlinks_pos) {
801 /* Should have been caught by dirents pass: */
802 need_fsck_err_on(link && link->count, c,
803 "missing inode %llu (nlink %u)",
804 nlinks_pos, link->count);
805 genradix_iter_advance(&nlinks_iter, links);
809 if (iter.pos.inode < nlinks_pos || !link)
812 if (k.k && k.k->type == BCH_INODE_FS) {
814 * Avoid potential deadlocks with iter for
/*
 * NOTE(review): k is handed to bch_gc_do_inode() after the iterator is
 * unlocked; confirm the key remains valid until it has been unpacked.
 */
817 bch_btree_iter_unlock(&iter);
819 ret = bch_gc_do_inode(c, lostfound_inode, &iter,
820 bkey_s_c_to_inode(k), *link);
827 atomic_long_inc(&c->nr_inodes);
829 /* Should have been caught by dirents pass: */
830 need_fsck_err_on(link->count, c,
831 "missing inode %llu (nlink %u)",
832 nlinks_pos, link->count);
/* Advance the table only when it was consumed for this inode. */
835 if (nlinks_pos == iter.pos.inode)
836 genradix_iter_advance(&nlinks_iter, links);
838 bch_btree_iter_advance_pos(&iter);
839 bch_btree_iter_cond_resched(&iter);
842 ret2 = bch_btree_iter_unlock(&iter);
844 bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
/*
 * check_inode_nlinks() - verify and repair inode link counts, one range of
 * inode numbers per pass.
 *
 * Each pass covers [this_iter_range_start, next_iter_range_start).  The end
 * starts at U64_MAX and is passed by pointer to bch_gc_walk_dirents(), which
 * may lower it (presumably when the link table cannot be grown - confirm in
 * inc_link()).  The loop repeats from the lowered bound until a pass ends
 * with the bound still at U64_MAX.  The link table is freed after every
 * pass and once more after the loop.
 */
850 static int check_inode_nlinks(struct bch_fs *c,
851 struct bch_inode_unpacked *lostfound_inode)
854 u64 this_iter_range_start, next_iter_range_start = 0;
857 genradix_init(&links);
860 this_iter_range_start = next_iter_range_start;
861 next_iter_range_start = U64_MAX;
863 ret = bch_gc_walk_dirents(c, &links,
864 this_iter_range_start,
865 &next_iter_range_start);
869 ret = bch_gc_walk_inodes(c, lostfound_inode, &links,
870 this_iter_range_start,
871 next_iter_range_start);
875 genradix_free(&links);
876 } while (next_iter_range_start != U64_MAX);
878 genradix_free(&links);
884 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
885 * Doesn't fix them yet, mainly because they haven't yet been observed:
/*
 * bch_fsck() - run the filesystem consistency passes in order.
 *
 * The root and lost+found inodes are obtained first, because the later passes
 * take lost+found as the place to reattach orphans.  Then: extents, dirents,
 * xattrs, directory structure, and inode link counts.
 *
 * NOTE(review): how full_fsck selects between passes, and the error handling
 * between them, are on lines not shown in this listing.
 */
887 int bch_fsck(struct bch_fs *c, bool full_fsck)
889 struct bch_inode_unpacked root_inode, lostfound_inode;
892 ret = check_root(c, &root_inode);
896 ret = check_lostfound(c, &root_inode, &lostfound_inode);
903 ret = check_extents(c);
907 ret = check_dirents(c);
911 ret = check_xattrs(c);
915 ret = check_directory_structure(c, &lostfound_inode);
919 ret = check_inode_nlinks(c, &lostfound_inode);