1 // SPDX-License-Identifier: GPL-2.0
5 #include "btree_update.h"
15 #include <linux/bsearch.h>
16 #include <linux/dcache.h> /* struct qstr */
18 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
20 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
22 struct btree_iter *iter;
27 for_each_btree_key(trans, iter, BTREE_ID_extents,
28 POS(inum, 0), 0, k, ret) {
29 if (k.k->p.inode != inum)
32 if (bkey_extent_is_allocation(k.k))
36 bch2_trans_iter_free(trans, iter);
38 return ret ?: sectors;
41 static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
42 struct bch_inode_unpacked *inode,
45 struct btree_iter *iter;
49 iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
51 k = bch2_btree_iter_peek_slot(iter);
57 *snapshot = iter->pos.snapshot;
58 ret = k.k->type == KEY_TYPE_inode
59 ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
62 bch2_trans_iter_free(trans, iter);
66 static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
67 struct bch_inode_unpacked *inode,
70 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
73 static int __write_inode(struct btree_trans *trans,
74 struct bch_inode_unpacked *inode,
77 struct btree_iter *inode_iter =
78 bch2_trans_get_iter(trans, BTREE_ID_inodes,
79 SPOS(0, inode->bi_inum, snapshot),
81 int ret = bch2_btree_iter_traverse(inode_iter) ?:
82 bch2_inode_write(trans, inode_iter, inode);
83 bch2_trans_iter_put(trans, inode_iter);
87 static int write_inode(struct btree_trans *trans,
88 struct bch_inode_unpacked *inode,
91 int ret = __bch2_trans_do(trans, NULL, NULL,
94 __write_inode(trans, inode, snapshot));
96 bch_err(trans->c, "error in fsck: error %i updating inode", ret);
100 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
102 struct bch_fs *c = trans->c;
103 struct btree_iter *iter;
104 struct bch_inode_unpacked dir_inode;
105 struct bch_hash_info dir_hash_info;
108 ret = lookup_inode(trans, pos.inode, &dir_inode, NULL);
112 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
114 iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
116 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
117 &dir_hash_info, iter);
118 bch2_trans_iter_put(trans, iter);
122 static int remove_dirent(struct btree_trans *trans, struct bpos pos)
124 int ret = __bch2_trans_do(trans, NULL, NULL,
126 BTREE_INSERT_LAZY_RW,
127 __remove_dirent(trans, pos));
129 bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret);
133 /* Get lost+found, create if it doesn't exist: */
134 static int lookup_lostfound(struct btree_trans *trans,
135 struct bch_inode_unpacked *lostfound)
137 struct bch_fs *c = trans->c;
138 struct bch_inode_unpacked root;
139 struct bch_hash_info root_hash_info;
140 struct qstr lostfound_str = QSTR("lost+found");
145 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root, &snapshot);
146 if (ret && ret != -ENOENT)
149 root_hash_info = bch2_hash_info_init(c, &root);
150 inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
153 bch_notice(c, "creating lost+found");
154 goto create_lostfound;
157 ret = lookup_inode(trans, inum, lostfound, &snapshot);
158 if (ret && ret != -ENOENT) {
160 * The check_dirents pass has already run, dangling dirents
161 * shouldn't exist here:
163 bch_err(c, "error looking up lost+found: %i", ret);
167 if (ret == -ENOENT) {
169 bch2_inode_init_early(c, lostfound);
171 ret = __bch2_trans_do(trans, NULL, NULL,
173 BTREE_INSERT_LAZY_RW,
174 bch2_create_trans(trans,
175 BCACHEFS_ROOT_INO, &root,
178 0, 0, S_IFDIR|0700, 0, NULL, NULL));
180 bch_err(c, "error creating lost+found: %i", ret);
186 static int reattach_inode(struct btree_trans *trans,
187 struct bch_inode_unpacked *inode)
189 struct bch_hash_info dir_hash;
190 struct bch_inode_unpacked lostfound;
196 ret = lookup_lostfound(trans, &lostfound);
200 if (S_ISDIR(inode->bi_mode)) {
201 lostfound.bi_nlink++;
203 ret = write_inode(trans, &lostfound, U32_MAX);
208 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
210 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
211 name = (struct qstr) QSTR(name_buf);
213 ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
214 bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash,
215 mode_to_type(inode->bi_mode),
216 &name, inode->bi_inum, &dir_offset,
217 BCH_HASH_SET_MUST_CREATE));
219 bch_err(trans->c, "error %i reattaching inode %llu",
220 ret, inode->bi_inum);
224 inode->bi_dir = lostfound.bi_inum;
225 inode->bi_dir_offset = dir_offset;
227 return write_inode(trans, inode, U32_MAX);
230 static int remove_backpointer(struct btree_trans *trans,
231 struct bch_inode_unpacked *inode)
233 struct btree_iter *iter;
237 iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
238 POS(inode->bi_dir, inode->bi_dir_offset), 0);
239 k = bch2_btree_iter_peek_slot(iter);
243 if (k.k->type != KEY_TYPE_dirent) {
248 ret = remove_dirent(trans, k.k->p);
250 bch2_trans_iter_put(trans, iter);
254 struct inode_walker {
255 bool first_this_inode;
259 struct bch_inode_unpacked inode;
262 static struct inode_walker inode_walker_init(void)
264 return (struct inode_walker) {
270 static int __walk_inode(struct btree_trans *trans,
271 struct inode_walker *w, u64 inum)
273 if (inum != w->cur_inum) {
274 int ret = __lookup_inode(trans, inum, &w->inode, &w->snapshot);
276 if (ret && ret != -ENOENT)
279 w->have_inode = !ret;
281 w->first_this_inode = true;
283 w->first_this_inode = false;
289 static int walk_inode(struct btree_trans *trans,
290 struct inode_walker *w, u64 inum)
292 return lockrestart_do(trans, __walk_inode(trans, w, inum));
295 static int hash_redo_key(struct btree_trans *trans,
296 const struct bch_hash_desc desc,
297 struct bch_hash_info *hash_info,
298 struct btree_iter *k_iter, struct bkey_s_c k)
300 struct bkey_i *delete;
303 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
305 return PTR_ERR(delete);
307 tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
311 bkey_reassemble(tmp, k);
313 bkey_init(&delete->k);
314 delete->k.p = k_iter->pos;
315 return bch2_btree_iter_traverse(k_iter) ?:
316 bch2_trans_update(trans, k_iter, delete, 0) ?:
317 bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
320 static int fsck_hash_delete_at(struct btree_trans *trans,
321 const struct bch_hash_desc desc,
322 struct bch_hash_info *info,
323 struct btree_iter *iter)
327 ret = bch2_hash_delete_at(trans, desc, info, iter) ?:
328 bch2_trans_commit(trans, NULL, NULL,
330 BTREE_INSERT_LAZY_RW);
332 ret = bch2_btree_iter_traverse(iter);
340 static int hash_check_key(struct btree_trans *trans,
341 const struct bch_hash_desc desc,
342 struct bch_hash_info *hash_info,
343 struct btree_iter *k_iter, struct bkey_s_c hash_k)
345 struct bch_fs *c = trans->c;
346 struct btree_iter *iter = NULL;
352 if (hash_k.k->type != desc.key_type)
355 hash = desc.hash_bkey(hash_info, hash_k);
357 if (likely(hash == hash_k.k->p.offset))
360 if (hash_k.k->p.offset < hash)
363 for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
364 BTREE_ITER_SLOTS, k, ret) {
365 if (!bkey_cmp(k.k->p, hash_k.k->p))
368 if (fsck_err_on(k.k->type == desc.key_type &&
369 !desc.cmp_bkey(k, hash_k), c,
370 "duplicate hash table keys:\n%s",
371 (bch2_bkey_val_to_text(&PBUF(buf), c,
373 ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
380 if (bkey_deleted(k.k)) {
381 bch2_trans_iter_free(trans, iter);
386 bch2_trans_iter_free(trans, iter);
389 if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
390 "hashed to %llu\n%s",
391 desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash,
392 (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
395 ret = __bch2_trans_do(trans, NULL, NULL,
396 BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
397 hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
399 bch_err(c, "hash_redo_key err %i", ret);
407 static int check_inode(struct btree_trans *trans,
408 struct btree_iter *iter,
409 struct bkey_s_c_inode inode)
411 struct bch_fs *c = trans->c;
412 struct bch_inode_unpacked u;
413 bool do_update = false;
416 ret = bch2_inode_unpack(inode, &u);
418 if (bch2_fs_inconsistent_on(ret, c,
419 "error unpacking inode %llu in fsck",
423 if (u.bi_flags & BCH_INODE_UNLINKED &&
425 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
427 bch_verbose(c, "deleting inode %llu", u.bi_inum);
429 bch2_trans_unlock(trans);
432 ret = bch2_inode_rm(c, u.bi_inum, false);
434 bch_err(c, "error in fsck: error %i while deleting inode", ret);
438 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
440 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
442 bch_verbose(c, "truncating inode %llu", u.bi_inum);
444 bch2_trans_unlock(trans);
448 * XXX: need to truncate partial blocks too here - or ideally
449 * just switch units to bytes and that issue goes away
451 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
452 POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9),
453 POS(u.bi_inum, U64_MAX),
456 bch_err(c, "error in fsck: error %i truncating inode", ret);
461 * We truncated without our normal sector accounting hook, just
462 * make sure we recalculate it:
464 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
466 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
470 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
472 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
476 bch_verbose(c, "recounting sectors for inode %llu",
479 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
481 bch_err(c, "error in fsck: error %i recounting inode sectors",
486 u.bi_sectors = sectors;
487 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
491 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
494 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
499 ret = __bch2_trans_do(trans, NULL, NULL,
501 BTREE_INSERT_LAZY_RW,
502 bch2_btree_iter_traverse(iter) ?:
503 bch2_inode_write(trans, iter, &u));
505 bch_err(c, "error in fsck: error %i "
506 "updating inode", ret);
513 static int check_inodes(struct bch_fs *c, bool full)
515 struct btree_trans trans;
516 struct btree_iter *iter;
518 struct bkey_s_c_inode inode;
521 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
523 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
525 BTREE_ITER_PREFETCH, k, ret) {
526 if (k.k->type != KEY_TYPE_inode)
529 inode = bkey_s_c_to_inode(k);
532 (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
533 BCH_INODE_I_SECTORS_DIRTY|
534 BCH_INODE_UNLINKED))) {
535 ret = check_inode(&trans, iter, inode);
540 bch2_trans_iter_put(&trans, iter);
542 BUG_ON(ret == -EINTR);
544 return bch2_trans_exit(&trans) ?: ret;
547 static int fix_overlapping_extent(struct btree_trans *trans,
548 struct bkey_s_c k, struct bpos cut_at)
550 struct btree_iter *iter;
554 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
555 ret = PTR_ERR_OR_ZERO(u);
559 bkey_reassemble(u, k);
560 bch2_cut_front(cut_at, u);
564 * We don't want to go through the extent_handle_overwrites path:
566 * XXX: this is going to screw up disk accounting, extent triggers
567 * assume things about extent overwrites - we should be running the
568 * triggers manually here
570 iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p,
571 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
573 BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
574 ret = bch2_btree_iter_traverse(iter) ?:
575 bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?:
576 bch2_trans_commit(trans, NULL, NULL,
578 BTREE_INSERT_LAZY_RW);
579 bch2_trans_iter_put(trans, iter);
583 static int inode_backpointer_exists(struct btree_trans *trans,
584 struct bch_inode_unpacked *inode)
586 struct btree_iter *iter;
590 iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
591 POS(inode->bi_dir, inode->bi_dir_offset), 0);
592 k = bch2_btree_iter_peek_slot(iter);
596 if (k.k->type != KEY_TYPE_dirent)
599 ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
601 bch2_trans_iter_free(trans, iter);
605 static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
606 struct bch_inode_unpacked *inode)
608 return d.k->p.inode == inode->bi_dir &&
609 d.k->p.offset == inode->bi_dir_offset;
613 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
614 * that i_size an i_sectors are consistent
617 static int check_extents(struct bch_fs *c)
619 struct inode_walker w = inode_walker_init();
620 struct btree_trans trans;
621 struct btree_iter *iter;
623 struct bkey_buf prev;
627 bch2_bkey_buf_init(&prev);
628 prev.k->k = KEY(0, 0, 0);
629 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
631 bch_verbose(c, "checking extents");
633 iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
634 POS(BCACHEFS_ROOT_INO, 0),
636 BTREE_ITER_PREFETCH);
638 while ((k = bch2_btree_iter_peek(iter)).k &&
639 !(ret = bkey_err(k))) {
641 w.cur_inum != k.k->p.inode &&
642 !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
643 fsck_err_on(w.inode.bi_sectors != i_sectors, c,
644 "inode %llu has incorrect i_sectors: got %llu, should be %llu",
646 w.inode.bi_sectors, i_sectors)) {
647 w.inode.bi_sectors = i_sectors;
649 ret = write_inode(&trans, &w.inode, w.snapshot);
654 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
658 bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
659 bch2_bkey_val_to_text(&PBUF(buf2), c, k);
661 if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
662 return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR;
665 ret = walk_inode(&trans, &w, k.k->p.inode);
669 if (w.first_this_inode)
672 if (fsck_err_on(!w.have_inode, c,
673 "extent type %u for missing inode %llu",
674 k.k->type, k.k->p.inode) ||
675 fsck_err_on(w.have_inode &&
676 !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
677 "extent type %u for non regular file, inode %llu mode %o",
678 k.k->type, k.k->p.inode, w.inode.bi_mode)) {
680 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
681 POS(k.k->p.inode, 0),
682 POS(k.k->p.inode, U64_MAX),
686 if (fsck_err_on(w.have_inode &&
687 !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
688 k.k->type != KEY_TYPE_reservation &&
689 k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
690 "extent type %u offset %llu past end of inode %llu, i_size %llu",
691 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
693 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
694 POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9),
695 POS(k.k->p.inode, U64_MAX),
699 if (bkey_extent_is_allocation(k.k))
700 i_sectors += k.k->size;
701 bch2_bkey_buf_reassemble(&prev, c, k);
703 bch2_btree_iter_advance(iter);
708 bch2_trans_iter_put(&trans, iter);
709 bch2_bkey_buf_exit(&prev, c);
710 return bch2_trans_exit(&trans) ?: ret;
713 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
714 struct bch_hash_info *hash_info,
715 struct inode_walker *w, unsigned *nr_subdirs)
717 struct bch_fs *c = trans->c;
719 struct bkey_s_c_dirent d;
720 struct bch_inode_unpacked target;
723 bool backpointer_exists = true;
728 k = bch2_btree_iter_peek(iter);
737 w->cur_inum != k.k->p.inode &&
738 fsck_err_on(w->inode.bi_nlink != *nr_subdirs, c,
739 "directory %llu with wrong i_nlink: got %u, should be %u",
740 w->inode.bi_inum, w->inode.bi_nlink, *nr_subdirs)) {
741 w->inode.bi_nlink = *nr_subdirs;
742 ret = write_inode(trans, &w->inode, w->snapshot);
743 return ret ?: -EINTR;
746 ret = __walk_inode(trans, w, k.k->p.inode);
750 if (w->first_this_inode)
753 if (fsck_err_on(!w->have_inode, c,
754 "dirent in nonexisting directory:\n%s",
755 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) ||
756 fsck_err_on(!S_ISDIR(w->inode.bi_mode), c,
757 "dirent in non directory inode type %u:\n%s",
758 mode_to_type(w->inode.bi_mode),
759 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
760 return __bch2_trans_do(trans, NULL, NULL, 0,
761 bch2_btree_delete_at(trans, iter, 0));
766 if (w->first_this_inode)
767 *hash_info = bch2_hash_info_init(c, &w->inode);
769 ret = hash_check_key(trans, bch2_dirent_hash_desc,
773 if (ret) /* dirent has been deleted */
776 if (k.k->type != KEY_TYPE_dirent)
779 d = bkey_s_c_to_dirent(k);
780 d_inum = le64_to_cpu(d.v->d_inum);
782 ret = __lookup_inode(trans, d_inum, &target, &target_snapshot);
783 if (ret && ret != -ENOENT)
789 if (fsck_err_on(!have_target, c,
790 "dirent points to missing inode:\n%s",
791 (bch2_bkey_val_to_text(&PBUF(buf), c,
793 return remove_dirent(trans, d.k->p);
798 if (!target.bi_dir &&
799 !target.bi_dir_offset) {
800 target.bi_dir = k.k->p.inode;
801 target.bi_dir_offset = k.k->p.offset;
803 ret = __write_inode(trans, &target, target_snapshot) ?:
804 bch2_trans_commit(trans, NULL, NULL,
806 BTREE_INSERT_LAZY_RW);
812 if (!inode_backpointer_matches(d, &target)) {
813 ret = inode_backpointer_exists(trans, &target);
817 backpointer_exists = ret;
820 if (fsck_err_on(S_ISDIR(target.bi_mode) &&
821 backpointer_exists, c,
822 "directory %llu with multiple links",
824 return remove_dirent(trans, d.k->p);
826 if (fsck_err_on(backpointer_exists &&
828 "inode %llu has multiple links but i_nlink 0",
831 target.bi_flags &= ~BCH_INODE_UNLINKED;
833 ret = write_inode(trans, &target, target_snapshot);
834 return ret ?: -EINTR;
837 if (fsck_err_on(!backpointer_exists, c,
838 "inode %llu has wrong backpointer:\n"
840 "should be %llu:%llu",
843 target.bi_dir_offset,
846 target.bi_dir = k.k->p.inode;
847 target.bi_dir_offset = k.k->p.offset;
849 ret = write_inode(trans, &target, target_snapshot);
850 return ret ?: -EINTR;
854 if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
855 "incorrect d_type: should be %u:\n%s",
856 mode_to_type(target.bi_mode),
857 (bch2_bkey_val_to_text(&PBUF(buf), c,
859 struct bkey_i_dirent *n;
861 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
865 bkey_reassemble(&n->k_i, d.s_c);
866 n->v.d_type = mode_to_type(target.bi_mode);
868 ret = __bch2_trans_do(trans, NULL, NULL,
870 BTREE_INSERT_LAZY_RW,
871 bch2_btree_iter_traverse(iter) ?:
872 bch2_trans_update(trans, iter, &n->k_i, 0));
874 return ret ?: -EINTR;
877 *nr_subdirs += d.v->d_type == DT_DIR;
884 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
888 static int check_dirents(struct bch_fs *c)
890 struct inode_walker w = inode_walker_init();
891 struct bch_hash_info hash_info;
892 struct btree_trans trans;
893 struct btree_iter *iter;
894 unsigned nr_subdirs = 0;
897 bch_verbose(c, "checking dirents");
899 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
901 iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
902 POS(BCACHEFS_ROOT_INO, 0),
904 BTREE_ITER_PREFETCH);
907 ret = lockrestart_do(&trans,
908 check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
911 } while (bch2_btree_iter_advance(iter));
912 bch2_trans_iter_put(&trans, iter);
914 return bch2_trans_exit(&trans) ?: ret;
918 * Walk xattrs: verify that they all have a corresponding inode
921 static int check_xattrs(struct bch_fs *c)
923 struct inode_walker w = inode_walker_init();
924 struct bch_hash_info hash_info;
925 struct btree_trans trans;
926 struct btree_iter *iter;
930 bch_verbose(c, "checking xattrs");
932 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
934 iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
935 POS(BCACHEFS_ROOT_INO, 0),
937 BTREE_ITER_PREFETCH);
939 while ((k = bch2_btree_iter_peek(iter)).k &&
940 !(ret = bkey_err(k))) {
941 ret = walk_inode(&trans, &w, k.k->p.inode);
945 if (fsck_err_on(!w.have_inode, c,
946 "xattr for missing inode %llu",
948 ret = bch2_btree_delete_at(&trans, iter, 0);
954 if (w.first_this_inode && w.have_inode)
955 hash_info = bch2_hash_info_init(c, &w.inode);
957 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
958 &hash_info, iter, k);
962 bch2_btree_iter_advance(iter);
968 bch2_trans_iter_put(&trans, iter);
969 return bch2_trans_exit(&trans) ?: ret;
972 /* Get root directory, create if it doesn't exist: */
973 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
975 struct bkey_inode_buf packed;
979 bch_verbose(c, "checking root directory");
981 ret = bch2_trans_do(c, NULL, NULL, 0,
982 lookup_inode(&trans, BCACHEFS_ROOT_INO, root_inode, &snapshot));
983 if (ret && ret != -ENOENT)
986 if (fsck_err_on(ret, c, "root directory missing"))
989 if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
990 "root inode not a directory"))
997 bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
999 root_inode->bi_inum = BCACHEFS_ROOT_INO;
1001 bch2_inode_pack(c, &packed, root_inode);
1003 return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
1005 BTREE_INSERT_NOFAIL|
1006 BTREE_INSERT_LAZY_RW);
1013 struct pathbuf_entry {
1018 static int path_down(struct pathbuf *p, u64 inum)
1020 if (p->nr == p->size) {
1021 size_t new_size = max_t(size_t, 256UL, p->size * 2);
1022 void *n = krealloc(p->entries,
1023 new_size * sizeof(p->entries[0]),
1033 p->entries[p->nr++] = (struct pathbuf_entry) {
1039 static int check_path(struct btree_trans *trans,
1041 struct bch_inode_unpacked *inode)
1043 struct bch_fs *c = trans->c;
1050 while (inode->bi_inum != BCACHEFS_ROOT_INO) {
1051 ret = lockrestart_do(trans,
1052 inode_backpointer_exists(trans, inode));
1057 if (fsck_err(c, "unreachable inode %llu, type %u nlink %u backptr %llu:%llu",
1059 mode_to_type(inode->bi_mode),
1062 inode->bi_dir_offset))
1063 ret = reattach_inode(trans, inode);
1068 if (!S_ISDIR(inode->bi_mode))
1071 ret = path_down(p, inode->bi_inum);
1073 bch_err(c, "memory allocation failure");
1077 for (i = 0; i < p->nr; i++) {
1078 if (inode->bi_dir != p->entries[i].inum)
1081 /* XXX print path */
1082 if (!fsck_err(c, "directory structure loop"))
1085 ret = lockrestart_do(trans,
1086 remove_backpointer(trans, inode));
1088 bch_err(c, "error removing dirent: %i", ret);
1092 ret = reattach_inode(trans, inode);
1096 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
1098 /* Should have been caught in dirents pass */
1099 bch_err(c, "error looking up parent directory: %i", ret);
1105 bch_err(c, "%s: err %i", __func__, ret);
1110 * Check for unreachable inodes, as well as loops in the directory structure:
1111 * After check_dirents(), if an inode backpointer doesn't exist that means it's
1114 static int check_directory_structure(struct bch_fs *c)
1116 struct btree_trans trans;
1117 struct btree_iter *iter;
1119 struct bch_inode_unpacked u;
1120 struct pathbuf path = { 0, 0, NULL };
1123 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1125 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
1127 BTREE_ITER_PREFETCH, k, ret) {
1128 if (k.k->type != KEY_TYPE_inode)
1131 ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
1133 /* Should have been caught earlier in fsck: */
1134 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
1138 ret = check_path(&trans, &path, &u);
1142 bch2_trans_iter_put(&trans, iter);
1144 BUG_ON(ret == -EINTR);
1146 kfree(path.entries);
1148 return bch2_trans_exit(&trans) ?: ret;
1151 struct nlink_table {
1162 static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot)
1164 if (t->nr == t->size) {
1165 size_t new_size = max_t(size_t, 128UL, t->size * 2);
1166 void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
1172 memcpy(d, t->d, t->size * sizeof(t->d[0]));
1180 t->d[t->nr++] = (struct nlink) {
1182 .snapshot = snapshot,
1188 static int nlink_cmp(const void *_l, const void *_r)
1190 const struct nlink *l = _l;
1191 const struct nlink *r = _r;
1193 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
1196 static void inc_link(struct bch_fs *c, struct nlink_table *links,
1197 u64 range_start, u64 range_end, u64 inum)
1199 struct nlink *link, key = {
1200 .inum = inum, .snapshot = U32_MAX,
1203 if (inum < range_start || inum >= range_end)
1206 link = __inline_bsearch(&key, links->d, links->nr,
1207 sizeof(links->d[0]), nlink_cmp);
1213 static int check_nlinks_find_hardlinks(struct bch_fs *c,
1214 struct nlink_table *t,
1215 u64 start, u64 *end)
1217 struct btree_trans trans;
1218 struct btree_iter *iter;
1220 struct bkey_s_c_inode inode;
1221 struct bch_inode_unpacked u;
1224 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1226 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1229 BTREE_ITER_PREFETCH, k, ret) {
1230 if (k.k->type != KEY_TYPE_inode)
1233 inode = bkey_s_c_to_inode(k);
1236 * Backpointer and directory structure checks are sufficient for
1237 * directories, since they can't have hardlinks:
1239 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1242 /* Should never fail, checked by bch2_inode_invalid: */
1243 BUG_ON(bch2_inode_unpack(inode, &u));
1248 ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot);
1250 *end = k.k->p.offset;
1256 bch2_trans_iter_put(&trans, iter);
1257 bch2_trans_exit(&trans);
1260 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1266 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
1267 u64 range_start, u64 range_end)
1269 struct btree_trans trans;
1270 struct btree_iter *iter;
1272 struct bkey_s_c_dirent d;
1275 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1277 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
1279 BTREE_ITER_PREFETCH, k, ret) {
1280 switch (k.k->type) {
1281 case KEY_TYPE_dirent:
1282 d = bkey_s_c_to_dirent(k);
1284 if (d.v->d_type != DT_DIR)
1285 inc_link(c, links, range_start, range_end,
1286 le64_to_cpu(d.v->d_inum));
1290 bch2_trans_cond_resched(&trans);
1292 bch2_trans_iter_put(&trans, iter);
1294 ret = bch2_trans_exit(&trans) ?: ret;
1296 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1302 static int check_nlinks_update_hardlinks(struct bch_fs *c,
1303 struct nlink_table *links,
1304 u64 range_start, u64 range_end)
1306 struct btree_trans trans;
1307 struct btree_iter *iter;
1309 struct bkey_s_c_inode inode;
1310 struct bch_inode_unpacked u;
1311 struct nlink *link = links->d;
1314 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1316 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1317 POS(0, range_start),
1319 BTREE_ITER_PREFETCH, k, ret) {
1320 if (k.k->p.offset >= range_end)
1323 if (k.k->type != KEY_TYPE_inode)
1326 inode = bkey_s_c_to_inode(k);
1327 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1330 BUG_ON(bch2_inode_unpack(inode, &u));
1335 while (link->inum < k.k->p.offset) {
1337 BUG_ON(link >= links->d + links->nr);
1340 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
1341 "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)",
1342 u.bi_inum, mode_to_type(u.bi_mode),
1343 bch2_inode_nlink_get(&u), link->count)) {
1344 bch2_inode_nlink_set(&u, link->count);
1346 ret = __bch2_trans_do(&trans, NULL, NULL,
1347 BTREE_INSERT_NOFAIL|
1348 BTREE_INSERT_LAZY_RW,
1349 bch2_btree_iter_traverse(iter) ?:
1350 bch2_inode_write(&trans, iter, &u));
1352 bch_err(c, "error in fsck: error %i updating inode", ret);
1356 bch2_trans_iter_put(&trans, iter);
1357 bch2_trans_exit(&trans);
1360 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1366 static int check_nlinks(struct bch_fs *c)
1368 struct nlink_table links = { 0 };
1369 u64 this_iter_range_start, next_iter_range_start = 0;
1372 bch_verbose(c, "checking inode nlinks");
1375 this_iter_range_start = next_iter_range_start;
1376 next_iter_range_start = U64_MAX;
1378 ret = check_nlinks_find_hardlinks(c, &links,
1379 this_iter_range_start,
1380 &next_iter_range_start);
1382 ret = check_nlinks_walk_dirents(c, &links,
1383 this_iter_range_start,
1384 next_iter_range_start);
1388 ret = check_nlinks_update_hardlinks(c, &links,
1389 this_iter_range_start,
1390 next_iter_range_start);
1395 } while (next_iter_range_start != U64_MAX);
1403 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1404 * Doesn't fix them yet, mainly because they haven't yet been observed:
1406 int bch2_fsck_full(struct bch_fs *c)
1408 struct bch_inode_unpacked root_inode;
1410 return check_inodes(c, true) ?:
1414 check_root(c, &root_inode) ?:
1415 check_directory_structure(c) ?:
1419 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1421 return check_inodes(c, false);