1 // SPDX-License-Identifier: GPL-2.0
5 #include "btree_update.h"
15 #include <linux/bsearch.h>
16 #include <linux/dcache.h> /* struct qstr */
18 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
20 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
22 struct btree_iter iter;
27 for_each_btree_key(trans, iter, BTREE_ID_extents,
28 POS(inum, 0), 0, k, ret) {
29 if (k.k->p.inode != inum)
32 if (bkey_extent_is_allocation(k.k))
36 bch2_trans_iter_exit(trans, &iter);
38 return ret ?: sectors;
41 static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
42 struct bch_inode_unpacked *inode,
45 struct btree_iter iter;
49 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
51 k = bch2_btree_iter_peek_slot(&iter);
57 *snapshot = iter.pos.snapshot;
58 ret = k.k->type == KEY_TYPE_inode
59 ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
62 bch2_trans_iter_exit(trans, &iter);
66 static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
67 struct bch_inode_unpacked *inode,
70 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
73 static int __write_inode(struct btree_trans *trans,
74 struct bch_inode_unpacked *inode,
77 struct btree_iter iter;
80 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
81 SPOS(0, inode->bi_inum, snapshot),
84 ret = bch2_btree_iter_traverse(&iter) ?:
85 bch2_inode_write(trans, &iter, inode);
86 bch2_trans_iter_exit(trans, &iter);
90 static int write_inode(struct btree_trans *trans,
91 struct bch_inode_unpacked *inode,
94 int ret = __bch2_trans_do(trans, NULL, NULL,
97 __write_inode(trans, inode, snapshot));
99 bch_err(trans->c, "error in fsck: error %i updating inode", ret);
103 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
105 struct bch_fs *c = trans->c;
106 struct btree_iter iter;
107 struct bch_inode_unpacked dir_inode;
108 struct bch_hash_info dir_hash_info;
111 ret = lookup_inode(trans, pos.inode, &dir_inode, NULL);
115 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
117 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
119 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
120 &dir_hash_info, &iter);
121 bch2_trans_iter_exit(trans, &iter);
125 static int remove_dirent(struct btree_trans *trans, struct bpos pos)
127 int ret = __bch2_trans_do(trans, NULL, NULL,
129 BTREE_INSERT_LAZY_RW,
130 __remove_dirent(trans, pos));
132 bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret);
136 /* Get lost+found, create if it doesn't exist: */
137 static int lookup_lostfound(struct btree_trans *trans,
138 struct bch_inode_unpacked *lostfound)
140 struct bch_fs *c = trans->c;
141 struct bch_inode_unpacked root;
142 struct bch_hash_info root_hash_info;
143 struct qstr lostfound_str = QSTR("lost+found");
148 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root, &snapshot);
149 if (ret && ret != -ENOENT)
152 root_hash_info = bch2_hash_info_init(c, &root);
153 inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
156 bch_notice(c, "creating lost+found");
157 goto create_lostfound;
160 ret = lookup_inode(trans, inum, lostfound, &snapshot);
161 if (ret && ret != -ENOENT) {
163 * The check_dirents pass has already run, dangling dirents
164 * shouldn't exist here:
166 bch_err(c, "error looking up lost+found: %i", ret);
170 if (ret == -ENOENT) {
172 bch2_inode_init_early(c, lostfound);
174 ret = __bch2_trans_do(trans, NULL, NULL,
176 BTREE_INSERT_LAZY_RW,
177 bch2_create_trans(trans,
178 BCACHEFS_ROOT_INO, &root,
181 0, 0, S_IFDIR|0700, 0, NULL, NULL));
183 bch_err(c, "error creating lost+found: %i", ret);
189 static int reattach_inode(struct btree_trans *trans,
190 struct bch_inode_unpacked *inode)
192 struct bch_hash_info dir_hash;
193 struct bch_inode_unpacked lostfound;
199 ret = lookup_lostfound(trans, &lostfound);
203 if (S_ISDIR(inode->bi_mode)) {
204 lostfound.bi_nlink++;
206 ret = write_inode(trans, &lostfound, U32_MAX);
211 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
213 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
214 name = (struct qstr) QSTR(name_buf);
216 ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
217 bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash,
218 mode_to_type(inode->bi_mode),
219 &name, inode->bi_inum, &dir_offset,
220 BCH_HASH_SET_MUST_CREATE));
222 bch_err(trans->c, "error %i reattaching inode %llu",
223 ret, inode->bi_inum);
227 inode->bi_dir = lostfound.bi_inum;
228 inode->bi_dir_offset = dir_offset;
230 return write_inode(trans, inode, U32_MAX);
233 static int remove_backpointer(struct btree_trans *trans,
234 struct bch_inode_unpacked *inode)
236 struct btree_iter iter;
240 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
241 POS(inode->bi_dir, inode->bi_dir_offset), 0);
242 k = bch2_btree_iter_peek_slot(&iter);
246 if (k.k->type != KEY_TYPE_dirent) {
251 ret = remove_dirent(trans, k.k->p);
253 bch2_trans_iter_exit(trans, &iter);
257 struct inode_walker {
258 bool first_this_inode;
262 struct bch_inode_unpacked inode;
265 static struct inode_walker inode_walker_init(void)
267 return (struct inode_walker) {
273 static int __walk_inode(struct btree_trans *trans,
274 struct inode_walker *w, u64 inum)
276 if (inum != w->cur_inum) {
277 int ret = __lookup_inode(trans, inum, &w->inode, &w->snapshot);
279 if (ret && ret != -ENOENT)
282 w->have_inode = !ret;
284 w->first_this_inode = true;
286 w->first_this_inode = false;
292 static int walk_inode(struct btree_trans *trans,
293 struct inode_walker *w, u64 inum)
295 return lockrestart_do(trans, __walk_inode(trans, w, inum));
298 static int hash_redo_key(struct btree_trans *trans,
299 const struct bch_hash_desc desc,
300 struct bch_hash_info *hash_info,
301 struct btree_iter *k_iter, struct bkey_s_c k)
303 struct bkey_i *delete;
306 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
308 return PTR_ERR(delete);
310 tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
314 bkey_reassemble(tmp, k);
316 bkey_init(&delete->k);
317 delete->k.p = k_iter->pos;
318 return bch2_btree_iter_traverse(k_iter) ?:
319 bch2_trans_update(trans, k_iter, delete, 0) ?:
320 bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
323 static int fsck_hash_delete_at(struct btree_trans *trans,
324 const struct bch_hash_desc desc,
325 struct bch_hash_info *info,
326 struct btree_iter *iter)
330 ret = bch2_hash_delete_at(trans, desc, info, iter) ?:
331 bch2_trans_commit(trans, NULL, NULL,
333 BTREE_INSERT_LAZY_RW);
335 ret = bch2_btree_iter_traverse(iter);
343 static int hash_check_key(struct btree_trans *trans,
344 const struct bch_hash_desc desc,
345 struct bch_hash_info *hash_info,
346 struct btree_iter *k_iter, struct bkey_s_c hash_k)
348 struct bch_fs *c = trans->c;
349 struct btree_iter iter = { NULL };
355 if (hash_k.k->type != desc.key_type)
358 hash = desc.hash_bkey(hash_info, hash_k);
360 if (likely(hash == hash_k.k->p.offset))
363 if (hash_k.k->p.offset < hash)
366 for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
367 BTREE_ITER_SLOTS, k, ret) {
368 if (!bkey_cmp(k.k->p, hash_k.k->p))
371 if (fsck_err_on(k.k->type == desc.key_type &&
372 !desc.cmp_bkey(k, hash_k), c,
373 "duplicate hash table keys:\n%s",
374 (bch2_bkey_val_to_text(&PBUF(buf), c,
376 ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
383 if (bkey_deleted(k.k)) {
384 bch2_trans_iter_exit(trans, &iter);
389 bch2_trans_iter_exit(trans, &iter);
392 if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
393 "hashed to %llu\n%s",
394 desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash,
395 (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
398 ret = __bch2_trans_do(trans, NULL, NULL,
399 BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
400 hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
402 bch_err(c, "hash_redo_key err %i", ret);
410 static int check_inode(struct btree_trans *trans,
411 struct btree_iter *iter,
412 struct bkey_s_c_inode inode)
414 struct bch_fs *c = trans->c;
415 struct bch_inode_unpacked u;
416 bool do_update = false;
419 ret = bch2_inode_unpack(inode, &u);
421 if (bch2_fs_inconsistent_on(ret, c,
422 "error unpacking inode %llu in fsck",
426 if (u.bi_flags & BCH_INODE_UNLINKED &&
428 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
430 bch_verbose(c, "deleting inode %llu", u.bi_inum);
432 bch2_trans_unlock(trans);
435 ret = bch2_inode_rm(c, u.bi_inum, false);
437 bch_err(c, "error in fsck: error %i while deleting inode", ret);
441 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
443 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
445 bch_verbose(c, "truncating inode %llu", u.bi_inum);
447 bch2_trans_unlock(trans);
451 * XXX: need to truncate partial blocks too here - or ideally
452 * just switch units to bytes and that issue goes away
454 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
455 POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9),
456 POS(u.bi_inum, U64_MAX),
459 bch_err(c, "error in fsck: error %i truncating inode", ret);
464 * We truncated without our normal sector accounting hook, just
465 * make sure we recalculate it:
467 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
469 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
473 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
475 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
479 bch_verbose(c, "recounting sectors for inode %llu",
482 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
484 bch_err(c, "error in fsck: error %i recounting inode sectors",
489 u.bi_sectors = sectors;
490 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
494 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
497 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
502 ret = __bch2_trans_do(trans, NULL, NULL,
504 BTREE_INSERT_LAZY_RW,
505 bch2_btree_iter_traverse(iter) ?:
506 bch2_inode_write(trans, iter, &u));
508 bch_err(c, "error in fsck: error %i "
509 "updating inode", ret);
516 static int check_inodes(struct bch_fs *c, bool full)
518 struct btree_trans trans;
519 struct btree_iter iter;
521 struct bkey_s_c_inode inode;
524 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
526 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
528 BTREE_ITER_PREFETCH, k, ret) {
529 if (k.k->type != KEY_TYPE_inode)
532 inode = bkey_s_c_to_inode(k);
535 (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
536 BCH_INODE_I_SECTORS_DIRTY|
537 BCH_INODE_UNLINKED))) {
538 ret = check_inode(&trans, &iter, inode);
543 bch2_trans_iter_exit(&trans, &iter);
545 BUG_ON(ret == -EINTR);
547 return bch2_trans_exit(&trans) ?: ret;
550 static int fix_overlapping_extent(struct btree_trans *trans,
551 struct bkey_s_c k, struct bpos cut_at)
553 struct btree_iter iter;
557 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
558 ret = PTR_ERR_OR_ZERO(u);
562 bkey_reassemble(u, k);
563 bch2_cut_front(cut_at, u);
567 * We don't want to go through the extent_handle_overwrites path:
569 * XXX: this is going to screw up disk accounting, extent triggers
570 * assume things about extent overwrites - we should be running the
571 * triggers manually here
573 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
574 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
576 BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
577 ret = bch2_btree_iter_traverse(&iter) ?:
578 bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
579 bch2_trans_commit(trans, NULL, NULL,
581 BTREE_INSERT_LAZY_RW);
582 bch2_trans_iter_exit(trans, &iter);
586 static int inode_backpointer_exists(struct btree_trans *trans,
587 struct bch_inode_unpacked *inode)
589 struct btree_iter iter;
593 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
594 POS(inode->bi_dir, inode->bi_dir_offset), 0);
595 k = bch2_btree_iter_peek_slot(&iter);
599 if (k.k->type != KEY_TYPE_dirent)
602 ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
604 bch2_trans_iter_exit(trans, &iter);
608 static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
609 struct bch_inode_unpacked *inode)
611 return d.k->p.inode == inode->bi_dir &&
612 d.k->p.offset == inode->bi_dir_offset;
616 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
617 * that i_size an i_sectors are consistent
620 static int check_extents(struct bch_fs *c)
622 struct inode_walker w = inode_walker_init();
623 struct btree_trans trans;
624 struct btree_iter iter;
626 struct bkey_buf prev;
630 bch2_bkey_buf_init(&prev);
631 prev.k->k = KEY(0, 0, 0);
632 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
634 bch_verbose(c, "checking extents");
636 bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
637 POS(BCACHEFS_ROOT_INO, 0),
639 BTREE_ITER_PREFETCH);
641 while ((k = bch2_btree_iter_peek(&iter)).k &&
642 !(ret = bkey_err(k))) {
644 w.cur_inum != k.k->p.inode &&
645 !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
646 fsck_err_on(w.inode.bi_sectors != i_sectors, c,
647 "inode %llu has incorrect i_sectors: got %llu, should be %llu",
649 w.inode.bi_sectors, i_sectors)) {
650 w.inode.bi_sectors = i_sectors;
652 ret = write_inode(&trans, &w.inode, w.snapshot);
657 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
661 bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
662 bch2_bkey_val_to_text(&PBUF(buf2), c, k);
664 if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
665 return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR;
668 ret = walk_inode(&trans, &w, k.k->p.inode);
672 if (w.first_this_inode)
675 if (fsck_err_on(!w.have_inode, c,
676 "extent type %u for missing inode %llu",
677 k.k->type, k.k->p.inode) ||
678 fsck_err_on(w.have_inode &&
679 !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
680 "extent type %u for non regular file, inode %llu mode %o",
681 k.k->type, k.k->p.inode, w.inode.bi_mode)) {
683 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
684 POS(k.k->p.inode, 0),
685 POS(k.k->p.inode, U64_MAX),
689 if (fsck_err_on(w.have_inode &&
690 !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
691 k.k->type != KEY_TYPE_reservation &&
692 k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
693 "extent type %u offset %llu past end of inode %llu, i_size %llu",
694 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
696 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
697 POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9),
698 POS(k.k->p.inode, U64_MAX),
702 if (bkey_extent_is_allocation(k.k))
703 i_sectors += k.k->size;
704 bch2_bkey_buf_reassemble(&prev, c, k);
706 bch2_btree_iter_advance(&iter);
711 bch2_trans_iter_exit(&trans, &iter);
712 bch2_bkey_buf_exit(&prev, c);
713 return bch2_trans_exit(&trans) ?: ret;
716 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
717 struct bch_hash_info *hash_info,
718 struct inode_walker *w, unsigned *nr_subdirs)
720 struct bch_fs *c = trans->c;
722 struct bkey_s_c_dirent d;
723 struct bch_inode_unpacked target;
726 bool backpointer_exists = true;
731 k = bch2_btree_iter_peek(iter);
740 w->cur_inum != k.k->p.inode &&
741 fsck_err_on(w->inode.bi_nlink != *nr_subdirs, c,
742 "directory %llu with wrong i_nlink: got %u, should be %u",
743 w->inode.bi_inum, w->inode.bi_nlink, *nr_subdirs)) {
744 w->inode.bi_nlink = *nr_subdirs;
745 ret = write_inode(trans, &w->inode, w->snapshot);
746 return ret ?: -EINTR;
749 ret = __walk_inode(trans, w, k.k->p.inode);
753 if (w->first_this_inode)
756 if (fsck_err_on(!w->have_inode, c,
757 "dirent in nonexisting directory:\n%s",
758 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) ||
759 fsck_err_on(!S_ISDIR(w->inode.bi_mode), c,
760 "dirent in non directory inode type %u:\n%s",
761 mode_to_type(w->inode.bi_mode),
762 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
763 return __bch2_trans_do(trans, NULL, NULL, 0,
764 bch2_btree_delete_at(trans, iter, 0));
769 if (w->first_this_inode)
770 *hash_info = bch2_hash_info_init(c, &w->inode);
772 ret = hash_check_key(trans, bch2_dirent_hash_desc,
776 if (ret) /* dirent has been deleted */
779 if (k.k->type != KEY_TYPE_dirent)
782 d = bkey_s_c_to_dirent(k);
783 d_inum = le64_to_cpu(d.v->d_inum);
785 ret = __lookup_inode(trans, d_inum, &target, &target_snapshot);
786 if (ret && ret != -ENOENT)
792 if (fsck_err_on(!have_target, c,
793 "dirent points to missing inode:\n%s",
794 (bch2_bkey_val_to_text(&PBUF(buf), c,
796 return remove_dirent(trans, d.k->p);
801 if (!target.bi_dir &&
802 !target.bi_dir_offset) {
803 target.bi_dir = k.k->p.inode;
804 target.bi_dir_offset = k.k->p.offset;
806 ret = __write_inode(trans, &target, target_snapshot) ?:
807 bch2_trans_commit(trans, NULL, NULL,
809 BTREE_INSERT_LAZY_RW);
815 if (!inode_backpointer_matches(d, &target)) {
816 ret = inode_backpointer_exists(trans, &target);
820 backpointer_exists = ret;
823 if (fsck_err_on(S_ISDIR(target.bi_mode) &&
824 backpointer_exists, c,
825 "directory %llu with multiple links",
827 return remove_dirent(trans, d.k->p);
829 if (fsck_err_on(backpointer_exists &&
831 "inode %llu has multiple links but i_nlink 0",
834 target.bi_flags &= ~BCH_INODE_UNLINKED;
836 ret = write_inode(trans, &target, target_snapshot);
837 return ret ?: -EINTR;
840 if (fsck_err_on(!backpointer_exists, c,
841 "inode %llu has wrong backpointer:\n"
843 "should be %llu:%llu",
846 target.bi_dir_offset,
849 target.bi_dir = k.k->p.inode;
850 target.bi_dir_offset = k.k->p.offset;
852 ret = write_inode(trans, &target, target_snapshot);
853 return ret ?: -EINTR;
857 if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
858 "incorrect d_type: should be %u:\n%s",
859 mode_to_type(target.bi_mode),
860 (bch2_bkey_val_to_text(&PBUF(buf), c,
862 struct bkey_i_dirent *n;
864 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
868 bkey_reassemble(&n->k_i, d.s_c);
869 n->v.d_type = mode_to_type(target.bi_mode);
871 ret = __bch2_trans_do(trans, NULL, NULL,
873 BTREE_INSERT_LAZY_RW,
874 bch2_btree_iter_traverse(iter) ?:
875 bch2_trans_update(trans, iter, &n->k_i, 0));
877 return ret ?: -EINTR;
880 *nr_subdirs += d.v->d_type == DT_DIR;
887 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
891 static int check_dirents(struct bch_fs *c)
893 struct inode_walker w = inode_walker_init();
894 struct bch_hash_info hash_info;
895 struct btree_trans trans;
896 struct btree_iter iter;
897 unsigned nr_subdirs = 0;
900 bch_verbose(c, "checking dirents");
902 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
904 bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
905 POS(BCACHEFS_ROOT_INO, 0),
907 BTREE_ITER_PREFETCH);
910 ret = lockrestart_do(&trans,
911 check_dirent(&trans, &iter, &hash_info, &w, &nr_subdirs));
914 } while (bch2_btree_iter_advance(&iter));
915 bch2_trans_iter_exit(&trans, &iter);
917 return bch2_trans_exit(&trans) ?: ret;
921 * Walk xattrs: verify that they all have a corresponding inode
924 static int check_xattrs(struct bch_fs *c)
926 struct inode_walker w = inode_walker_init();
927 struct bch_hash_info hash_info;
928 struct btree_trans trans;
929 struct btree_iter iter;
933 bch_verbose(c, "checking xattrs");
935 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
937 bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
938 POS(BCACHEFS_ROOT_INO, 0),
940 BTREE_ITER_PREFETCH);
942 while ((k = bch2_btree_iter_peek(&iter)).k &&
943 !(ret = bkey_err(k))) {
944 ret = walk_inode(&trans, &w, k.k->p.inode);
948 if (fsck_err_on(!w.have_inode, c,
949 "xattr for missing inode %llu",
951 ret = bch2_btree_delete_at(&trans, &iter, 0);
957 if (w.first_this_inode && w.have_inode)
958 hash_info = bch2_hash_info_init(c, &w.inode);
960 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
961 &hash_info, &iter, k);
965 bch2_btree_iter_advance(&iter);
971 bch2_trans_iter_exit(&trans, &iter);
972 return bch2_trans_exit(&trans) ?: ret;
975 /* Get root directory, create if it doesn't exist: */
976 static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
978 struct bkey_inode_buf packed;
982 bch_verbose(c, "checking root directory");
984 ret = bch2_trans_do(c, NULL, NULL, 0,
985 lookup_inode(&trans, BCACHEFS_ROOT_INO, root_inode, &snapshot));
986 if (ret && ret != -ENOENT)
989 if (fsck_err_on(ret, c, "root directory missing"))
992 if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
993 "root inode not a directory"))
1000 bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
1002 root_inode->bi_inum = BCACHEFS_ROOT_INO;
1004 bch2_inode_pack(c, &packed, root_inode);
1006 return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
1008 BTREE_INSERT_NOFAIL|
1009 BTREE_INSERT_LAZY_RW);
1016 struct pathbuf_entry {
1021 static int path_down(struct pathbuf *p, u64 inum)
1023 if (p->nr == p->size) {
1024 size_t new_size = max_t(size_t, 256UL, p->size * 2);
1025 void *n = krealloc(p->entries,
1026 new_size * sizeof(p->entries[0]),
1036 p->entries[p->nr++] = (struct pathbuf_entry) {
1042 static int check_path(struct btree_trans *trans,
1044 struct bch_inode_unpacked *inode)
1046 struct bch_fs *c = trans->c;
1053 while (inode->bi_inum != BCACHEFS_ROOT_INO) {
1054 ret = lockrestart_do(trans,
1055 inode_backpointer_exists(trans, inode));
1060 if (fsck_err(c, "unreachable inode %llu, type %u nlink %u backptr %llu:%llu",
1062 mode_to_type(inode->bi_mode),
1065 inode->bi_dir_offset))
1066 ret = reattach_inode(trans, inode);
1071 if (!S_ISDIR(inode->bi_mode))
1074 ret = path_down(p, inode->bi_inum);
1076 bch_err(c, "memory allocation failure");
1080 for (i = 0; i < p->nr; i++) {
1081 if (inode->bi_dir != p->entries[i].inum)
1084 /* XXX print path */
1085 if (!fsck_err(c, "directory structure loop"))
1088 ret = lockrestart_do(trans,
1089 remove_backpointer(trans, inode));
1091 bch_err(c, "error removing dirent: %i", ret);
1095 ret = reattach_inode(trans, inode);
1099 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
1101 /* Should have been caught in dirents pass */
1102 bch_err(c, "error looking up parent directory: %i", ret);
1108 bch_err(c, "%s: err %i", __func__, ret);
1113 * Check for unreachable inodes, as well as loops in the directory structure:
1114 * After check_dirents(), if an inode backpointer doesn't exist that means it's
1117 static int check_directory_structure(struct bch_fs *c)
1119 struct btree_trans trans;
1120 struct btree_iter iter;
1122 struct bch_inode_unpacked u;
1123 struct pathbuf path = { 0, 0, NULL };
1126 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1128 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
1130 BTREE_ITER_PREFETCH, k, ret) {
1131 if (k.k->type != KEY_TYPE_inode)
1134 ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
1136 /* Should have been caught earlier in fsck: */
1137 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
1141 ret = check_path(&trans, &path, &u);
1145 bch2_trans_iter_exit(&trans, &iter);
1147 BUG_ON(ret == -EINTR);
1149 kfree(path.entries);
1151 return bch2_trans_exit(&trans) ?: ret;
1154 struct nlink_table {
1165 static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot)
1167 if (t->nr == t->size) {
1168 size_t new_size = max_t(size_t, 128UL, t->size * 2);
1169 void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
1175 memcpy(d, t->d, t->size * sizeof(t->d[0]));
1183 t->d[t->nr++] = (struct nlink) {
1185 .snapshot = snapshot,
1191 static int nlink_cmp(const void *_l, const void *_r)
1193 const struct nlink *l = _l;
1194 const struct nlink *r = _r;
1196 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
1199 static void inc_link(struct bch_fs *c, struct nlink_table *links,
1200 u64 range_start, u64 range_end, u64 inum)
1202 struct nlink *link, key = {
1203 .inum = inum, .snapshot = U32_MAX,
1206 if (inum < range_start || inum >= range_end)
1209 link = __inline_bsearch(&key, links->d, links->nr,
1210 sizeof(links->d[0]), nlink_cmp);
1216 static int check_nlinks_find_hardlinks(struct bch_fs *c,
1217 struct nlink_table *t,
1218 u64 start, u64 *end)
1220 struct btree_trans trans;
1221 struct btree_iter iter;
1223 struct bkey_s_c_inode inode;
1224 struct bch_inode_unpacked u;
1227 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1229 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1232 BTREE_ITER_PREFETCH, k, ret) {
1233 if (k.k->type != KEY_TYPE_inode)
1236 inode = bkey_s_c_to_inode(k);
1239 * Backpointer and directory structure checks are sufficient for
1240 * directories, since they can't have hardlinks:
1242 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1245 /* Should never fail, checked by bch2_inode_invalid: */
1246 BUG_ON(bch2_inode_unpack(inode, &u));
1251 ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot);
1253 *end = k.k->p.offset;
1259 bch2_trans_iter_exit(&trans, &iter);
1260 bch2_trans_exit(&trans);
1263 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1269 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
1270 u64 range_start, u64 range_end)
1272 struct btree_trans trans;
1273 struct btree_iter iter;
1275 struct bkey_s_c_dirent d;
1278 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1280 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
1282 BTREE_ITER_PREFETCH, k, ret) {
1283 switch (k.k->type) {
1284 case KEY_TYPE_dirent:
1285 d = bkey_s_c_to_dirent(k);
1287 if (d.v->d_type != DT_DIR)
1288 inc_link(c, links, range_start, range_end,
1289 le64_to_cpu(d.v->d_inum));
1293 bch2_trans_cond_resched(&trans);
1295 bch2_trans_iter_exit(&trans, &iter);
1297 ret = bch2_trans_exit(&trans) ?: ret;
1299 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1305 static int check_nlinks_update_hardlinks(struct bch_fs *c,
1306 struct nlink_table *links,
1307 u64 range_start, u64 range_end)
1309 struct btree_trans trans;
1310 struct btree_iter iter;
1312 struct bkey_s_c_inode inode;
1313 struct bch_inode_unpacked u;
1314 struct nlink *link = links->d;
1317 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1319 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
1320 POS(0, range_start),
1322 BTREE_ITER_PREFETCH, k, ret) {
1323 if (k.k->p.offset >= range_end)
1326 if (k.k->type != KEY_TYPE_inode)
1329 inode = bkey_s_c_to_inode(k);
1330 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
1333 BUG_ON(bch2_inode_unpack(inode, &u));
1338 while (link->inum < k.k->p.offset) {
1340 BUG_ON(link >= links->d + links->nr);
1343 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
1344 "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)",
1345 u.bi_inum, mode_to_type(u.bi_mode),
1346 bch2_inode_nlink_get(&u), link->count)) {
1347 bch2_inode_nlink_set(&u, link->count);
1349 ret = __bch2_trans_do(&trans, NULL, NULL,
1350 BTREE_INSERT_NOFAIL|
1351 BTREE_INSERT_LAZY_RW,
1352 bch2_btree_iter_traverse(&iter) ?:
1353 bch2_inode_write(&trans, &iter, &u));
1355 bch_err(c, "error in fsck: error %i updating inode", ret);
1359 bch2_trans_iter_exit(&trans, &iter);
1360 bch2_trans_exit(&trans);
1363 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1369 static int check_nlinks(struct bch_fs *c)
1371 struct nlink_table links = { 0 };
1372 u64 this_iter_range_start, next_iter_range_start = 0;
1375 bch_verbose(c, "checking inode nlinks");
1378 this_iter_range_start = next_iter_range_start;
1379 next_iter_range_start = U64_MAX;
1381 ret = check_nlinks_find_hardlinks(c, &links,
1382 this_iter_range_start,
1383 &next_iter_range_start);
1385 ret = check_nlinks_walk_dirents(c, &links,
1386 this_iter_range_start,
1387 next_iter_range_start);
1391 ret = check_nlinks_update_hardlinks(c, &links,
1392 this_iter_range_start,
1393 next_iter_range_start);
1398 } while (next_iter_range_start != U64_MAX);
1406 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1407 * Doesn't fix them yet, mainly because they haven't yet been observed:
1409 int bch2_fsck_full(struct bch_fs *c)
1411 struct bch_inode_unpacked root_inode;
1413 return check_inodes(c, true) ?:
1417 check_root(c, &root_inode) ?:
1418 check_directory_structure(c) ?:
1422 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1424 return check_inodes(c, false);