X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Ffsck.c;h=62788ae15eff3f9cf5078afaa93f9bd24f89f2db;hb=a2094890a90a2f865e49f94e8448deca7e5852ef;hp=f137b730f96d84b3c95035989d9ef97c2ed67a25;hpb=85ee972555948337bb1a58f0702a4da95db6758f;p=bcachefs-tools-debian diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index f137b73..62788ae 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1,9 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_update.h" #include "dirent.h" #include "error.h" -#include "fs.h" +#include "fs-common.h" #include "fsck.h" #include "inode.h" #include "keylist.h" @@ -15,9 +17,31 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, - struct bkey_s_c_dirent dirent) +static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) +{ + struct btree_iter *iter; + struct bkey_s_c k; + u64 sectors = 0; + int ret; + + for_each_btree_key(trans, iter, BTREE_ID_extents, + POS(inum, 0), 0, k, ret) { + if (k.k->p.inode != inum) + break; + + if (bkey_extent_is_allocation(k.k)) + sectors += k.k->size; + } + + bch2_trans_iter_free(trans, iter); + + return ret ?: sectors; +} + +static int __remove_dirent(struct btree_trans *trans, + struct bkey_s_c_dirent dirent) { + struct bch_fs *c = trans->c; struct qstr name; struct bch_inode_unpacked dir_inode; struct bch_hash_info dir_hash_info; @@ -26,40 +50,46 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, char *buf; name.len = bch2_dirent_name_bytes(dirent); - buf = kmalloc(name.len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; + buf = bch2_trans_kmalloc(trans, name.len + 1); + if (IS_ERR(buf)) + return PTR_ERR(buf); memcpy(buf, dirent.v->d_name, name.len); buf[name.len] = '\0'; name.name = buf; - /* Unlock iter so we don't deadlock, after copying name: */ - bch2_btree_iter_unlock(iter); - - ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode); - if (ret) { + ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0); + if (ret && ret != -EINTR) bch_err(c, "remove_dirent: err %i looking up directory inode", ret); - goto err; - } + if (ret) + return ret; dir_hash_info = bch2_hash_info_init(c, &dir_inode); - ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); - if (ret) + ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, + &dir_hash_info, dir_inum, &name); + if (ret && ret != -EINTR) bch_err(c, "remove_dirent: err %i deleting dirent", ret); -err: - kfree(buf); - return ret; + if (ret) + return ret; + + return 0; +} + +static int remove_dirent(struct btree_trans *trans, + struct bkey_s_c_dirent dirent) +{ + return __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + __remove_dirent(trans, dirent)); } static int reattach_inode(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, u64 inum) { - struct bch_hash_info lostfound_hash_info = - bch2_hash_info_init(c, lostfound_inode); - struct bkey_inode_buf packed; + struct bch_inode_unpacked dir_u, inode_u; char name_buf[20]; struct qstr name; int ret; @@ -67,28 +97,13 @@ static int reattach_inode(struct bch_fs *c, snprintf(name_buf, sizeof(name_buf), "%llu", inum); name = (struct qstr) QSTR(name_buf); - lostfound_inode->i_nlink++; - - bch2_inode_pack(&packed, lostfound_inode); - - ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, - BTREE_INSERT_NOFAIL); - if (ret) { - bch_err(c, "error %i reattaching inode %llu while updating lost+found", - ret, inum); - return ret; - } + ret = bch2_trans_do(c, NULL, NULL, + BTREE_INSERT_LAZY_RW, + bch2_link_trans(&trans, lostfound_inode->bi_inum, + inum, &dir_u, &inode_u, &name)); + if (ret) + bch_err(c, "error %i reattaching inode %llu", ret, inum); - ret = bch2_dirent_create(c, lostfound_inode->inum, - &lostfound_hash_info, - DT_DIR, &name, inum, NULL, - BTREE_INSERT_NOFAIL); - if (ret) { - bch_err(c, "error %i reattaching inode %llu while creating new dirent", - ret, inum); - return ret; - } return ret; } @@ -107,18 +122,21 @@ static struct inode_walker inode_walker_init(void) }; } -static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) +static int walk_inode(struct btree_trans *trans, + struct inode_walker *w, u64 inum) { - w->first_this_inode = inum != w->cur_inum; - w->cur_inum = inum; - - if (w->first_this_inode) { - int ret = bch2_inode_find_by_inum(c, inum, &w->inode); + if (inum != w->cur_inum) { + int ret = __bch2_inode_find_by_inum_trans(trans, inum, + &w->inode, 0); if (ret && ret != -ENOENT) return ret; - w->have_inode = !ret; + w->have_inode = !ret; + w->cur_inum = inum; + w->first_this_inode = true; + } else { + w->first_this_inode = false; } return 0; @@ -126,115 +144,308 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) struct hash_check { struct bch_hash_info info; - struct btree_iter chain; - struct btree_iter iter; - u64 next; + + /* start of current chain of hash collisions: */ + struct btree_iter *chain; + + /* next offset in current chain of hash collisions: */ + u64 chain_end; }; -static void hash_check_init(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c) +static void hash_check_init(struct hash_check *h) +{ + h->chain = NULL; + h->chain_end = 0; +} + +static void hash_stop_chain(struct btree_trans *trans, + struct hash_check *h) { - bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN, 0); - bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN, 0); + if (h->chain) + bch2_trans_iter_free(trans, h->chain); + h->chain = NULL; } -static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c, +static void hash_check_set_inode(struct btree_trans *trans, + struct hash_check *h, const struct bch_inode_unpacked *bi) { - h->info = bch2_hash_info_init(c, bi); - h->next = -1; + h->info = bch2_hash_info_init(trans->c, bi); + hash_stop_chain(trans, h); } static int hash_redo_key(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c, + struct btree_trans *trans, struct hash_check *h, struct btree_iter *k_iter, struct bkey_s_c k, u64 hashed) { + struct bkey_i delete; struct bkey_i *tmp; - int ret = 0; - tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL); - if (!tmp) - return -ENOMEM; + tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); bkey_reassemble(tmp, k); - ret = bch2_btree_delete_at(k_iter, 0); - if (ret) - goto err; + bkey_init(&delete.k); + delete.k.p = k_iter->pos; + bch2_trans_update(trans, k_iter, &delete, 0); - bch2_btree_iter_unlock(k_iter); + return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, + tmp, 0); +} + +static int fsck_hash_delete_at(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct bch_hash_info *info, + struct btree_iter *iter) +{ + int ret; +retry: + ret = bch2_hash_delete_at(trans, desc, info, iter) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); + if (ret == -EINTR) { + ret = bch2_btree_iter_traverse(iter); + if (!ret) + goto retry; + } - bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp, - BTREE_INSERT_NOFAIL| - BCH_HASH_SET_MUST_CREATE); -err: - kfree(tmp); return ret; } -static int hash_check_key(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c, - struct btree_iter *k_iter, struct bkey_s_c k) +static int hash_check_duplicates(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { + struct bch_fs *c = trans->c; + struct btree_iter *iter; + struct bkey_s_c k2; char buf[200]; - u64 hashed; int ret = 0; - if (k.k->type != desc.whiteout_type && - k.k->type != desc.key_type) + if (!bkey_cmp(h->chain->pos, k_iter->pos)) return 0; - if (k.k->p.offset != h->next) { - if (!btree_iter_linked(&h->chain)) { - bch2_btree_iter_link(k_iter, &h->chain); - bch2_btree_iter_link(k_iter, &h->iter); + iter = bch2_trans_copy_iter(trans, h->chain); + + for_each_btree_key_continue(iter, 0, k2, ret) { + if (bkey_cmp(k2.k->p, k.k->p) >= 0) + break; + + if (fsck_err_on(k2.k->type == desc.key_type && + !desc.cmp_bkey(k, k2), c, + "duplicate hash table keys:\n%s", + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { + ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); + if (ret) + return ret; + ret = 1; + break; } - bch2_btree_iter_copy(&h->chain, k_iter); } - h->next = k.k->p.offset + 1; +fsck_err: + bch2_trans_iter_free(trans, iter); + return ret; +} + +static void hash_set_chain_start(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + bool hole = (k.k->type != KEY_TYPE_hash_whiteout && + k.k->type != desc.key_type); + + if (hole || k.k->p.offset > h->chain_end + 1) + hash_stop_chain(trans, h); + + if (!hole) { + if (!h->chain) + h->chain = bch2_trans_copy_iter(trans, k_iter); + + h->chain_end = k.k->p.offset; + } +} + +static bool key_has_correct_hash(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + u64 hash; + + hash_set_chain_start(trans, desc, h, k_iter, k); + + if (k.k->type != desc.key_type) + return true; + + hash = desc.hash_bkey(&h->info, k); + + return hash >= h->chain->pos.offset && + hash <= k.k->p.offset; +} + +static int hash_check_key(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + char buf[200]; + u64 hashed; + int ret = 0; + + hash_set_chain_start(trans, desc, h, k_iter, k); if (k.k->type != desc.key_type) return 0; hashed = desc.hash_bkey(&h->info, k); - if (fsck_err_on(hashed < h->chain.pos.offset || + if (fsck_err_on(hashed < h->chain->pos.offset || hashed > k.k->p.offset, c, - "hash table key at wrong offset: %llu, " + "hash table key at wrong offset: btree %u, %llu, " "hashed to %llu chain starts at %llu\n%s", - k.k->p.offset, hashed, h->chain.pos.offset, - bch2_bkey_val_to_text(c, desc.btree_id, - buf, sizeof(buf), k))) { - ret = hash_redo_key(desc, h, c, k_iter, k, hashed); + desc.btree_id, k.k->p.offset, + hashed, h->chain->pos.offset, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + hash_redo_key(desc, trans, h, k_iter, k, hashed)); if (ret) { bch_err(c, "hash_redo_key err %i", ret); return ret; } - return 1; + return -EINTR; } - if (!bkey_cmp(h->chain.pos, k_iter->pos)) + ret = hash_check_duplicates(trans, desc, h, k_iter, k); +fsck_err: + return ret; +} + +static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, + struct btree_iter *iter, struct bkey_s_c *k) +{ + struct bch_fs *c = trans->c; + struct bkey_i_dirent *d = NULL; + int ret = -EINVAL; + char buf[200]; + unsigned len; + u64 hash; + + if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k)) return 0; - bch2_btree_iter_copy(&h->iter, &h->chain); - while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) { - struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter); + len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); + BUG_ON(!len); - if (fsck_err_on(k2.k->type == desc.key_type && - !desc.cmp_bkey(k, k2), c, - "duplicate hash table keys:\n%s", - bch2_bkey_val_to_text(c, desc.btree_id, - buf, sizeof(buf), k))) { - ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL); - if (ret) - return ret; - return 1; - } - bch2_btree_iter_advance_pos(&h->iter); + memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len); + buf[len] = '\0'; + + d = kmalloc(bkey_bytes(k->k), GFP_KERNEL); + if (!d) { + bch_err(c, "memory allocation failure"); + return -ENOMEM; + } + + bkey_reassemble(&d->k_i, *k); + + do { + --len; + if (!len) + goto err_redo; + + d->k.u64s = BKEY_U64s + dirent_val_u64s(len); + + BUG_ON(bkey_val_bytes(&d->k) < + offsetof(struct bch_dirent, d_name) + len); + + memset(d->v.d_name + len, 0, + bkey_val_bytes(&d->k) - + offsetof(struct bch_dirent, d_name) - len); + + hash = bch2_dirent_hash_desc.hash_bkey(&h->info, + bkey_i_to_s_c(&d->k_i)); + } while (hash < h->chain->pos.offset || + hash > k->k->p.offset); + + if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", + buf, strlen(buf), d->v.d_name, len)) { + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + (bch2_trans_update(trans, iter, &d->k_i, 0), 0)); + if (ret) + goto err; + + *k = bch2_btree_iter_peek(iter); + + BUG_ON(k->k->type != KEY_TYPE_dirent); } +err: fsck_err: + kfree(d); return ret; +err_redo: + hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k); + + if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n" + "hash table key at wrong offset: btree %u, offset %llu, " + "hashed to %llu chain starts at %llu\n%s", + buf, strlen(buf), BTREE_ID_dirents, + k->k->p.offset, hash, h->chain->pos.offset, + (bch2_bkey_val_to_text(&PBUF(buf), c, + *k), buf))) { + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + hash_redo_key(bch2_dirent_hash_desc, trans, + h, iter, *k, hash)); + if (ret) + bch_err(c, "hash_redo_key err %i", ret); + else + ret = 1; + } + + goto err; +} + +static int fix_overlapping_extent(struct btree_trans *trans, + struct bkey_s_c k, struct bpos cut_at) +{ + struct btree_iter *iter; + struct bkey_i *u; + int ret; + + u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + return ret; + + bkey_reassemble(u, k); + bch2_cut_front(cut_at, u); + + + /* + * We don't want to go through the extent_handle_overwrites path: + * + * XXX: this is going to screw up disk accounting, extent triggers + * assume things about extent overwrites - we should be running the + * triggers manually here + */ + iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p, + BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); + + BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); + bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN); + bch2_trans_iter_put(trans, iter); + + return bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); } /* @@ -245,51 +456,105 @@ noinline_for_stack static int check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - u64 i_sectors; + struct bkey_buf prev; + u64 i_sectors = 0; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(BCACHEFS_ROOT_INO, 0), 0, k) { - if (k.k->type == KEY_TYPE_DISCARD) - continue; + bch2_bkey_buf_init(&prev); + prev.k->k = KEY(0, 0, 0); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + bch_verbose(c, "checking extents"); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_INTENT); +retry: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { + if (w.have_inode && + w.cur_inum != k.k->p.inode && + !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && + fsck_err_on(w.inode.bi_sectors != i_sectors, c, + "inode %llu has incorrect i_sectors: got %llu, should be %llu", + w.inode.bi_inum, + w.inode.bi_sectors, i_sectors)) { + struct btree_iter *inode_iter = + bch2_trans_get_iter(&trans, BTREE_ID_inodes, + POS(0, w.cur_inum), + BTREE_ITER_INTENT); + + w.inode.bi_sectors = i_sectors; + + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + bch2_inode_write(&trans, inode_iter, &w.inode)); + bch2_trans_iter_put(&trans, inode_iter); + if (ret) + break; + } + + if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { + char buf1[200]; + char buf2[200]; + + bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); + bch2_bkey_val_to_text(&PBUF(buf2), c, k); + + if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) + return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR; + } - ret = walk_inode(c, &w, k.k->p.inode); + ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; + if (w.first_this_inode) + i_sectors = 0; + if (fsck_err_on(!w.have_inode, c, - "extent type %u for missing inode %llu", - k.k->type, k.k->p.inode) || + "extent type %u for missing inode %llu", + k.k->type, k.k->p.inode) || fsck_err_on(w.have_inode && - !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c, - "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, w.inode.i_mode)) { - ret = bch2_btree_delete_at(&iter, 0); - if (ret) - goto err; - continue; + !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, + "extent type %u for non regular file, inode %llu mode %o", + k.k->type, k.k->p.inode, w.inode.bi_mode)) { + bch2_fs_lazy_rw(c); + return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + POS(k.k->p.inode, 0), + POS(k.k->p.inode, U64_MAX), + NULL) ?: -EINTR; } - unfixable_fsck_err_on(w.first_this_inode && - w.have_inode && - !(w.inode.i_flags & BCH_INODE_I_SECTORS_DIRTY) && - w.inode.i_sectors != - (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)), - c, "i_sectors wrong: got %llu, should be %llu", - w.inode.i_sectors, i_sectors); - - unfixable_fsck_err_on(w.have_inode && - !(w.inode.i_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != BCH_RESERVATION && - k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size); + if (fsck_err_on(w.have_inode && + !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + k.k->type != KEY_TYPE_reservation && + k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { + bch2_fs_lazy_rw(c); + return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c))), + POS(k.k->p.inode, U64_MAX), + NULL) ?: -EINTR; + } + + if (bkey_extent_is_allocation(k.k)) + i_sectors += k.k->size; + bch2_bkey_buf_reassemble(&prev, c, k); + + bch2_btree_iter_advance(iter); } -err: fsck_err: - return bch2_btree_iter_unlock(&iter) ?: ret; + if (ret == -EINTR) + goto retry; + bch2_trans_iter_put(&trans, iter); + bch2_bkey_buf_exit(&prev, c); + return bch2_trans_exit(&trans) ?: ret; } /* @@ -301,53 +566,63 @@ static int check_dirents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct hash_check h; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; unsigned name_len; char buf[200]; int ret = 0; - hash_check_init(bch2_dirent_hash_desc, &h, c); + bch_verbose(c, "checking dirents"); + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, - POS(BCACHEFS_ROOT_INO, 0), 0, k) { + hash_check_init(&h); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents, + POS(BCACHEFS_ROOT_INO, 0), 0); +retry: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { struct bkey_s_c_dirent d; struct bch_inode_unpacked target; bool have_target; u64 d_inum; - ret = walk_inode(c, &w, k.k->p.inode); + ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; if (fsck_err_on(!w.have_inode, c, "dirent in nonexisting directory:\n%s", - bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, - buf, sizeof(buf), k)) || - fsck_err_on(!S_ISDIR(w.inode.i_mode), c, + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf)) || + fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", - mode_to_type(w.inode.i_mode), - bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, - buf, sizeof(buf), k))) { - ret = bch2_btree_delete_at(&iter, 0); + mode_to_type(w.inode.bi_mode), + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { + ret = bch2_btree_delete_at(&trans, iter, 0); if (ret) goto err; continue; } if (w.first_this_inode && w.have_inode) - hash_check_set_inode(&h, c, &w.inode); + hash_check_set_inode(&trans, &h, &w.inode); - ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k); + ret = check_dirent_hash(&trans, &h, iter, &k); if (ret > 0) { ret = 0; continue; } + if (ret) + goto fsck_err; if (ret) goto fsck_err; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; d = bkey_s_c_to_dirent(k); @@ -361,8 +636,13 @@ static int check_dirents(struct bch_fs *c) ". dirent") || fsck_err_on(name_len == 2 && !memcmp(d.v->d_name, "..", 2), c, - ".. dirent")) { - ret = remove_dirent(c, &iter, d); + ".. dirent") || + fsck_err_on(name_len == 2 && + !memcmp(d.v->d_name, "..", 2), c, + ".. dirent") || + fsck_err_on(memchr(d.v->d_name, '/', name_len), c, + "dirent name has invalid chars")) { + ret = remove_dirent(&trans, d); if (ret) goto err; continue; @@ -370,15 +650,15 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory:\n%s", - bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, - buf, sizeof(buf), k))) { - ret = remove_dirent(c, &iter, d); + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { + ret = remove_dirent(&trans, d); if (ret) goto err; continue; } - ret = bch2_inode_find_by_inum(c, d_inum, &target); + ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0); if (ret && ret != -ENOENT) break; @@ -387,21 +667,54 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!have_target, c, "dirent points to missing inode:\n%s", - bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, - buf, sizeof(buf), k))) { - ret = remove_dirent(c, &iter, d); + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { + ret = remove_dirent(&trans, d); if (ret) goto err; continue; } + if (!target.bi_nlink && + !(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) && + (target.bi_dir != k.k->p.inode || + target.bi_dir_offset != k.k->p.offset) && + (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c, + "inode %llu has wrong backpointer:\n" + "got %llu:%llu\n" + "should be %llu:%llu", + d_inum, + target.bi_dir, + target.bi_dir_offset, + k.k->p.inode, + k.k->p.offset) || + c->opts.version_upgrade)) { + struct bkey_inode_buf p; + + target.bi_dir = k.k->p.inode; + target.bi_dir_offset = k.k->p.offset; + bch2_trans_unlock(&trans); + + bch2_inode_pack(c, &p, &target); + + ret = bch2_btree_insert(c, BTREE_ID_inodes, + &p.inode.k_i, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); + if (ret) { + bch_err(c, "error in fsck: error %i updating inode", ret); + goto err; + } + continue; + } + if (fsck_err_on(have_target && d.v->d_type != - mode_to_type(le16_to_cpu(target.i_mode)), c, + mode_to_type(target.bi_mode), c, "incorrect d_type: should be %u:\n%s", - mode_to_type(le16_to_cpu(target.i_mode)), - bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, - buf, sizeof(buf), k))) { + mode_to_type(target.bi_mode), + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { struct bkey_i_dirent *n; n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); @@ -411,22 +724,30 @@ static int check_dirents(struct bch_fs *c) } bkey_reassemble(&n->k_i, d.s_c); - n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode)); + n->v.d_type = mode_to_type(target.bi_mode); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, - BTREE_INSERT_NOFAIL, - BTREE_INSERT_ENTRY(&iter, &n->k_i)); + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); kfree(n); if (ret) goto err; } + + bch2_btree_iter_advance(iter); } + + hash_stop_chain(&trans, &h); err: fsck_err: - bch2_btree_iter_unlock(&h.chain); - bch2_btree_iter_unlock(&h.iter); - return bch2_btree_iter_unlock(&iter) ?: ret; + if (ret == -EINTR) + goto retry; + + bch2_trans_iter_put(&trans, h.chain); + bch2_trans_iter_put(&trans, iter); + return bch2_trans_exit(&trans) ?: ret; } /* @@ -437,39 +758,52 @@ static int check_xattrs(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct hash_check h; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - hash_check_init(bch2_xattr_hash_desc, &h, c); + bch_verbose(c, "checking xattrs"); - for_each_btree_key(&iter, c, BTREE_ID_XATTRS, - POS(BCACHEFS_ROOT_INO, 0), 0, k) { - ret = walk_inode(c, &w, k.k->p.inode); + hash_check_init(&h); + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, + POS(BCACHEFS_ROOT_INO, 0), 0); +retry: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { + ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; if (fsck_err_on(!w.have_inode, c, "xattr for missing inode %llu", k.k->p.inode)) { - ret = bch2_btree_delete_at(&iter, 0); + ret = bch2_btree_delete_at(&trans, iter, 0); if (ret) - goto err; + break; continue; } if (w.first_this_inode && w.have_inode) - hash_check_set_inode(&h, c, &w.inode); + hash_check_set_inode(&trans, &h, &w.inode); - ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k); + ret = hash_check_key(&trans, bch2_xattr_hash_desc, + &h, iter, k); if (ret) - goto fsck_err; + break; + + bch2_btree_iter_advance(iter); } -err: fsck_err: - bch2_btree_iter_unlock(&h.chain); - bch2_btree_iter_unlock(&h.iter); - return bch2_btree_iter_unlock(&iter) ?: ret; + if (ret == -EINTR) + goto retry; + + bch2_trans_iter_put(&trans, h.chain); + bch2_trans_iter_put(&trans, iter); + return bch2_trans_exit(&trans) ?: ret; } /* Get root directory, create if it doesn't exist: */ @@ -478,14 +812,18 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) struct bkey_inode_buf packed; int ret; - ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); + bch_verbose(c, "checking root directory"); + + ret = bch2_trans_do(c, NULL, NULL, 0, + __bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO, + root_inode, 0)); if (ret && ret != -ENOENT) return ret; if (fsck_err_on(ret, c, "root directory missing")) goto create_root; - if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c, + if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, "root inode not a directory")) goto create_root; @@ -493,13 +831,16 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) fsck_err: return ret; create_root: - bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); - root_inode->inum = BCACHEFS_ROOT_INO; + bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, + 0, NULL); + root_inode->bi_inum = BCACHEFS_ROOT_INO; - bch2_inode_pack(&packed, root_inode); + bch2_inode_pack(c, &packed, root_inode); - return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, BTREE_INSERT_NOFAIL); + return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, + NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); } /* Get lost+found, create if it doesn't exist: */ @@ -510,10 +851,11 @@ static int check_lostfound(struct bch_fs *c, struct qstr lostfound = QSTR("lost+found"); struct bch_hash_info root_hash_info = bch2_hash_info_init(c, root_inode); - struct bkey_inode_buf packed; u64 inum; int ret; + bch_verbose(c, "checking lost+found"); + inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, &lostfound); if (!inum) { @@ -521,14 +863,15 @@ static int check_lostfound(struct bch_fs *c, goto create_lostfound; } - ret = bch2_inode_find_by_inum(c, inum, lostfound_inode); + ret = bch2_trans_do(c, NULL, NULL, 0, + __bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0)); if (ret && ret != -ENOENT) return ret; if (fsck_err_on(ret, c, "lost+found missing")) goto create_lostfound; - if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c, + if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, "lost+found inode not a directory")) goto create_lostfound; @@ -536,60 +879,37 @@ static int check_lostfound(struct bch_fs *c, fsck_err: return ret; create_lostfound: - root_inode->i_nlink++; - - bch2_inode_pack(&packed, root_inode); - - ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, BTREE_INSERT_NOFAIL); - if (ret) - return ret; - - bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); - - ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint); + bch2_inode_init_early(c, lostfound_inode); + + ret = bch2_trans_do(c, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + bch2_create_trans(&trans, + BCACHEFS_ROOT_INO, root_inode, + lostfound_inode, &lostfound, + 0, 0, S_IFDIR|0700, 0, NULL, NULL)); if (ret) - return ret; + bch_err(c, "error creating lost+found: %i", ret); - ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR, - &lostfound, lostfound_inode->inum, NULL, - BTREE_INSERT_NOFAIL); - if (ret) - return ret; - - return 0; + return ret; } -struct inode_bitmap { - unsigned long *bits; - size_t size; -}; +typedef GENRADIX(unsigned long) inode_bitmap; -static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) +static inline bool inode_bitmap_test(inode_bitmap *b, size_t nr) { - return nr < b->size ? test_bit(nr, b->bits) : false; + unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG); + return w ? test_bit(nr & (BITS_PER_LONG - 1), w) : false; } -static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) +static inline int inode_bitmap_set(inode_bitmap *b, size_t nr) { - if (nr >= b->size) { - size_t new_size = max(max(PAGE_SIZE * 8, - b->size * 2), - nr + 1); - void *n; - - new_size = roundup_pow_of_two(new_size); - n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); - if (!n) { - return -ENOMEM; - } + unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG, GFP_KERNEL); - b->bits = n; - b->size = new_size; - } + if (!w) + return -ENOMEM; - __set_bit(nr, b->bits); + *w |= 1UL << (nr & (BITS_PER_LONG - 1)); return 0; } @@ -606,7 +926,7 @@ struct pathbuf { static int path_down(struct pathbuf *p, u64 inum) { if (p->nr == p->size) { - size_t new_size = max(256UL, p->size * 2); + size_t new_size = max_t(size_t, 256UL, p->size * 2); void *n = krealloc(p->entries, new_size * sizeof(p->entries[0]), GFP_KERNEL); @@ -628,18 +948,24 @@ noinline_for_stack static int check_directory_structure(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode) { - struct inode_bitmap dirs_done = { NULL, 0 }; + inode_bitmap dirs_done; struct pathbuf path = { 0, 0, NULL }; struct pathbuf_entry *e; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; bool had_unreachable; u64 d_inum; int ret = 0; + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + bch_verbose(c, "checking directory structure"); + /* DFS: */ restart_dfs: + genradix_init(&dirs_done); had_unreachable = false; ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); @@ -649,9 +975,8 @@ restart_dfs: } ret = path_down(&path, BCACHEFS_ROOT_INO); - if (ret) { - return ret; - } + if (ret) + goto err; while (path.nr) { next: @@ -660,14 +985,14 @@ next: if (e->offset == U64_MAX) goto up; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, - POS(e->inum, e->offset + 1), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_dirents, + POS(e->inum, e->offset + 1), 0, k, ret) { if (k.k->p.inode != e->inum) break; e->offset = k.k->p.offset; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); @@ -680,7 +1005,7 @@ next: if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, "directory %llu has multiple hardlinks", d_inum)) { - ret = remove_dirent(c, &iter, dirent); + ret = remove_dirent(&trans, dirent); if (ret) goto err; continue; @@ -697,10 +1022,14 @@ next: goto err; } - bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); + if (ret) { + bch_err(c, "btree error %i in fsck", ret); + goto err; + } goto next; } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter) ?: ret; if (ret) { bch_err(c, "btree error %i in fsck", ret); goto err; @@ -709,17 +1038,27 @@ up: path.nr--; } - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS || - !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode))) + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS_MIN, 0); +retry: + for_each_btree_key_continue(iter, 0, k, ret) { + if (k.k->type != KEY_TYPE_inode) + continue; + + if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) + continue; + + ret = bch2_empty_dir_trans(&trans, k.k->p.inode); + if (ret == -EINTR) + goto retry; + if (!ret) continue; - if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, + if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, "unreachable directory found (inum %llu)", - k.k->p.inode)) { - bch2_btree_iter_unlock(&iter); + k.k->p.offset)) { + bch2_trans_unlock(&trans); - ret = reattach_inode(c, lostfound_inode, k.k->p.inode); + ret = reattach_inode(c, lostfound_inode, k.k->p.offset); if (ret) { goto err; } @@ -727,27 +1066,24 @@ up: had_unreachable = true; } } - ret = bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); if (ret) goto err; if (had_unreachable) { bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); - kfree(dirs_done.bits); + genradix_free(&dirs_done); kfree(path.entries); memset(&dirs_done, 0, sizeof(dirs_done)); memset(&path, 0, sizeof(path)); goto restart_dfs; } - -out: - kfree(dirs_done.bits); - kfree(path.entries); - return ret; err: fsck_err: - ret = bch2_btree_iter_unlock(&iter) ?: ret; - goto out; + ret = bch2_trans_exit(&trans) ?: ret; + genradix_free(&dirs_done); + kfree(path.entries); + return ret; } struct nlink { @@ -766,9 +1102,14 @@ static void inc_link(struct bch_fs *c, nlink_table *links, if (inum < range_start || inum >= *range_end) return; + if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) { + *range_end = inum; + return; + } + link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); if (!link) { - bch_verbose(c, "allocation failed during fs gc - will need another pass"); + bch_verbose(c, "allocation failed during fsck - will need another pass"); *range_end = inum; return; } @@ -783,17 +1124,20 @@ noinline_for_stack static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, u64 range_start, u64 *range_end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent d; u64 d_inum; int ret; + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, 0, k, ret) { switch (k.k->type) { - case BCH_DIRENT: + case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); @@ -807,115 +1151,168 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, break; } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + bch2_trans_iter_put(&trans, iter); + + ret = bch2_trans_exit(&trans) ?: ret; if (ret) - bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); + bch_err(c, "error in fsck: btree error %i while walking dirents", ret); return ret; } -s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum) +static int check_inode_nlink(struct bch_fs *c, + struct bch_inode_unpacked *lostfound_inode, + struct bch_inode_unpacked *u, + struct nlink *link, + bool *do_update) { - struct btree_iter iter; - struct bkey_s_c k; - u64 sectors = 0; + u32 i_nlink = bch2_inode_nlink_get(u); + u32 real_i_nlink = + link->count * nlink_bias(u->bi_mode) + + link->dir_count; + int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { - if (k.k->p.inode != inum) - break; + /* + * These should have been caught/fixed by earlier passes, we don't + * repair them here: + */ + if (S_ISDIR(u->bi_mode) && link->count > 1) { + need_fsck_err(c, "directory %llu with multiple hardlinks: %u", + u->bi_inum, link->count); + return 0; + } - if (bkey_extent_is_allocation(k.k)) - sectors += k.k->size; + if (S_ISDIR(u->bi_mode) && !link->count) { + need_fsck_err(c, "unreachable directory found (inum %llu)", + u->bi_inum); + return 0; + } + + if (!S_ISDIR(u->bi_mode) && link->dir_count) { + need_fsck_err(c, "non directory with subdirectories (inum %llu)", + u->bi_inum); + return 0; + } + + if (!link->count && + !(u->bi_flags & BCH_INODE_UNLINKED) && + (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { + if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)", + u->bi_inum, mode_to_type(u->bi_mode)) == + FSCK_ERR_IGNORE) + return 0; + + ret = reattach_inode(c, lostfound_inode, u->bi_inum); + if (ret) + return ret; + + link->count = 1; + real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count; + goto set_i_nlink; + } + + if (i_nlink < link->count) { + if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", + u->bi_inum, i_nlink, link->count, + mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE) + return 0; + goto set_i_nlink; + } + + if (i_nlink != real_i_nlink && + c->sb.clean) { + if (fsck_err(c, "filesystem marked clean, " + "but inode %llu has wrong i_nlink " + "(type %u i_nlink %u, should be %u)", + u->bi_inum, mode_to_type(u->bi_mode), + i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) + return 0; + goto set_i_nlink; + } + + if (i_nlink != real_i_nlink && + (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { + if (fsck_err(c, "inode %llu has wrong i_nlink " + "(type %u i_nlink %u, should be %u)", + u->bi_inum, mode_to_type(u->bi_mode), + i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) + return 0; + goto set_i_nlink; } - return bch2_btree_iter_unlock(&iter) ?: sectors; + if (real_i_nlink && i_nlink != real_i_nlink) + bch_verbose(c, "setting inode %llu nlink from %u to %u", + u->bi_inum, i_nlink, real_i_nlink); +set_i_nlink: + if (i_nlink != real_i_nlink) { + bch2_inode_nlink_set(u, real_i_nlink); + *do_update = true; + } +fsck_err: + return ret; } -static int bch2_gc_do_inode(struct bch_fs *c, - struct bch_inode_unpacked *lostfound_inode, - struct btree_iter *iter, - struct bkey_s_c_inode inode, struct nlink link) +static int check_inode(struct btree_trans *trans, + struct bch_inode_unpacked *lostfound_inode, + struct btree_iter *iter, + struct bkey_s_c_inode inode, + struct nlink *link) { + struct bch_fs *c = trans->c; struct bch_inode_unpacked u; - int ret = 0; - u32 i_nlink, real_i_nlink; bool do_update = false; + int ret = 0; ret = bch2_inode_unpack(inode, &u); + + bch2_trans_unlock(trans); + if (bch2_fs_inconsistent_on(ret, c, "error unpacking inode %llu in fsck", inode.k->p.inode)) return ret; - i_nlink = u.i_nlink + nlink_bias(u.i_mode); - - fsck_err_on(i_nlink < link.count, c, - "inode %llu i_link too small (%u < %u, type %i)", - inode.k->p.inode, i_nlink, - link.count, mode_to_type(u.i_mode)); - - /* These should have been caught/fixed by earlier passes: */ - if (S_ISDIR(u.i_mode)) { - need_fsck_err_on(link.count > 1, c, - "directory %llu with multiple hardlinks: %u", - inode.k->p.inode, link.count); - - real_i_nlink = link.count * 2 + link.dir_count; - } else { - need_fsck_err_on(link.dir_count, c, - "found dirents for non directory %llu", - inode.k->p.inode); - - real_i_nlink = link.count + link.dir_count; + if (link) { + ret = check_inode_nlink(c, lostfound_inode, &u, link, + &do_update); + if (ret) + return ret; } - if (!link.count) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but found orphaned inode %llu", - inode.k->p.inode); - - if (fsck_err_on(S_ISDIR(u.i_mode) && - bch2_empty_dir(c, inode.k->p.inode), c, - "non empty directory with link count 0, " - "inode nlink %u, dir links found %u", - i_nlink, link.dir_count)) { - ret = reattach_inode(c, lostfound_inode, - inode.k->p.inode); - if (ret) - return ret; - } + if (u.bi_flags & BCH_INODE_UNLINKED && + (!c->sb.clean || + fsck_err(c, "filesystem marked clean, but inode %llu unlinked", + u.bi_inum))) { + bch_verbose(c, "deleting inode %llu", u.bi_inum); - bch_verbose(c, "deleting inode %llu", inode.k->p.inode); + bch2_fs_lazy_rw(c); - ret = bch2_inode_rm(c, inode.k->p.inode); + ret = bch2_inode_rm(c, u.bi_inum, false); if (ret) - bch_err(c, "error in fs gc: error %i " - "while deleting inode", ret); + bch_err(c, "error in fsck: error %i while deleting inode", ret); return ret; } - if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but inode %llu has i_size dirty", - inode.k->p.inode); + if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && + (!c->sb.clean || + fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", + u.bi_inum))) { + bch_verbose(c, "truncating inode %llu", u.bi_inum); - bch_verbose(c, "truncating inode %llu", inode.k->p.inode); + bch2_fs_lazy_rw(c); /* * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away */ - - ret = bch2_inode_truncate(c, inode.k->p.inode, - round_up(u.i_size, PAGE_SIZE) >> 9, - NULL, NULL); + ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, + POS(u.bi_inum, round_up(u.bi_size, block_bytes(c))), + POS(u.bi_inum, U64_MAX), + NULL); if (ret) { - bch_err(c, "error in fs gc: error %i " - "truncating inode", ret); + bch_err(c, "error in fsck: error %i truncating inode", ret); return ret; } @@ -923,60 +1320,54 @@ static int bch2_gc_do_inode(struct bch_fs *c, * We truncated without our normal sector accounting hook, just * make sure we recalculate it: */ - u.i_flags |= BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; - u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY; + u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; do_update = true; } - if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) { + if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && + (!c->sb.clean || + fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", + u.bi_inum))) { s64 sectors; - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but inode %llu has i_sectors dirty", - inode.k->p.inode); - bch_verbose(c, "recounting sectors for inode %llu", - inode.k->p.inode); + u.bi_inum); - sectors = bch2_count_inode_sectors(c, inode.k->p.inode); + sectors = bch2_count_inode_sectors(trans, u.bi_inum); if (sectors < 0) { - bch_err(c, "error in fs gc: error %i " - "recounting inode sectors", + bch_err(c, "error in fsck: error %i recounting inode sectors", (int) sectors); return sectors; } - u.i_sectors = sectors; - u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + u.bi_sectors = sectors; + u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; do_update = true; } - if (i_nlink != real_i_nlink) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but inode %llu has wrong i_nlink " - "(type %u i_nlink %u, should be %u)", - inode.k->p.inode, mode_to_type(u.i_mode), - i_nlink, real_i_nlink); - - bch_verbose(c, "setting inode %llu nlinks from %u to %u", - inode.k->p.inode, i_nlink, real_i_nlink); - u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);; + if (!S_ISDIR(u.bi_mode) && + u.bi_nlink && + !(u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) && + (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c, + "inode missing BCH_INODE_BACKPTR_UNTRUSTED flags") || + c->opts.version_upgrade)) { + u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED; do_update = true; } if (do_update) { struct bkey_inode_buf p; - bch2_inode_pack(&p, &u); + bch2_inode_pack(c, &p, &u); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, - BTREE_INSERT_NOFAIL, - BTREE_INSERT_ENTRY(iter, &p.inode.k_i)); - if (ret && ret != -EINTR) - bch_err(c, "error in fs gc: error %i " + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); + if (ret) + bch_err(c, "error in fsck: error %i " "updating inode", ret); } fsck_err: @@ -985,56 +1376,52 @@ fsck_err: noinline_for_stack static int bch2_gc_walk_inodes(struct bch_fs *c, - struct bch_inode_unpacked *lostfound_inode, - nlink_table *links, - u64 range_start, u64 range_end) + struct bch_inode_unpacked *lostfound_inode, + nlink_table *links, + u64 range_start, u64 range_end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct nlink *link, zero_links = { 0, 0 }; struct genradix_iter nlinks_iter; int ret = 0, ret2 = 0; u64 nlinks_pos; - bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0); - genradix_iter_init(&nlinks_iter); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - while ((k = bch2_btree_iter_peek(&iter)).k && - !btree_iter_err(k)) { + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, + POS(0, range_start), 0); + nlinks_iter = genradix_iter_init(links, 0); + + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret2 = bkey_err(k)) && + iter->pos.offset < range_end) { peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); - if (!link && (!k.k || iter.pos.inode >= range_end)) + if (!link && (!k.k || iter->pos.offset >= range_end)) break; nlinks_pos = range_start + nlinks_iter.pos; - if (iter.pos.inode > nlinks_pos) { + + if (link && nlinks_pos < iter->pos.offset) { /* Should have been caught by dirents pass: */ - need_fsck_err_on(link && link->count, c, + need_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); genradix_iter_advance(&nlinks_iter, links); goto peek_nlinks; } - if (iter.pos.inode < nlinks_pos || !link) + if (!link || nlinks_pos > iter->pos.offset) link = &zero_links; - if (k.k && k.k->type == BCH_INODE_FS) { - /* - * Avoid potential deadlocks with iter for - * truncate/rm/etc.: - */ - bch2_btree_iter_unlock(&iter); - - ret = bch2_gc_do_inode(c, lostfound_inode, &iter, - bkey_s_c_to_inode(k), *link); - if (ret == -EINTR) - continue; + if (k.k && k.k->type == KEY_TYPE_inode) { + ret = check_inode(&trans, lostfound_inode, iter, + bkey_s_c_to_inode(k), link); + BUG_ON(ret == -EINTR); if (ret) break; - - if (link->count) - atomic_long_inc(&c->nr_inodes); } else { /* Should have been caught by dirents pass: */ need_fsck_err_on(link->count, c, @@ -1042,16 +1429,18 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); nlinks_pos, link->count); } - if (nlinks_pos == iter.pos.inode) + if (nlinks_pos == iter->pos.offset) genradix_iter_advance(&nlinks_iter, links); - bch2_btree_iter_advance_pos(&iter); - bch2_btree_iter_cond_resched(&iter); + bch2_btree_iter_advance(iter); + bch2_trans_cond_resched(&trans); } fsck_err: - ret2 = bch2_btree_iter_unlock(&iter); + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); + if (ret2) - bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2); + bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); return ret ?: ret2; } @@ -1064,6 +1453,8 @@ static int check_inode_nlinks(struct bch_fs *c, u64 this_iter_range_start, next_iter_range_start = 0; int ret = 0; + bch_verbose(c, "checking inode nlinks"); + genradix_init(&links); do { @@ -1094,64 +1485,56 @@ static int check_inode_nlinks(struct bch_fs *c, * Checks for inconsistencies that shouldn't happen, unless we have a bug. * Doesn't fix them yet, mainly because they haven't yet been observed: */ -int bch2_fsck(struct bch_fs *c, bool full_fsck) +int bch2_fsck_full(struct bch_fs *c) { struct bch_inode_unpacked root_inode, lostfound_inode; - int ret; - if (full_fsck) { - bch_verbose(c, "checking extents"); - ret = check_extents(c); - if (ret) - return ret; - - bch_verbose(c, "checking dirents"); - ret = check_dirents(c); - if (ret) - return ret; + return check_extents(c) ?: + check_dirents(c) ?: + check_xattrs(c) ?: + check_root(c, &root_inode) ?: + check_lostfound(c, &root_inode, &lostfound_inode) ?: + check_directory_structure(c, &lostfound_inode) ?: + check_inode_nlinks(c, &lostfound_inode); +} - bch_verbose(c, "checking xattrs"); - ret = check_xattrs(c); - if (ret) - return ret; +int bch2_fsck_inode_nlink(struct bch_fs *c) +{ + struct bch_inode_unpacked root_inode, lostfound_inode; - bch_verbose(c, "checking root directory"); - ret = check_root(c, &root_inode); - if (ret) - return ret; + return check_root(c, &root_inode) ?: + check_lostfound(c, &root_inode, &lostfound_inode) ?: + check_inode_nlinks(c, &lostfound_inode); +} - bch_verbose(c, "checking lost+found"); - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; +int bch2_fsck_walk_inodes_only(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; + struct bkey_s_c_inode inode; + int ret; - bch_verbose(c, "checking directory structure"); - ret = check_directory_structure(c, &lostfound_inode); - if (ret) - return ret; + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - bch_verbose(c, "checking inode nlinks"); - ret = check_inode_nlinks(c, &lostfound_inode); - if (ret) - return ret; - } else { - bch_verbose(c, "checking root directory"); - ret = check_root(c, &root_inode); - if (ret) - return ret; + for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_inode) + continue; - bch_verbose(c, "checking lost+found"); - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; + inode = bkey_s_c_to_inode(k); - bch_verbose(c, "checking inode nlinks"); - ret = check_inode_nlinks(c, &lostfound_inode); - if (ret) - return ret; + if (inode.v->bi_flags & + (BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY| + BCH_INODE_UNLINKED)) { + ret = check_inode(&trans, NULL, iter, inode, NULL); + if (ret) + break; + } } + bch2_trans_iter_put(&trans, iter); - bch2_flush_fsck_errs(c); + BUG_ON(ret == -EINTR); - return 0; + return bch2_trans_exit(&trans) ?: ret; }