+ if (hash_k.k->type != desc.key_type)
+ return 0;
+
+ hash = desc.hash_bkey(hash_info, hash_k);
+
+ if (likely(hash == hash_k.k->p.offset))
+ return 0;
+
+ if (hash_k.k->p.offset < hash)
+ goto bad_hash;
+
+ for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
+ BTREE_ITER_SLOTS, k, ret) {
+ if (!bkey_cmp(k.k->p, hash_k.k->p))
+ break;
+
+ if (fsck_err_on(k.k->type == desc.key_type &&
+ !desc.cmp_bkey(k, hash_k), c,
+ "duplicate hash table keys:\n%s",
+ (bch2_bkey_val_to_text(&PBUF(buf), c,
+ hash_k), buf))) {
+ ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
+ if (ret)
+ return ret;
+ ret = 1;
+ break;
+ }
+
+ if (bkey_deleted(k.k)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto bad_hash;
+ }
+
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+bad_hash:
+ if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
+ "hashed to %llu\n%s",
+ desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash,
+ (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
+ return 0;
+
+ ret = __bch2_trans_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+ hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
+ if (ret) {
+ bch_err(c, "hash_redo_key err %i", ret);
+ return ret;
+ }
+ return -EINTR;
+fsck_err:
+ return ret;
+}
+
+/*
+ * check_inode(): check and repair a single unpacked inode.
+ *
+ * @trans: btree transaction the caller is running
+ * @iter: iterator positioned at the inode key; iter->pos.snapshot is the
+ * snapshot ID used for any repairs
+ * @prev: the previously-seen snapshot version of this inode number, or NULL
+ * @u: the inode, already unpacked by the caller
+ *
+ * Repairs state left behind by an unclean shutdown: unlinked inodes are
+ * deleted, I_SIZE_DIRTY/I_SECTORS_DIRTY flags are resolved, and untrusted
+ * backpointers are cleared. Returns 0 on success, negative error on failure.
+ */
+static int check_inode(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *prev,
+ struct bch_inode_unpacked u)
+{
+ struct bch_fs *c = trans->c;
+ bool do_update = false;
+ int ret = 0;
+
+ /*
+ * Versions of the same inode in different snapshots must agree on hash
+ * seed and file type; repairing a mismatch is not implemented yet.
+ */
+ if (fsck_err_on(prev &&
+ (prev->bi_hash_seed != u.bi_hash_seed ||
+ mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c,
+ "inodes in different snapshots don't match")) {
+ bch_err(c, "repair not implemented yet");
+ return -EINVAL;
+ }
+
+ /*
+ * An unlinked inode is expected after an unclean shutdown (a deletion
+ * was in flight); if the superblock says the filesystem was clean,
+ * that's an fsck error. Either way, finish the deletion here.
+ */
+ if (u.bi_flags & BCH_INODE_UNLINKED &&
+ (!c->sb.clean ||
+ fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
+ u.bi_inum))) {
+ bch2_trans_unlock(trans);
+ bch2_fs_lazy_rw(c);
+
+ ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
+ if (ret)
+ bch_err(c, "error in fsck: error %i while deleting inode", ret);
+ return ret;
+ }
+
+ /*
+ * I_SIZE_DIRTY means an in-flight truncate didn't complete: delete
+ * extents past i_size (rounded up to a block boundary), then clear
+ * the flag.
+ */
+ if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
+ (!c->sb.clean ||
+ fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
+ u.bi_inum))) {
+ bch_verbose(c, "truncating inode %llu", u.bi_inum);
+
+ bch2_trans_unlock(trans);
+ bch2_fs_lazy_rw(c);
+
+ /*
+ * XXX: need to truncate partial blocks too here - or ideally
+ * just switch units to bytes and that issue goes away
+ */
+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
+ SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
+ iter->pos.snapshot),
+ POS(u.bi_inum, U64_MAX),
+ 0, NULL);
+ if (ret) {
+ bch_err(c, "error in fsck: error %i truncating inode", ret);
+ return ret;
+ }
+
+ /*
+ * We truncated without our normal sector accounting hook, just
+ * make sure we recalculate it:
+ */
+ u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
+
+ u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+ do_update = true;
+ }
+
+ /*
+ * I_SECTORS_DIRTY: bi_sectors can't be trusted; recount from the
+ * extents btree and clear the flag.
+ */
+ if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
+ (!c->sb.clean ||
+ fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
+ u.bi_inum))) {
+ s64 sectors;
+
+ bch_verbose(c, "recounting sectors for inode %llu",
+ u.bi_inum);
+
+ sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
+ if (sectors < 0) {
+ bch_err(c, "error in fsck: error %i recounting inode sectors",
+ (int) sectors);
+ return sectors;
+ }
+
+ u.bi_sectors = sectors;
+ u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
+ do_update = true;
+ }
+
+ /*
+ * BACKPTR_UNTRUSTED: the bi_dir/bi_dir_offset backpointer can't be
+ * relied on, so zero it out along with the flag.
+ */
+ if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
+ u.bi_dir = 0;
+ u.bi_dir_offset = 0;
+ u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
+ do_update = true;
+ }
+
+ /* Persist any repairs made above */
+ if (do_update) {
+ ret = write_inode(trans, &u, iter->pos.snapshot);
+ if (ret)
+ bch_err(c, "error in fsck: error %i "
+ "updating inode", ret);
+ }
+fsck_err:
+ return ret;
+}
+
+/*
+ * Walk every key in the inodes btree (across all snapshots) and run
+ * check_inode() on each inode found.
+ *
+ * @full: when false, skip inodes without flags that need repair
+ * (I_SIZE_DIRTY, I_SECTORS_DIRTY, UNLINKED)
+ */
+noinline_for_stack
+static int check_inodes(struct bch_fs *c, bool full)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_inode inode;
+ struct bch_inode_unpacked prev, u;
+ int ret;
+
+ /* zeroed so prev.bi_inum can't accidentally match the first inode */
+ memset(&prev, 0, sizeof(prev));
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+ for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ ret = check_key_has_snapshot(&trans, &iter, k);
+ if (ret)
+ break;
+
+ /*
+ * if snapshot id isn't a leaf node, skip it - deletion in
+ * particular is not atomic, so on the internal snapshot nodes
+ * we can see inodes marked for deletion after a clean shutdown
+ */
+ if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
+ continue;
+
+ if (k.k->type != KEY_TYPE_inode)
+ continue;
+
+ inode = bkey_s_c_to_inode(k);
+
+ /* in non-full mode, only inodes with repair-needed flags are checked */
+ if (!full &&
+ !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
+ BCH_INODE_I_SECTORS_DIRTY|
+ BCH_INODE_UNLINKED)))
+ continue;
+
+ /*
+ * NOTE(review): the unpack happens inside BUG_ON(), relying on
+ * BUG_ON() always evaluating its argument; unpacking an inode
+ * key is expected to always succeed here.
+ */
+ BUG_ON(bch2_inode_unpack(inode, &u));
+
+ /* pass the previous snapshot's version only for the same inode number */
+ ret = check_inode(&trans, &iter,
+ full && prev.bi_inum == u.bi_inum
+ ? &prev : NULL, u);
+ if (ret)
+ break;
+
+ prev = u;
+ }
+ bch2_trans_iter_exit(&trans, &iter);
+
+ /* check_inode() must not leak transaction restarts to us */
+ BUG_ON(ret == -EINTR);
+
+ return bch2_trans_exit(&trans) ?: ret;
+}
+
+noinline_for_stack
+static int check_subvols(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct btree_iter subvol_iter;
+ struct bkey_s_c subvol_k;
+ int err;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+ /*
+ * Walk the subvolumes btree with an empty loop body; this only
+ * surfaces iteration errors through err.
+ */
+ for_each_btree_key(&trans, subvol_iter, BTREE_ID_subvolumes, POS_MIN,
+ 0, subvol_k, err)
+ ;
+ bch2_trans_iter_exit(&trans, &subvol_iter);
+
+ bch2_trans_exit(&trans);
+ return err;
+}
+
+/*
+ * Checking for overlapping extents needs to be reimplemented
+ */
+#if 0
+/*
+ * Trim the front of an extent that overlaps the previous one: copy the key,
+ * cut it at cut_at, and rewrite it in place. Triggers are deliberately
+ * bypassed (BTREE_TRIGGER_NORUN / BTREE_ITER_NOT_EXTENTS) - see the XXX
+ * below. Currently compiled out pending reimplementation.
+ */
+static int fix_overlapping_extent(struct btree_trans *trans,
+ struct bkey_s_c k, struct bpos cut_at)
+{
+ struct btree_iter iter;
+ struct bkey_i *u;
+ int ret;
+
+ /* transaction-lifetime copy of the key so we can modify it */
+ u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
+
+ bkey_reassemble(u, k);
+ bch2_cut_front(cut_at, u);
+
+
+ /*
+ * We don't want to go through the extent_handle_overwrites path:
+ *
+ * XXX: this is going to screw up disk accounting, extent triggers
+ * assume things about extent overwrites - we should be running the
+ * triggers manually here
+ */
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
+ BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
+
+ BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+#endif
+
+/*
+ * Check whether the dirent that an inode's backpointer
+ * (bi_dir/bi_dir_offset) points at actually references this inode.
+ *
+ * Returns 1 if the backpointer resolves back to this inode, 0 if the key
+ * there isn't a dirent or targets a different inode, negative error code
+ * on failure.
+ */
+static int inode_backpointer_exists(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u32 target_subvol, target_snapshot;
+ u64 target_inum;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot), 0);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto out;
+ /* not a dirent at that position: ret is still 0, i.e. "doesn't exist" */
+ if (k.k->type != KEY_TYPE_dirent)
+ goto out;
+
+ ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k),
+ &target_subvol,
+ &target_snapshot,
+ &target_inum,
+ true);
+ if (ret)
+ goto out;
+
+ /* boolean result (1/0) returned as int */
+ ret = target_inum == inode->bi_inum;
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *inode)
+{
+ /* A dirent matches when its position equals the inode's stored backpointer. */
+ if (d.k->p.inode != inode->bi_dir)
+ return false;
+
+ return d.k->p.offset == inode->bi_dir_offset;
+}
+
+static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+{
+ struct bch_fs *c = trans->c;
+ struct inode_walker_entry *i;
+ int ret = 0, ret2 = 0;
+ s64 count2;
+
+ for (i = w->d; i < w->d + w->nr; i++) {
+ if (i->inode.bi_sectors == i->count)
+ continue;