]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/fsck.c
Update bcachefs sources to bee7b5a4fa21 bcachefs: Pin btree cache in ram for random...
[bcachefs-tools-debian] / libbcachefs / fsck.c
index e4a8a14c46bc922983e91edcdc9ece6fe717d3eb..dfd54708d2a0212f4c7263654c849057886d2d2a 100644 (file)
@@ -252,7 +252,7 @@ create_lostfound:
                goto err;
 
        ret =   bch2_dirent_create_snapshot(trans,
-                               root_inode.bi_inum, snapshot, &root_hash_info,
+                               0, root_inode.bi_inum, snapshot, &root_hash_info,
                                mode_to_type(lostfound->bi_mode),
                                &lostfound_str,
                                lostfound->bi_inum,
@@ -275,9 +275,24 @@ static int reattach_inode(struct btree_trans *trans,
        char name_buf[20];
        struct qstr name;
        u64 dir_offset = 0;
+       u32 dirent_snapshot = inode_snapshot;
        int ret;
 
-       ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
+       if (inode->bi_subvol) {
+               inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+
+               u64 root_inum;
+               ret = subvol_lookup(trans, inode->bi_parent_subvol,
+                                   &dirent_snapshot, &root_inum);
+               if (ret)
+                       return ret;
+
+               snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
+       } else {
+               snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+       }
+
+       ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
        if (ret)
                return ret;
 
@@ -291,14 +306,16 @@ static int reattach_inode(struct btree_trans *trans,
 
        dir_hash = bch2_hash_info_init(trans->c, &lostfound);
 
-       snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
        name = (struct qstr) QSTR(name_buf);
 
        ret = bch2_dirent_create_snapshot(trans,
-                               lostfound.bi_inum, inode_snapshot,
+                               inode->bi_parent_subvol, lostfound.bi_inum,
+                               dirent_snapshot,
                                &dir_hash,
                                inode_d_type(inode),
-                               &name, inode->bi_inum, &dir_offset,
+                               &name,
+                               inode->bi_subvol ?: inode->bi_inum,
+                               &dir_offset,
                                BCH_HASH_SET_MUST_CREATE);
        if (ret)
                return ret;
@@ -564,13 +581,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 }
 
 static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
-                         u32 snapshot, bool is_whiteout)
+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
 {
-       struct inode_walker_entry *i;
-
-       snapshot = bch2_snapshot_equiv(c, snapshot);
+       bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+       u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
 
+       struct inode_walker_entry *i;
        __darray_for_each(w->inodes, i)
                if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
                        goto found;
@@ -581,20 +597,24 @@ found:
 
        if (snapshot != i->snapshot && !is_whiteout) {
                struct inode_walker_entry new = *i;
-               size_t pos;
-               int ret;
 
                new.snapshot = snapshot;
                new.count = 0;
 
-               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
-                        w->last_pos.inode, snapshot, i->snapshot);
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, k);
+
+               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+                        "unexpected because we should always update the inode when we update a key in that inode\n"
+                        "%s",
+                        w->last_pos.inode, snapshot, i->snapshot, buf.buf);
+               printbuf_exit(&buf);
 
                while (i > w->inodes.data && i[-1].snapshot > snapshot)
                        --i;
 
-               pos = i - w->inodes.data;
-               ret = darray_insert_item(&w->inodes, pos, new);
+               size_t pos = i - w->inodes.data;
+               int ret = darray_insert_item(&w->inodes, pos, new);
                if (ret)
                        return ERR_PTR(ret);
 
@@ -605,21 +625,21 @@ found:
 }
 
 static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
-                                            struct inode_walker *w, struct bpos pos,
-                                            bool is_whiteout)
+                                            struct inode_walker *w,
+                                            struct bkey_s_c k)
 {
-       if (w->last_pos.inode != pos.inode) {
-               int ret = get_inodes_all_snapshots(trans, w, pos.inode);
+       if (w->last_pos.inode != k.k->p.inode) {
+               int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
                if (ret)
                        return ERR_PTR(ret);
-       } else if (bkey_cmp(w->last_pos, pos)) {
+       } else if (bkey_cmp(w->last_pos, k.k->p)) {
                darray_for_each(w->inodes, i)
                        i->seen_this_pos = false;
        }
 
-       w->last_pos = pos;
+       w->last_pos = k.k->p;
 
-       return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
+       return lookup_inode_for_snapshot(trans->c, w, k);
 }
 
 static int __get_visible_inodes(struct btree_trans *trans,
@@ -767,6 +787,43 @@ fsck_err:
        goto out;
 }
 
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+                                               struct btree_iter *iter,
+                                               struct bpos pos)
+{
+       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+}
+
+static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
+                                              struct btree_iter *iter,
+                                              struct bch_inode_unpacked *inode,
+                                              u32 *snapshot)
+{
+       if (inode->bi_subvol) {
+               u64 inum;
+               int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
+               if (ret)
+                       return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
+       }
+
+       return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
+}
+
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+                                  struct bkey_s_c_dirent d)
+{
+       return  inode->bi_dir           == d.k->p.inode &&
+               inode->bi_dir_offset    == d.k->p.offset;
+}
+
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+                                  struct bch_inode_unpacked *inode)
+{
+       return d.v->d_type == DT_SUBVOL
+               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
+               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
+}
+
 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
 {
        struct btree_iter iter;
@@ -779,6 +836,49 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
        return k.k->type == KEY_TYPE_set;
 }
 
+static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+                                   struct bch_inode_unpacked *inode,
+                                   u32 inode_snapshot, bool *write_inode)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+
+       struct btree_iter dirent_iter = {};
+       struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
+       int ret = bkey_err(d);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(ret,
+                       c, inode_points_to_missing_dirent,
+                       "inode points to missing dirent\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
+           fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+                       c, inode_points_to_wrong_dirent,
+                       "inode points to dirent that does not point back:\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k),
+                        prt_newline(&buf),
+                        bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               /*
+                * We just clear the backpointer fields for now. If we find a
+                * dirent that points to this inode in check_dirents(), we'll
+                * update it then; then when we get to check_path() if the
+                * backpointer is still 0 we'll reattach it.
+                */
+               inode->bi_dir = 0;
+               inode->bi_dir_offset = 0;
+               inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
+               *write_inode = true;
+       }
+
+       ret = 0;
+fsck_err:
+       bch2_trans_iter_exit(trans, &dirent_iter);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 static int check_inode(struct btree_trans *trans,
                       struct btree_iter *iter,
                       struct bkey_s_c k,
@@ -923,6 +1023,22 @@ static int check_inode(struct btree_trans *trans,
                do_update = true;
        }
 
+       if (u.bi_dir || u.bi_dir_offset) {
+               ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+               if (ret)
+                       goto err;
+       }
+
+       if (fsck_err_on(u.bi_parent_subvol &&
+                       (u.bi_subvol == 0 ||
+                        u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
+                       c, inode_bi_parent_nonzero,
+                       "inode %llu:%u has subvol %u but nonzero parent subvol %u",
+                       u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
+               u.bi_parent_subvol = 0;
+               do_update = true;
+       }
+
        if (u.bi_subvol) {
                struct bch_subvolume s;
 
@@ -980,28 +1096,6 @@ int bch2_check_inodes(struct bch_fs *c)
        return ret;
 }
 
-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
-                                               struct btree_iter *iter,
-                                               struct bpos pos)
-{
-       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-}
-
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-                                  struct bkey_s_c_dirent d)
-{
-       return  inode->bi_dir           == d.k->p.inode &&
-               inode->bi_dir_offset    == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
-                                  struct bch_inode_unpacked *inode)
-{
-       return d.v->d_type == DT_SUBVOL
-               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
-               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
-}
-
 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
@@ -1310,7 +1404,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto err;
        }
 
-       i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                goto err;
@@ -1489,84 +1583,82 @@ fsck_err:
        return ret ?: trans_was_restarted(trans, restart_count);
 }
 
-static int check_inode_backpointer(struct btree_trans *trans,
+static int check_dirent_inode_dirent(struct btree_trans *trans,
                                   struct btree_iter *iter,
                                   struct bkey_s_c_dirent d,
                                   struct bch_inode_unpacked *target,
                                   u32 target_snapshot)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter bp_iter = { NULL };
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
+       if (inode_points_to_dirent(target, d))
+               return 0;
+
        if (!target->bi_dir &&
            !target->bi_dir_offset) {
                target->bi_dir          = d.k->p.inode;
                target->bi_dir_offset   = d.k->p.offset;
-
-               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-               if (ret)
-                       goto err;
+               return __bch2_fsck_write_inode(trans, target, target_snapshot);
        }
 
-       if (!inode_points_to_dirent(target, d)) {
-               struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
-                                     SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
-               ret = bkey_err(bp_dirent);
-               if (ret && !bch2_err_matches(ret, ENOENT))
-                       goto err;
-
-               bool backpointer_exists = !ret;
-               ret = 0;
-
-               bch2_bkey_val_to_text(&buf, c, d.s_c);
-               prt_newline(&buf);
-               if (backpointer_exists)
-                       bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+       struct btree_iter bp_iter = { NULL };
+       struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
+                             SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+       ret = bkey_err(bp_dirent);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;
 
-               if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
-                               c, inode_dir_multiple_links,
-                               "directory %llu:%u with multiple links\n%s",
-                               target->bi_inum, target_snapshot, buf.buf)) {
-                       ret = __remove_dirent(trans, d.k->p);
-                       goto out;
-               }
+       bool backpointer_exists = !ret;
+       ret = 0;
+
+       if (fsck_err_on(!backpointer_exists,
+                       c, inode_wrong_backpointer,
+                       "inode %llu:%u has wrong backpointer:\n"
+                       "got       %llu:%llu\n"
+                       "should be %llu:%llu",
+                       target->bi_inum, target_snapshot,
+                       target->bi_dir,
+                       target->bi_dir_offset,
+                       d.k->p.inode,
+                       d.k->p.offset)) {
+               target->bi_dir          = d.k->p.inode;
+               target->bi_dir_offset   = d.k->p.offset;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               goto out;
+       }
 
-               /*
-                * hardlinked file with nlink 0:
-                * We're just adjusting nlink here so check_nlinks() will pick
-                * it up, it ignores inodes with nlink 0
-                */
-               if (fsck_err_on(backpointer_exists && !target->bi_nlink,
-                               c, inode_multiple_links_but_nlink_0,
-                               "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
-                               target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
-                       target->bi_nlink++;
-                       target->bi_flags &= ~BCH_INODE_unlinked;
-
-                       ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
+       bch2_bkey_val_to_text(&buf, c, d.s_c);
+       prt_newline(&buf);
+       if (backpointer_exists)
+               bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+
+       if (fsck_err_on(backpointer_exists &&
+                       (S_ISDIR(target->bi_mode) ||
+                        target->bi_subvol),
+                       c, inode_dir_multiple_links,
+                       "%s %llu:%u with multiple links\n%s",
+                       S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
+                       target->bi_inum, target_snapshot, buf.buf)) {
+               ret = __remove_dirent(trans, d.k->p);
+               goto out;
+       }
 
-               if (fsck_err_on(!backpointer_exists,
-                               c, inode_wrong_backpointer,
-                               "inode %llu:%u has wrong backpointer:\n"
-                               "got       %llu:%llu\n"
-                               "should be %llu:%llu",
-                               target->bi_inum, target_snapshot,
-                               target->bi_dir,
-                               target->bi_dir_offset,
-                               d.k->p.inode,
-                               d.k->p.offset)) {
-                       target->bi_dir          = d.k->p.inode;
-                       target->bi_dir_offset   = d.k->p.offset;
-
-                       ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
+       /*
+        * hardlinked file with nlink 0:
+        * We're just adjusting nlink here so check_nlinks() will pick
+        * it up, it ignores inodes with nlink 0
+        */
+       if (fsck_err_on(backpointer_exists && !target->bi_nlink,
+                       c, inode_multiple_links_but_nlink_0,
+                       "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
+                       target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+               target->bi_nlink++;
+               target->bi_flags &= ~BCH_INODE_unlinked;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               if (ret)
+                       goto err;
        }
 out:
 err:
@@ -1588,7 +1680,7 @@ static int check_dirent_target(struct btree_trans *trans,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       ret = check_inode_backpointer(trans, iter, d, target, target_snapshot);
+       ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
        if (ret)
                goto err;
 
@@ -1606,27 +1698,12 @@ static int check_dirent_target(struct btree_trans *trans,
 
                bkey_reassemble(&n->k_i, d.s_c);
                n->v.d_type = inode_d_type(target);
-
-               ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-               if (ret)
-                       goto err;
-
-               d = dirent_i_to_s_c(n);
-       }
-
-       if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
-                       target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
-                       c, dirent_d_parent_subvol_wrong,
-                       "dirent has wrong d_parent_subvol field: got %u, should be %u",
-                       le32_to_cpu(d.v->d_parent_subvol),
-                       target->bi_parent_subvol)) {
-               n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-               ret = PTR_ERR_OR_ZERO(n);
-               if (ret)
-                       goto err;
-
-               bkey_reassemble(&n->k_i, d.s_c);
-               n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+               if (n->v.d_type == DT_SUBVOL) {
+                       n->v.d_parent_subvol = target->bi_parent_subvol;
+                       n->v.d_child_subvol = target->bi_subvol;
+               } else {
+                       n->v.d_inum = target->bi_inum;
+               }
 
                ret = bch2_trans_update(trans, iter, &n->k_i, 0);
                if (ret)
@@ -1641,45 +1718,113 @@ fsck_err:
        return ret;
 }
 
-static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter,
-                              struct bkey_s_c_dirent d)
+/* find a subvolume that's a descendent of @snapshot: */
+static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
+               if (k.k->type != KEY_TYPE_subvolume)
+                       continue;
+
+               struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+               if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
+                       bch2_trans_iter_exit(trans, &iter);
+                       *subvolid = k.k->p.offset;
+                       goto found;
+               }
+       }
+       if (!ret)
+               ret = -ENOENT;
+found:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
+                                 struct bkey_s_c_dirent d)
 {
        struct bch_fs *c = trans->c;
+       struct btree_iter subvol_iter = {};
        struct bch_inode_unpacked subvol_root;
+       u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
        u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
-       u32 target_snapshot;
-       u64 target_inum;
+       u32 parent_snapshot;
+       u64 parent_inum;
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       ret = subvol_lookup(trans, target_subvol,
-                             &target_snapshot, &target_inum);
+       ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
        if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
-       if (fsck_err_on(ret, c, dirent_to_missing_subvol,
-                       "dirent points to missing subvolume %u",
-                       le32_to_cpu(d.v->d_child_subvol)))
-               return __remove_dirent(trans, d.k->p);
+       if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
+                       "dirent parent_subvol points to missing subvolume\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
+           fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
+                       c, dirent_not_visible_in_parent_subvol,
+                       "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
+                       parent_snapshot,
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               u32 new_parent_subvol;
+               ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+               if (ret)
+                       goto err;
 
-       ret = lookup_inode(trans, target_inum,
-                          &subvol_root, &target_snapshot);
+               struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
+               ret = PTR_ERR_OR_ZERO(new_dirent);
+               if (ret)
+                       goto err;
+
+               new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
+       }
+
+       struct bkey_s_c_subvolume s =
+               bch2_bkey_get_iter_typed(trans, &subvol_iter,
+                                        BTREE_ID_subvolumes, POS(0, target_subvol),
+                                        0, subvolume);
+       ret = bkey_err(s.s_c);
        if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
-       if (fsck_err_on(ret, c, subvol_to_missing_root,
-                       "subvolume %u points to missing subvolume root %llu",
-                       target_subvol,
-                       target_inum)) {
-               bch_err(c, "repair not implemented yet");
-               return -EINVAL;
+       if (ret) {
+               if (fsck_err(c, dirent_to_missing_subvol,
+                            "dirent points to missing subvolume\n%s",
+                            (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
+                       return __remove_dirent(trans, d.k->p);
+               ret = 0;
+               goto out;
        }
 
-       if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
-                       c, subvol_root_wrong_bi_subvol,
-                       "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
+       if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
+                       c, subvol_fs_path_parent_wrong,
+                       "subvol with wrong fs_path_parent, should be be %u\n%s",
+                       parent_subvol,
+                       (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+               struct bkey_i_subvolume *n =
+                       bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+               ret = PTR_ERR_OR_ZERO(n);
+               if (ret)
+                       goto err;
+
+               n->v.fs_path_parent = le32_to_cpu(parent_subvol);
+       }
+
+       u64 target_inum = le64_to_cpu(s.v->inode);
+       u32 target_snapshot = le32_to_cpu(s.v->snapshot);
+
+       ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+                       c, inode_bi_parent_wrong,
+                       "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
                        target_inum,
-                       subvol_root.bi_subvol, target_subvol)) {
-               subvol_root.bi_subvol = target_subvol;
+                       subvol_root.bi_parent_subvol, parent_subvol)) {
+               subvol_root.bi_parent_subvol = parent_subvol;
                ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
                if (ret)
                        return ret;
@@ -1689,7 +1834,11 @@ static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *ite
                                  target_snapshot);
        if (ret)
                return ret;
+out:
+err:
 fsck_err:
+       bch2_trans_iter_exit(trans, &subvol_iter);
+       printbuf_exit(&buf);
        return ret;
 }
 
@@ -1731,7 +1880,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 
        BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
 
-       i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, dir, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret < 0)
                goto err;
@@ -1777,7 +1926,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        d = bkey_s_c_to_dirent(k);
 
        if (d.v->d_type == DT_SUBVOL) {
-               ret = check_subvol_dirent(trans, iter, d);
+               ret = check_dirent_to_subvol(trans, iter, d);
                if (ret)
                        goto err;
        } else {
@@ -1858,7 +2007,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        if (ret)
                return ret;
 
-       i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                return ret;
@@ -1997,62 +2146,52 @@ static int path_down(struct bch_fs *c, pathbuf *p,
  *
  * XXX: we should also be verifying that inodes are in the right subvolumes
  */
-static int check_path(struct btree_trans *trans,
-                     pathbuf *p,
-                     struct bch_inode_unpacked *inode,
-                     u32 snapshot)
+static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
 {
        struct bch_fs *c = trans->c;
+       struct btree_iter inode_iter = {};
+       struct bch_inode_unpacked inode;
+       struct printbuf buf = PRINTBUF;
+       u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
        int ret = 0;
 
-       snapshot = bch2_snapshot_equiv(c, snapshot);
        p->nr = 0;
 
-       while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
-                inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+       BUG_ON(bch2_inode_unpack(inode_k, &inode));
+
+       while (!(inode.bi_inum == BCACHEFS_ROOT_INO &&
+                inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
                struct btree_iter dirent_iter;
                struct bkey_s_c_dirent d;
                u32 parent_snapshot = snapshot;
 
-               if (inode->bi_subvol) {
-                       u64 inum;
-
-                       ret = subvol_lookup(trans, inode->bi_parent_subvol,
-                                           &parent_snapshot, &inum);
-                       if (ret)
-                               break;
-               }
-
-               d = dirent_get_by_pos(trans, &dirent_iter,
-                                     SPOS(inode->bi_dir, inode->bi_dir_offset,
-                                          parent_snapshot));
+               d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
                ret = bkey_err(d.s_c);
                if (ret && !bch2_err_matches(ret, ENOENT))
                        break;
 
-               if (!ret && !dirent_points_to_inode(d, inode)) {
+               if (!ret && !dirent_points_to_inode(d, &inode)) {
                        bch2_trans_iter_exit(trans, &dirent_iter);
                        ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                }
 
                if (bch2_err_matches(ret, ENOENT)) {
-                       if (fsck_err(c,  inode_unreachable,
-                                    "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
-                                    inode->bi_inum, snapshot,
-                                    bch2_d_type_str(inode_d_type(inode)),
-                                    inode->bi_nlink,
-                                    inode->bi_dir,
-                                    inode->bi_dir_offset))
-                               ret = reattach_inode(trans, inode, snapshot);
-                       break;
+                       ret = 0;
+                       if (fsck_err(c, inode_unreachable,
+                                    "unreachable inode\n%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_bkey_val_to_text(&buf, c, inode_k),
+                                     buf.buf)))
+                               ret = reattach_inode(trans, &inode, snapshot);
+                       goto out;
                }
 
                bch2_trans_iter_exit(trans, &dirent_iter);
 
-               if (!S_ISDIR(inode->bi_mode))
+               if (!S_ISDIR(inode.bi_mode))
                        break;
 
-               ret = path_down(c, p, inode->bi_inum, snapshot);
+               ret = path_down(c, p, inode.bi_inum, snapshot);
                if (ret) {
                        bch_err(c, "memory allocation failure");
                        return ret;
@@ -2060,7 +2199,12 @@ static int check_path(struct btree_trans *trans,
 
                snapshot = parent_snapshot;
 
-               ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+               bch2_trans_iter_exit(trans, &inode_iter);
+               inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
+                                            SPOS(0, inode.bi_dir, snapshot), 0);
+               ret = bkey_err(inode_k) ?:
+                       !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
+                       : bch2_inode_unpack(inode_k, &inode);
                if (ret) {
                        /* Should have been caught in dirents pass */
                        if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2068,30 +2212,35 @@ static int check_path(struct btree_trans *trans,
                        break;
                }
 
-               if (path_is_dup(p, inode->bi_inum, snapshot)) {
+               snapshot = inode_k.k->p.snapshot;
+
+               if (path_is_dup(p, inode.bi_inum, snapshot)) {
                        /* XXX print path */
                        bch_err(c, "directory structure loop");
 
                        darray_for_each(*p, i)
                                pr_err("%llu:%u", i->inum, i->snapshot);
-                       pr_err("%llu:%u", inode->bi_inum, snapshot);
+                       pr_err("%llu:%u", inode.bi_inum, snapshot);
 
                        if (!fsck_err(c, dir_loop, "directory structure loop"))
                                return 0;
 
-                       ret = remove_backpointer(trans, inode);
+                       ret = remove_backpointer(trans, &inode);
                        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                bch_err_msg(c, ret, "removing dirent");
                        if (ret)
                                break;
 
-                       ret = reattach_inode(trans, inode, snapshot);
+                       ret = reattach_inode(trans, &inode, snapshot);
                        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                               bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+                               bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
                        break;
                }
        }
+out:
 fsck_err:
+       bch2_trans_iter_exit(trans, &inode_iter);
+       printbuf_exit(&buf);
        bch_err_fn(c, ret);
        return ret;
 }
@@ -2103,7 +2252,6 @@ fsck_err:
  */
 int bch2_check_directory_structure(struct bch_fs *c)
 {
-       struct bch_inode_unpacked u;
        pathbuf path = { 0, };
        int ret;
 
@@ -2116,12 +2264,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
                        if (!bkey_is_inode(k.k))
                                continue;
 
-                       BUG_ON(bch2_inode_unpack(k, &u));
-
-                       if (u.bi_flags & BCH_INODE_unlinked)
+                       if (bch2_inode_flags(k) & BCH_INODE_unlinked)
                                continue;
 
-                       check_path(trans, &path, &u, iter.pos.snapshot);
+                       check_path(trans, &path, k);
                })));
        darray_exit(&path);