]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/inode.c
Update bcachefs sources to d1fd471830 bcachefs: Add more debug checks
[bcachefs-tools-debian] / libbcachefs / inode.c
index 05f617aeaea77f5c0acca74cb4565bcc415bf543..823a1ddec5aca57983b123fc13df6be2ba940aaa 100644 (file)
@@ -1,18 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "btree_key_cache.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "error.h"
 #include "extents.h"
 #include "inode.h"
-#include "io.h"
-#include "keylist.h"
+#include "str_hash.h"
+#include "varint.h"
 
 #include <linux/random.h>
 
 #include <asm/unaligned.h>
 
-#define FIELD_BYTES()                                          \
+const char * const bch2_inode_opts[] = {       /* one stringified name per BCH_INODE_OPTS() entry */
+#define x(name, ...)   #name,
+       BCH_INODE_OPTS()
+#undef  x
+       NULL,                                   /* array is NULL-terminated */
+};
 
 static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
 static const u8 bits_table[8] = {
@@ -83,21 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end,
        return bytes;
 }
 
-void bch2_inode_pack(struct bkey_inode_buf *packed,
-                    const struct bch_inode_unpacked *inode)
+static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
+                                       const struct bch_inode_unpacked *inode)
 {
-       u8 *out = packed->inode.v.fields;
+       struct bkey_i_inode *k = &packed->inode;
+       u8 *out = k->v.fields;
        u8 *end = (void *) &packed[1];
        u8 *last_nonzero_field = out;
        unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+       unsigned bytes;
 
-       bkey_inode_init(&packed->inode.k_i);
-       packed->inode.k.p.inode         = inode->bi_inum;
-       packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
-       packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
-       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
-
-#define BCH_INODE_FIELD(_name, _bits)                                  \
+#define x(_name, _bits)                                                        \
        out += inode_encode_field(out, end, 0, inode->_name);           \
        nr_fields++;                                                    \
                                                                        \
@@ -107,17 +110,78 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
        }
 
        BCH_INODE_FIELDS()
-#undef  BCH_INODE_FIELD
+#undef  x
 
        out = last_nonzero_field;
        nr_fields = last_nonzero_fieldnr;
 
-       set_bkey_val_bytes(&packed->inode.k, out - (u8 *) &packed->inode.v);
-       memset(out, 0,
-              (u8 *) &packed->inode.v +
-              bkey_val_bytes(&packed->inode.k) - out);
+       bytes = out - (u8 *) &packed->inode.v;
+       set_bkey_val_bytes(&packed->inode.k, bytes);
+       memset_u64s_tail(&packed->inode.v, 0, bytes);
+
+       SET_INODE_NR_FIELDS(&k->v, nr_fields);
+}
 
-       SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);
+static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
+                              const struct bch_inode_unpacked *inode)
+{
+       struct bkey_i_inode *k = &packed->inode;
+       u8 *out = k->v.fields;                  /* write cursor into the variable-length field area */
+       u8 *end = (void *) &packed[1];          /* one past the end of *packed */
+       u8 *last_nonzero_field = out;
+       unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+       unsigned bytes;
+       int ret;
+
+#define x(_name, _bits)                                                        \
+       nr_fields++;                                                    \
+                                                                       \
+       if (inode->_name) {                                             \
+               ret = bch2_varint_encode(out, inode->_name);            \
+               out += ret;                                             \
+                                                                       \
+               if (_bits > 64)                                         \
+                       *out++ = 0;                                     \
+                                                                       \
+               last_nonzero_field = out;                               \
+               last_nonzero_fieldnr = nr_fields;                       \
+       } else {                                                        \
+               *out++ = 0;                                             \
+                                                                       \
+               if (_bits > 64)                                         \
+                       *out++ = 0;                                     \
+       }
+
+       BCH_INODE_FIELDS()
+#undef  x
+       BUG_ON(out > end);                      /* NOTE: checked only after every field has been written */
+
+       out = last_nonzero_field;               /* drop trailing zero fields: rewind past the last nonzero one */
+       nr_fields = last_nonzero_fieldnr;
+
+       bytes = out - (u8 *) &packed->inode.v;
+       set_bkey_val_bytes(&packed->inode.k, bytes);
+       memset_u64s_tail(&packed->inode.v, 0, bytes);   /* presumably zeroes the tail of the last u64 - confirm */
+
+       SET_INODE_NR_FIELDS(&k->v, nr_fields);
+}
+
+void bch2_inode_pack(struct bch_fs *c,
+                    struct bkey_inode_buf *packed,
+                    const struct bch_inode_unpacked *inode)
+{
+       bkey_inode_init(&packed->inode.k_i);
+       packed->inode.k.p.offset        = inode->bi_inum;
+       packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
+       packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
+       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
+
+       if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
+               SET_INODE_NEW_VARINT(&packed->inode.v, true);
+               bch2_inode_pack_v2(packed, inode);
+       } else {
+               bch2_inode_pack_v1(packed, inode);
+       }
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
                struct bch_inode_unpacked unpacked;
@@ -129,27 +193,24 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
                BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
                BUG_ON(unpacked.bi_mode         != inode->bi_mode);
 
-#define BCH_INODE_FIELD(_name, _bits)  BUG_ON(unpacked._name != inode->_name);
+#define x(_name, _bits)        if (unpacked._name != inode->_name)             \
+                       panic("unpacked %llu should be %llu",           \
+                             (u64) unpacked._name, (u64) inode->_name);
                BCH_INODE_FIELDS()
-#undef  BCH_INODE_FIELD
+#undef  x
        }
 }
 
-int bch2_inode_unpack(struct bkey_s_c_inode inode,
-                     struct bch_inode_unpacked *unpacked)
+static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
+                               struct bch_inode_unpacked *unpacked)
 {
        const u8 *in = inode.v->fields;
-       const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
+       const u8 *end = bkey_val_end(inode);
        u64 field[2];
        unsigned fieldnr = 0, field_bits;
        int ret;
 
-       unpacked->bi_inum       = inode.k->p.inode;
-       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
-       unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
-       unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
-
-#define BCH_INODE_FIELD(_name, _bits)                                  \
+#define x(_name, _bits)                                        \
        if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {                    \
                memset(&unpacked->_name, 0,                             \
                       sizeof(*unpacked) -                              \
@@ -168,91 +229,206 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
        in += ret;
 
        BCH_INODE_FIELDS()
-#undef  BCH_INODE_FIELD
+#undef  x
 
        /* XXX: signal if there were more fields than expected? */
+       return 0;
+}
+
+static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
+                               struct bch_inode_unpacked *unpacked)
+{
+       const u8 *in = inode.v->fields;
+       const u8 *end = bkey_val_end(inode);
+       unsigned fieldnr = 0;   /* fields at or past INODE_NR_FIELDS() unpack as 0 */
+       int ret;                /* bytes consumed by a decode, or a negative error that is returned as-is */
+       u64 v[2];               /* v[1] is decoded only for fields wider than 64 bits, and must be 0 */
+
+#define x(_name, _bits)                                                        \
+       if (fieldnr < INODE_NR_FIELDS(inode.v)) {                       \
+               ret = bch2_varint_decode(in, end, &v[0]);               \
+               if (ret < 0)                                            \
+                       return ret;                                     \
+               in += ret;                                              \
+                                                                       \
+               if (_bits > 64) {                                       \
+                       ret = bch2_varint_decode(in, end, &v[1]);       \
+                       if (ret < 0)                                    \
+                               return ret;                             \
+                       in += ret;                                      \
+               } else {                                                \
+                       v[1] = 0;                                       \
+               }                                                       \
+       } else {                                                        \
+               v[0] = v[1] = 0;                                        \
+       }                                                               \
+                                                                       \
+       unpacked->_name = v[0];                                         \
+       if (v[1] || v[0] != unpacked->_name)                            \
+               return -1;                                              \
+       fieldnr++;
 
+       BCH_INODE_FIELDS()      /* returns -1 above if a value does not fit its destination field */
+#undef  x
+
+       /* XXX: signal if there were more fields than expected? */
        return 0;
 }
 
-static const char *bch2_inode_invalid(const struct bch_fs *c,
-                                     struct bkey_s_c k)
+int bch2_inode_unpack(struct bkey_s_c_inode inode,
+                     struct bch_inode_unpacked *unpacked)
 {
-       if (k.k->p.offset)
-               return "nonzero offset";
+       unpacked->bi_inum       = inode.k->p.offset;    /* inode number comes from the key's offset */
+       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
+       unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
+       unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
 
-       switch (k.k->type) {
-       case BCH_INODE_FS: {
+       if (INODE_NEW_VARINT(inode.v)) {        /* the value itself records which field encoding it uses */
+               return bch2_inode_unpack_v2(inode, unpacked);
+       } else {
+               return bch2_inode_unpack_v1(inode, unpacked);
+       }
+
+       return 0;       /* not reached: both branches above return */
+}
+
+struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
+                                  struct bch_inode_unpacked *inode,
+                                  u64 inum, unsigned flags)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
+                                  BTREE_ITER_CACHED|flags);    /* CACHED is always set; caller flags are OR'd in */
+       if (IS_ERR(iter))
+               return iter;
+
+       k = bch2_btree_iter_peek_cached(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;   /* any other key type at this position is -EIO */
+       if (ret)
+               goto err;
+
+       ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
+       if (ret)
+               goto err;
+
+       return iter;    /* success: the iterator is handed to the caller, not put here */
+err:
+       bch2_trans_iter_put(trans, iter);       /* failure: release the iterator, return ERR_PTR(ret) */
+       return ERR_PTR(ret);
+}
+
+int bch2_inode_write(struct btree_trans *trans,
+                    struct btree_iter *iter,
+                    struct bch_inode_unpacked *inode)
+{
+       struct bkey_inode_buf *inode_p;
+
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));  /* nothing in this function frees it */
+       if (IS_ERR(inode_p))
+               return PTR_ERR(inode_p);
+
+       bch2_inode_pack(trans->c, inode_p, inode);
+       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); /* no commit is issued here */
+       return 0;
+}
+
+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
                struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
                struct bch_inode_unpacked unpacked;
 
-               if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
-                       return "incorrect value size";
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
 
-               if (k.k->p.inode < BLOCKDEV_INODE_MAX)
-                       return "fs inode in blockdev range";
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
+               return "incorrect value size";
 
-               if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
-                       return "invalid str hash type";
+       if (k.k->p.offset < BLOCKDEV_INODE_MAX)
+               return "fs inode in blockdev range";
 
-               if (bch2_inode_unpack(inode, &unpacked))
-                       return "invalid variable length fields";
+       if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
+               return "invalid str hash type";
 
-               return NULL;
-       }
-       case BCH_INODE_BLOCKDEV:
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
-                       return "incorrect value size";
+       if (bch2_inode_unpack(inode, &unpacked))
+               return "invalid variable length fields";
 
-               if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
-                       return "blockdev inode in fs range";
+       if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
+               return "invalid data checksum type";
 
-               return NULL;
-       case BCH_INODE_GENERATION:
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
-                       return "incorrect value size";
+       if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
+               return "invalid compression type";      /* was a copy of the checksum message */
 
-               return NULL;
-       default:
-               return "invalid type";
-       }
+       if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+           unpacked.bi_nlink != 0)
+               return "flagged as unlinked but bi_nlink != 0";
+
+       return NULL;    /* NULL: every check passed; otherwise the first failed check's message */
 }
 
-static void bch2_inode_to_text(struct bch_fs *c, char *buf,
-                              size_t size, struct bkey_s_c k)
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
+                      struct bkey_s_c k)
 {
-       struct bkey_s_c_inode inode;
+       struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
        struct bch_inode_unpacked unpacked;
 
-       switch (k.k->type) {
-       case BCH_INODE_FS:
-               inode = bkey_s_c_to_inode(k);
-               if (bch2_inode_unpack(inode, &unpacked)) {
-                       scnprintf(buf, size, "(unpack error)");
-                       break;
-               }
-
-               scnprintf(buf, size, "i_size %llu", unpacked.bi_size);
-               break;
+       if (bch2_inode_unpack(inode, &unpacked)) {
+               pr_buf(out, "(unpack error)");  /* nothing else is printed for an undecodable inode */
+               return;
        }
+
+       pr_buf(out, "mode: %o ", unpacked.bi_mode);     /* mode is printed in octal */
+
+#define x(_name, _bits)                                                \
+       pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
+       BCH_INODE_FIELDS()      /* one "name: value " pair per field, value cast to u64 */
+#undef  x
 }
 
-const struct bkey_ops bch2_bkey_inode_ops = {
-       .key_invalid    = bch2_inode_invalid,
-       .val_to_text    = bch2_inode_to_text,
-};
+const char *bch2_inode_generation_invalid(const struct bch_fs *c,
+                                         struct bkey_s_c k)
+{
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
 
-void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
-                    uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
+               return "incorrect value size";
+
+       return NULL;    /* NULL: both checks passed; otherwise the failed check's message */
+}
+
+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
+                                  struct bkey_s_c k)
 {
-       s64 now = timespec_to_bch2_time(c, CURRENT_TIME);
+       struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
+
+       pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));       /* stored little-endian */
+}
+
+void bch2_inode_init_early(struct bch_fs *c,
+                          struct bch_inode_unpacked *inode_u)
+{
+       enum bch_str_hash_type str_hash =
+               bch2_str_hash_opt_to_type(c, c->opts.str_hash); /* option value is converted to a hash type first */
 
        memset(inode_u, 0, sizeof(*inode_u));
 
        /* ick */
-       inode_u->bi_flags |= c->opts.str_hash << INODE_STR_HASH_OFFSET;
-       get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed));
+       inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; /* hash type lives in bi_flags at this offset */
+       get_random_bytes(&inode_u->bi_hash_seed,
+                        sizeof(inode_u->bi_hash_seed));        /* seed is filled with random bytes */
+}
 
+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
+                         uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+                         struct bch_inode_unpacked *parent)
+{
        inode_u->bi_mode        = mode;
        inode_u->bi_uid         = uid;
        inode_u->bi_gid         = gid;
@@ -261,125 +437,122 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
        inode_u->bi_mtime       = now;
        inode_u->bi_ctime       = now;
        inode_u->bi_otime       = now;
+
+       if (parent && parent->bi_mode & S_ISGID) {
+               inode_u->bi_gid = parent->bi_gid;
+               if (S_ISDIR(mode))
+                       inode_u->bi_mode |= S_ISGID;
+       }
+
+       if (parent) {
+#define x(_name, ...)  inode_u->bi_##_name = parent->bi_##_name;
+               BCH_INODE_OPTS()
+#undef x
+       }
 }
 
-int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
-                     u64 min, u64 max, u64 *hint)
+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
+                    uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+                    struct bch_inode_unpacked *parent)
 {
-       struct bkey_inode_buf inode_p;
-       struct btree_iter iter;
-       bool searched_from_start = false;
-       int ret;
-
-       if (!max)
-               max = ULLONG_MAX;
+       bch2_inode_init_early(c, inode_u);      /* zeroes *inode_u, sets hash type and seed */
+       bch2_inode_init_late(inode_u, bch2_current_time(c),
+                            uid, gid, mode, rdev, parent);     /* parent may be NULL; init_late checks it */
+}
 
-       if (c->opts.inodes_32bit)
-               max = min_t(u64, max, U32_MAX);
+static inline u32 bkey_generation(struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_inode:
+               BUG();          /* callers must not pass a live inode key */
+       case KEY_TYPE_inode_generation:
+               return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
+       default:
+               return 0;       /* any other key type: generation 0 */
+       }
+}
 
-       if (*hint >= max || *hint < min)
-               *hint = min;
+int bch2_inode_create(struct btree_trans *trans,
+                     struct bch_inode_unpacked *inode_u)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_inode_buf *inode_p;
+       struct btree_iter *iter = NULL;
+       struct bkey_s_c k;
+       u64 min, max, start, *hint;
+       int ret;
 
-       if (*hint == min)
-               searched_from_start = true;
-again:
-       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(*hint, 0),
-                            BTREE_ITER_INTENT);
-
-       while (1) {
-               struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
-               u32 bi_generation = 0;
-
-               ret = btree_iter_err(k);
-               if (ret) {
-                       bch2_btree_iter_unlock(&iter);
-                       return ret;
-               }
-
-               switch (k.k->type) {
-               case BCH_INODE_BLOCKDEV:
-               case BCH_INODE_FS:
-                       /* slot used */
-                       if (iter.pos.inode == max)
-                               goto out;
-
-                       bch2_btree_iter_advance_pos(&iter);
-                       break;
+       u64 cpu = raw_smp_processor_id();       /* u64: shifted left by up to 63 bits below */
+       unsigned bits = (c->opts.inodes_32bit
+               ? 31 : 63) - c->inode_shard_bits;
 
-               case BCH_INODE_GENERATION: {
-                       struct bkey_s_c_inode_generation g =
-                               bkey_s_c_to_inode_generation(k);
-                       bi_generation = le32_to_cpu(g.v->bi_generation);
-                       /* fallthrough: */
-               }
-               default:
-                       inode_u->bi_generation = bi_generation;
+       min = (cpu << bits);                    /* this cpu's inode number range is [min, max] */
+       max = (cpu << bits) | ~(ULLONG_MAX << bits);
 
-                       bch2_inode_pack(&inode_p, inode_u);
-                       inode_p.inode.k.p = k.k->p;
+       min = max_t(u64, min, BLOCKDEV_INODE_MAX);      /* never allocate below BLOCKDEV_INODE_MAX */
+       hint = c->unused_inode_hints + cpu;             /* hint slot indexed by cpu */
 
-                       ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
-                                       BTREE_INSERT_ATOMIC,
-                                       BTREE_INSERT_ENTRY(&iter,
-                                                          &inode_p.inode.k_i));
+       start = READ_ONCE(*hint);
 
-                       if (ret != -EINTR) {
-                               bch2_btree_iter_unlock(&iter);
+       if (start >= max || start < min)
+               start = min;
 
-                               if (!ret) {
-                                       inode_u->bi_inum =
-                                               inode_p.inode.k.p.inode;
-                                       *hint = inode_p.inode.k.p.inode + 1;
-                               }
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+       if (IS_ERR(inode_p))
+               return PTR_ERR(inode_p);
+again:
+       for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (bkey_cmp(iter->pos, POS(0, max)) > 0)
+                       break;
 
-                               return ret;
-                       }
+               /*
+                * There's a potential cache coherency issue with the btree key
+                * cache code here - we're iterating over the btree, skipping
+                * that cache. We should never see an empty slot that isn't
+                * actually empty due to a pending update in the key cache
+                * because the update that creates the inode isn't done with a
+                * cached iterator, but - better safe than sorry, check the
+                * cache before using a slot:
+                */
+               if (k.k->type != KEY_TYPE_inode &&
+                   !bch2_btree_key_cache_find(c, BTREE_ID_INODES, iter->pos))
+                       goto found_slot;
+       }
 
-                       if (ret == -EINTR)
-                               continue;
+       bch2_trans_iter_put(trans, iter);
 
-               }
-       }
-out:
-       bch2_btree_iter_unlock(&iter);
+       if (ret)
+               return ret;
 
-       if (!searched_from_start) {
+       if (start != min) {
                /* Retry from start */
-               *hint = min;
-               searched_from_start = true;
+               start = min;
                goto again;
        }
 
        return -ENOSPC;
-}
+found_slot:
+       *hint                   = k.k->p.offset;
+       inode_u->bi_inum        = k.k->p.offset;
+       inode_u->bi_generation  = bkey_generation(k);   /* reuse the generation left in the slot, if any */
 
-int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size,
-                       struct extent_insert_hook *hook, u64 *journal_seq)
-{
-       return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
-                                      POS(inode_nr, new_size),
-                                      POS(inode_nr + 1, 0),
-                                      ZERO_VERSION, NULL, hook,
-                                      journal_seq);
+       ret = bch2_inode_write(trans, iter, inode_u);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
 }
 
 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_i_inode_generation delete;
+       struct bpos start = POS(inode_nr, 0);
+       struct bpos end = POS(inode_nr + 1, 0);
+       struct bkey_s_c k;
+       u64 bi_generation;
        int ret;
 
-       ret = bch2_inode_truncate(c, inode_nr, 0, NULL, NULL);
-       if (ret < 0)
-               return ret;
-
-       ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS,
-                                    POS(inode_nr, 0),
-                                    POS(inode_nr + 1, 0),
-                                    ZERO_VERSION, NULL, NULL, NULL);
-       if (ret < 0)
-               return ret;
-
        /*
         * If this was a directory, there shouldn't be any real dirents left -
         * but there could be whiteouts (from hash collisions) that we should
@@ -388,147 +561,98 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
         * XXX: the dirent could ideally would delete whiteouts when they're no
         * longer needed
         */
-       ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
-                                    POS(inode_nr, 0),
-                                    POS(inode_nr + 1, 0),
-                                    ZERO_VERSION, NULL, NULL, NULL);
-       if (ret < 0)
+       ret   = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+                                       start, end, NULL) ?:
+               bch2_btree_delete_range(c, BTREE_ID_XATTRS,
+                                       start, end, NULL) ?:
+               bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
+                                       start, end, NULL);
+       if (ret)
                return ret;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0),
-                            BTREE_ITER_INTENT);
-       do {
-               struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
-               u32 bi_generation = 0;
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
 
-               ret = btree_iter_err(k);
-               if (ret) {
-                       bch2_btree_iter_unlock(&iter);
-                       return ret;
-               }
+       bi_generation = 0;
 
-               bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c,
-                                       "inode %llu not found when deleting",
-                                       inode_nr);
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
+                                  BTREE_ITER_CACHED|BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_cached(iter);
 
-               switch (k.k->type) {
-               case BCH_INODE_FS: {
-                       struct bch_inode_unpacked inode_u;
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
 
-                       if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
-                               bi_generation = cpu_to_le32(inode_u.bi_generation) + 1;
-                       break;
-               }
-               case BCH_INODE_GENERATION: {
-                       struct bkey_s_c_inode_generation g =
-                               bkey_s_c_to_inode_generation(k);
-                       bi_generation = le32_to_cpu(g.v->bi_generation);
-                       break;
-               }
-               }
-
-               if (!bi_generation) {
-                       bkey_init(&delete.k);
-                       delete.k.p.inode = inode_nr;
-               } else {
-                       bkey_inode_generation_init(&delete.k_i);
-                       delete.k.p.inode = inode_nr;
-                       delete.v.bi_generation = cpu_to_le32(bi_generation);
-               }
-
-               ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
-                               BTREE_INSERT_ATOMIC|
-                               BTREE_INSERT_NOFAIL,
-                               BTREE_INSERT_ENTRY(&iter, &delete.k_i));
-       } while (ret == -EINTR);
-
-       bch2_btree_iter_unlock(&iter);
-       return ret;
-}
+       bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
+                               "inode %llu not found when deleting",
+                               inode_nr);
 
-int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
-                           struct bch_inode_unpacked *inode)
-{
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       int ret = -ENOENT;
-
-       for_each_btree_key(&iter, c, BTREE_ID_INODES,
-                          POS(inode_nr, 0),
-                          BTREE_ITER_WITH_HOLES, k) {
-               switch (k.k->type) {
-               case BCH_INODE_FS:
-                       ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
-                       break;
-               default:
-                       /* hole, not found */
-                       break;
-               }
+       switch (k.k->type) {
+       case KEY_TYPE_inode: {
+               struct bch_inode_unpacked inode_u;
 
+               if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
+                       bi_generation = inode_u.bi_generation + 1;
                break;
+       }
+       case KEY_TYPE_inode_generation: {
+               struct bkey_s_c_inode_generation g =
+                       bkey_s_c_to_inode_generation(k);
+               bi_generation = le32_to_cpu(g.v->bi_generation);
+               break;
+       }
+       }
 
+       if (!bi_generation) {
+               bkey_init(&delete.k);
+               delete.k.p.offset = inode_nr;
+       } else {
+               bkey_inode_generation_init(&delete.k_i);
+               delete.k.p.offset = inode_nr;
+               delete.v.bi_generation = cpu_to_le32(bi_generation);
        }
 
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       bch2_trans_update(&trans, iter, &delete.k_i, 0);
+
+       ret = bch2_trans_commit(&trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL);
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       bch2_trans_exit(&trans);
+       return ret;
 }
 
-int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
-                                      struct bkey_i_inode_blockdev *ret)
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
+                                 struct bch_inode_unpacked *inode)
 {
-       struct btree_iter iter;
+       struct btree_iter *iter;
        struct bkey_s_c k;
+       int ret;
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), 0, k) {
-               if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
-                       break;
-
-               if (k.k->type == BCH_INODE_BLOCKDEV) {
-                       struct bkey_s_c_inode_blockdev inode =
-                               bkey_s_c_to_inode_blockdev(k);
-
-                       pr_debug("found inode %llu: %pU (u64s %u)",
-                                inode.k->p.inode, inode.v->i_uuid.b,
-                                inode.k->u64s);
-
-                       if (CACHED_DEV(inode.v) &&
-                           !memcmp(uuid, &inode.v->i_uuid, 16)) {
-                               bkey_reassemble(&ret->k_i, k);
-                               bch2_btree_iter_unlock(&iter);
-                               return 0;
-                       }
-               }
-
-               bch2_btree_iter_cond_resched(&iter);
-       }
-       bch2_btree_iter_unlock(&iter);
-       return -ENOENT;
+       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
+                       POS(0, inode_nr), BTREE_ITER_CACHED);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       k = bch2_btree_iter_peek_cached(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       ret = k.k->type == KEY_TYPE_inode
+               ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
+               : -ENOENT;      /* any non-inode key at this position counts as "not found" */
+err:
+       bch2_trans_iter_put(trans, iter);       /* iterator is released on success and on failure */
+       return ret;
 }
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_inode_pack_test(void)
+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
+                           struct bch_inode_unpacked *inode)
 {
-       struct bch_inode_unpacked *u, test_inodes[] = {
-               {
-                       .bi_atime       = U64_MAX,
-                       .bi_ctime       = U64_MAX,
-                       .bi_mtime       = U64_MAX,
-                       .bi_otime       = U64_MAX,
-                       .bi_size        = U64_MAX,
-                       .bi_sectors     = U64_MAX,
-                       .bi_uid         = U32_MAX,
-                       .bi_gid         = U32_MAX,
-                       .bi_nlink       = U32_MAX,
-                       .bi_generation  = U32_MAX,
-                       .bi_dev         = U32_MAX,
-               },
-       };
-
-       for (u = test_inodes;
-            u < test_inodes + ARRAY_SIZE(test_inodes);
-            u++) {
-               struct bkey_inode_buf p;
-
-               bch2_inode_pack(&p, u);
-       }
+       return bch2_trans_do(c, NULL, NULL, 0,
+               bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));        /* `trans` is not declared here; presumably bch2_trans_do() supplies it - confirm */
 }
-#endif