]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 275cba438e bcachefs: Fix inodes pass in fsck
authorKent Overstreet <kent.overstreet@gmail.com>
Mon, 30 Mar 2020 18:34:20 +0000 (14:34 -0400)
committerKent Overstreet <kent.overstreet@gmail.com>
Mon, 30 Mar 2020 18:34:20 +0000 (14:34 -0400)
23 files changed:
.bcachefs_revision
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.c
libbcachefs/bkey_methods.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/compress.c
libbcachefs/ec.c
libbcachefs/extent_update.c
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/journal.c
libbcachefs/journal_io.c
libbcachefs/recovery.c

index 9bf45e9ec06050cd2f5c7326728b7b3392f4f001..f42996589021eaa05b36202fcf484e7da4f90133 100644 (file)
@@ -1 +1 @@
-96b991466ac851ea3c7adbd2e30184837573e2a0
+275cba438ed6630d5e4db7c9164ac5334a8a4cd7
index a78988e3ded7347df61c3d13c074438339d15e40..616863ef77d4f51a4c8339678710cb92668ed7cd 100644 (file)
@@ -1156,7 +1156,8 @@ enum bcachefs_metadata_version {
        bcachefs_metadata_version_min                   = 9,
        bcachefs_metadata_version_new_versioning        = 10,
        bcachefs_metadata_version_bkey_renumber         = 10,
-       bcachefs_metadata_version_max                   = 11,
+       bcachefs_metadata_version_inode_btree_change    = 11,
+       bcachefs_metadata_version_max                   = 12,
 };
 
 #define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
index 0aa3d3b9a281c7fad15df4914b7f007861e3cad0..c97e1e9002cb6efa42e817d4da532644098ddcd7 100644 (file)
@@ -273,3 +273,59 @@ void bch2_bkey_renumber(enum btree_node_type btree_node_type,
                        break;
                }
 }
+
+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+                       unsigned version, unsigned big_endian,
+                       int write,
+                       struct bkey_format *f,
+                       struct bkey_packed *k)
+{
+       const struct bkey_ops *ops;
+       struct bkey uk;
+       struct bkey_s u;
+
+       if (big_endian != CPU_BIG_ENDIAN)
+               bch2_bkey_swab_key(f, k);
+
+       if (version < bcachefs_metadata_version_bkey_renumber)
+               bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_id == BTREE_ID_INODES) {
+               if (!bkey_packed(k)) {
+                       struct bkey_i *u = packed_to_bkey(k);
+                       swap(u->k.p.inode, u->k.p.offset);
+               } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+                          f->bits_per_field[BKEY_FIELD_OFFSET]) {
+                       struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+                       swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+                            tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+                       swap(tmp.field_offset[BKEY_FIELD_INODE],
+                            tmp.field_offset[BKEY_FIELD_OFFSET]);
+
+                       if (!write)
+                               swap(in, out);
+
+                       uk = __bch2_bkey_unpack_key(in, k);
+                       swap(uk.p.inode, uk.p.offset);
+                       BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+               }
+       }
+
+       if (!bkey_packed(k)) {
+               u = bkey_i_to_s(packed_to_bkey(k));
+       } else {
+               uk = __bch2_bkey_unpack_key(f, k);
+               u.k = &uk;
+               u.v = bkeyp_val(f, k);
+       }
+
+       if (big_endian != CPU_BIG_ENDIAN)
+               bch2_bkey_swab_val(u);
+
+       ops = &bch2_bkey_ops[k->type];
+
+       if (ops->compat)
+               ops->compat(btree_id, version, big_endian, write, u);
+}
index d36468b752237eb8935a4dae7774cc6c54ac6bee..0bca725ae3b8c5d3d719cc878c07bb1812cc6802 100644 (file)
@@ -33,6 +33,9 @@ struct bkey_ops {
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
        enum merge_result (*key_merge)(struct bch_fs *,
                                       struct bkey_s, struct bkey_s);
+       void            (*compat)(enum btree_id id, unsigned version,
+                                 unsigned big_endian, int write,
+                                 struct bkey_s);
 };
 
 const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
@@ -60,4 +63,20 @@ enum merge_result bch2_bkey_merge(struct bch_fs *,
 
 void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
 
+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
+                       int, struct bkey_format *, struct bkey_packed *);
+
+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+                              unsigned version, unsigned big_endian,
+                              int write,
+                              struct bkey_format *f,
+                              struct bkey_packed *k)
+{
+       if (version < bcachefs_metadata_version_current ||
+           big_endian != CPU_BIG_ENDIAN)
+               __bch2_bkey_compat(level, btree_id, version,
+                                  big_endian, write, f, k);
+
+}
+
 #endif /* _BCACHEFS_BKEY_METHODS_H */
index 5c3e7e165fcfb5fa3e24d5fcb57d26d85f20c92b..c12f8a6b52051ae18ebfafde882e2934a845a553 100644 (file)
@@ -923,8 +923,7 @@ out:
                if (sib != btree_prev_sib)
                        swap(n1, n2);
 
-               BUG_ON(bkey_cmp(btree_type_successor(n1->btree_id,
-                                                    n1->key.k.p),
+               BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
                                n2->data->min_key));
        }
 
index 7c89a6dd7f5a1302f1e95b6c3cbb31803b75a3e9..674a1dac46b937cff4d4c31dc92ea0fea31eb575 100644 (file)
@@ -47,65 +47,42 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
        __gc_pos_set(c, new_pos);
 }
 
-/* range_checks - for validating min/max pos of each btree node: */
-
-struct range_checks {
-       struct range_level {
-               struct bpos     min;
-               struct bpos     max;
-       }                       l[BTREE_MAX_DEPTH];
-       unsigned                depth;
-};
-
-static void btree_node_range_checks_init(struct range_checks *r, unsigned depth)
+static int bch2_gc_check_topology(struct bch_fs *c,
+                                 struct bkey_s_c k,
+                                 struct bpos *expected_start,
+                                 struct bpos expected_end,
+                                 bool is_last)
 {
-       unsigned i;
-
-       for (i = 0; i < BTREE_MAX_DEPTH; i++)
-               r->l[i].min = r->l[i].max = POS_MIN;
-       r->depth = depth;
-}
-
-static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
-                                   struct range_checks *r)
-{
-       struct range_level *l = &r->l[b->level];
-
-       struct bpos expected_min = bkey_cmp(l->min, l->max)
-               ? btree_type_successor(b->btree_id, l->max)
-               : l->max;
-
-       bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c,
-               "btree node has incorrect min key: %llu:%llu != %llu:%llu",
-               b->data->min_key.inode,
-               b->data->min_key.offset,
-               expected_min.inode,
-               expected_min.offset);
-
-       l->max = b->data->max_key;
+       int ret = 0;
 
-       if (b->level > r->depth) {
-               l = &r->l[b->level - 1];
+       if (k.k->type == KEY_TYPE_btree_ptr_v2) {
+               struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
 
-               bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c,
-                       "btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu",
-                       b->data->min_key.inode,
-                       b->data->min_key.offset,
-                       l->min.inode,
-                       l->min.offset);
+               if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c,
+                               "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu",
+                               bp.v->min_key.inode,
+                               bp.v->min_key.offset,
+                               expected_start->inode,
+                               expected_start->offset)) {
+                       BUG();
+               }
+       }
 
-               bch2_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c,
-                       "btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu",
-                       b->data->max_key.inode,
-                       b->data->max_key.offset,
-                       l->max.inode,
-                       l->max.offset);
-
-               if (bkey_cmp(b->data->max_key, POS_MAX))
-                       l->min = l->max =
-                               btree_type_successor(b->btree_id,
-                                                    b->data->max_key);
+       *expected_start = bkey_cmp(k.k->p, POS_MAX)
+               ? bkey_successor(k.k->p)
+               : k.k->p;
+
+       if (fsck_err_on(is_last &&
+                       bkey_cmp(k.k->p, expected_end), c,
+                       "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu",
+                       k.k->p.inode,
+                       k.k->p.offset,
+                       expected_end.inode,
+                       expected_end.offset)) {
+               BUG();
        }
+fsck_err:
+       return ret;
 }
 
 /* marking of btree keys/nodes: */
@@ -187,6 +164,7 @@ fsck_err:
 static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
                              bool initial)
 {
+       struct bpos next_node_start = b->data->min_key;
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
@@ -197,13 +175,25 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
        if (!btree_node_type_needs_gc(btree_node_type(b)))
                return 0;
 
-       for_each_btree_node_key_unpack(b, k, &iter,
-                                      &unpacked) {
+       bch2_btree_node_iter_init_from_start(&iter, b);
+
+       while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
                bch2_bkey_debugcheck(c, b, k);
 
                ret = bch2_gc_mark_key(c, k, max_stale, initial);
                if (ret)
                        break;
+
+               bch2_btree_node_iter_advance(&iter, b);
+
+               if (b->level) {
+                       ret = bch2_gc_check_topology(c, k,
+                                       &next_node_start,
+                                       b->data->max_key,
+                                       bch2_btree_node_iter_end(&iter));
+                       if (ret)
+                               break;
+               }
        }
 
        return ret;
@@ -215,7 +205,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
        struct btree_trans trans;
        struct btree_iter *iter;
        struct btree *b;
-       struct range_checks r;
        unsigned depth = metadata_only                  ? 1
                : expensive_debug_checks(c)             ? 0
                : !btree_node_type_needs_gc(btree_id)   ? 1
@@ -227,12 +216,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
 
-       btree_node_range_checks_init(&r, depth);
-
        __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
                              0, depth, BTREE_ITER_PREFETCH, b) {
-               btree_node_range_checks(c, b, &r);
-
                bch2_verify_btree_nr_keys(b);
 
                gc_pos_set(c, gc_pos_btree_node(b));
@@ -274,11 +259,12 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 }
 
 static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
-                                        struct journal_keys *journal_keys,
-                                        unsigned target_depth)
+                                     struct journal_keys *journal_keys,
+                                     unsigned target_depth)
 {
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
+       struct bpos next_node_start = b->data->min_key;
        u8 max_stale = 0;
        int ret = 0;
 
@@ -287,28 +273,46 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                bch2_bkey_debugcheck(c, b, k);
 
+               BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0);
+               BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0);
+
                ret = bch2_gc_mark_key(c, k, &max_stale, true);
                if (ret)
                        break;
 
-               if (b->level > target_depth) {
+               if (b->level) {
                        struct btree *child;
                        BKEY_PADDED(k) tmp;
 
                        bkey_reassemble(&tmp.k, k);
+                       k = bkey_i_to_s_c(&tmp.k);
+
+                       bch2_btree_and_journal_iter_advance(&iter);
 
-                       child = bch2_btree_node_get_noiter(c, &tmp.k,
-                                               b->btree_id, b->level - 1);
-                       ret = PTR_ERR_OR_ZERO(child);
+                       ret = bch2_gc_check_topology(c, k,
+                                       &next_node_start,
+                                       b->data->max_key,
+                                       !bch2_btree_and_journal_iter_peek(&iter).k);
                        if (ret)
                                break;
 
-                       bch2_gc_btree_init_recurse(c, child,
-                                       journal_keys, target_depth);
-                       six_unlock_read(&child->lock);
-               }
+                       if (b->level > target_depth) {
+                               child = bch2_btree_node_get_noiter(c, &tmp.k,
+                                                       b->btree_id, b->level - 1);
+                               ret = PTR_ERR_OR_ZERO(child);
+                               if (ret)
+                                       break;
 
-               bch2_btree_and_journal_iter_advance(&iter);
+                               ret = bch2_gc_btree_init_recurse(c, child,
+                                               journal_keys, target_depth);
+                               six_unlock_read(&child->lock);
+
+                               if (ret)
+                                       break;
+                       }
+               } else {
+                       bch2_btree_and_journal_iter_advance(&iter);
+               }
        }
 
        return ret;
@@ -333,6 +337,20 @@ static int bch2_gc_btree_init(struct bch_fs *c,
                return 0;
 
        six_lock_read(&b->lock);
+       if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
+                       "btree root with incorrect min_key: %llu:%llu",
+                       b->data->min_key.inode,
+                       b->data->min_key.offset)) {
+               BUG();
+       }
+
+       if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c,
+                       "btree root with incorrect max_key: %llu:%llu",
+                       b->data->max_key.inode,
+                       b->data->max_key.offset)) {
+               BUG();
+       }
+
        if (b->level >= target_depth)
                ret = bch2_gc_btree_init_recurse(c, b,
                                        journal_keys, target_depth);
@@ -340,6 +358,7 @@ static int bch2_gc_btree_init(struct bch_fs *c,
        if (!ret)
                ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
                                       &max_stale, true);
+fsck_err:
        six_unlock_read(&b->lock);
 
        return ret;
@@ -985,9 +1004,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
                        n1->key.k.p = n1->data->max_key =
                                bkey_unpack_pos(n1, last);
 
-                       n2->data->min_key =
-                               btree_type_successor(iter->btree_id,
-                                                    n1->data->max_key);
+                       n2->data->min_key = bkey_successor(n1->data->max_key);
 
                        memcpy_u64s(vstruct_last(s1),
                                    s2->start, u64s);
index b48d48b8c27df7c67ac000fe2aa5c91a01885553..ac8b98861aae1f5178db3e16c002ba7775a8bbdd 100644 (file)
@@ -19,6 +19,7 @@
 #include "journal_seq_blacklist.h"
 #include "super-io.h"
 
+#include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
 static void verify_no_dups(struct btree *b,
@@ -68,17 +69,19 @@ static void btree_bounce_free(struct bch_fs *c, unsigned order,
 static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
                                bool *used_mempool)
 {
+       unsigned flags = memalloc_nofs_save();
        void *p;
 
        BUG_ON(order > btree_page_order(c));
 
        *used_mempool = false;
        p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
-       if (p)
-               return p;
-
-       *used_mempool = true;
-       return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
+       if (!p) {
+               *used_mempool = true;
+               p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
+       }
+       memalloc_nofs_restore(flags);
+       return p;
 }
 
 static void sort_bkey_ptrs(const struct btree *bt,
@@ -706,83 +709,107 @@ out:                                                                     \
 
 static int validate_bset(struct bch_fs *c, struct btree *b,
                         struct bset *i, unsigned sectors,
-                        unsigned *whiteout_u64s, int write,
-                        bool have_retry)
+                        int write, bool have_retry)
 {
-       struct bkey_packed *k, *prev = NULL;
-       bool seen_non_whiteout = false;
-       unsigned version;
+       unsigned version = le16_to_cpu(i->version);
        const char *err;
        int ret = 0;
 
+       btree_err_on((version != BCH_BSET_VERSION_OLD &&
+                     version < bcachefs_metadata_version_min) ||
+                    version >= bcachefs_metadata_version_max,
+                    BTREE_ERR_FATAL, c, b, i,
+                    "unsupported bset version");
+
+       if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
+                        BTREE_ERR_FIXABLE, c, b, i,
+                        "bset past end of btree node")) {
+               i->u64s = 0;
+               return 0;
+       }
+
+       btree_err_on(b->written && !i->u64s,
+                    BTREE_ERR_FIXABLE, c, b, i,
+                    "empty bset");
+
        if (!b->written) {
+               struct btree_node *bn =
+                       container_of(i, struct btree_node, keys);
                /* These indicate that we read the wrong btree node: */
-               btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id,
+               btree_err_on(BTREE_NODE_ID(bn) != b->btree_id,
                             BTREE_ERR_MUST_RETRY, c, b, i,
                             "incorrect btree id");
 
-               btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level,
+               btree_err_on(BTREE_NODE_LEVEL(bn) != b->level,
                             BTREE_ERR_MUST_RETRY, c, b, i,
                             "incorrect level");
 
                if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
-                       u64 *p = (u64 *) &b->data->ptr;
+                       u64 *p = (u64 *) &bn->ptr;
 
                        *p = swab64(*p);
-                       bch2_bpos_swab(&b->data->min_key);
-                       bch2_bpos_swab(&b->data->max_key);
                }
 
+               if (!write)
+                       compat_btree_node(b->level, b->btree_id, version,
+                                         BSET_BIG_ENDIAN(i), write, bn);
+
                if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
                        struct bch_btree_ptr_v2 *bp =
                                &bkey_i_to_btree_ptr_v2(&b->key)->v;
 
                        btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
                                     BTREE_ERR_MUST_RETRY, c, b, NULL,
-                                    "incorrect min_key");
+                                    "incorrect min_key: got %llu:%llu should be %llu:%llu",
+                                    b->data->min_key.inode,
+                                    b->data->min_key.offset,
+                                    bp->min_key.inode,
+                                    bp->min_key.offset);
                }
 
-               btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
+               btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
                             BTREE_ERR_MUST_RETRY, c, b, i,
                             "incorrect max key");
 
+               if (write)
+                       compat_btree_node(b->level, b->btree_id, version,
+                                         BSET_BIG_ENDIAN(i), write, bn);
+
                /* XXX: ideally we would be validating min_key too */
 #if 0
                /*
                 * not correct anymore, due to btree node write error
                 * handling
                 *
-                * need to add b->data->seq to btree keys and verify
+                * need to add bn->seq to btree keys and verify
                 * against that
                 */
                btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
-                                                 b->data->ptr),
+                                                 bn->ptr),
                             BTREE_ERR_FATAL, c, b, i,
                             "incorrect backpointer");
 #endif
-               err = bch2_bkey_format_validate(&b->data->format);
+               err = bch2_bkey_format_validate(&bn->format);
                btree_err_on(err,
                             BTREE_ERR_FATAL, c, b, i,
                             "invalid bkey format: %s", err);
-       }
 
-       version = le16_to_cpu(i->version);
-       btree_err_on((version != BCH_BSET_VERSION_OLD &&
-                     version < bcachefs_metadata_version_min) ||
-                    version >= bcachefs_metadata_version_max,
-                    BTREE_ERR_FATAL, c, b, i,
-                    "unsupported bset version");
-
-       if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
-                        BTREE_ERR_FIXABLE, c, b, i,
-                        "bset past end of btree node")) {
-               i->u64s = 0;
-               return 0;
+               compat_bformat(b->level, b->btree_id, version,
+                              BSET_BIG_ENDIAN(i), write,
+                              &bn->format);
        }
+fsck_err:
+       return ret;
+}
 
-       btree_err_on(b->written && !i->u64s,
-                    BTREE_ERR_FIXABLE, c, b, i,
-                    "empty bset");
+static int validate_bset_keys(struct bch_fs *c, struct btree *b,
+                        struct bset *i, unsigned *whiteout_u64s,
+                        int write, bool have_retry)
+{
+       unsigned version = le16_to_cpu(i->version);
+       struct bkey_packed *k, *prev = NULL;
+       bool seen_non_whiteout = false;
+       int ret = 0;
 
        if (!BSET_SEPARATE_WHITEOUTS(i)) {
                seen_non_whiteout = true;
@@ -811,18 +838,14 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                        continue;
                }
 
-               if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
-                       bch2_bkey_swab_key(&b->format, k);
-
-               if (!write &&
-                   version < bcachefs_metadata_version_bkey_renumber)
-                       bch2_bkey_renumber(btree_node_type(b), k, write);
+               /* XXX: validate k->u64s */
+               if (!write)
+                       bch2_bkey_compat(b->level, b->btree_id, version,
+                                   BSET_BIG_ENDIAN(i), write,
+                                   &b->format, k);
 
                u = __bkey_disassemble(b, k, &tmp);
 
-               if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
-                       bch2_bkey_swab_val(u);
-
                invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?:
                        bch2_bkey_in_btree_node(b, u.s_c) ?:
                        (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL);
@@ -839,9 +862,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                        continue;
                }
 
-               if (write &&
-                   version < bcachefs_metadata_version_bkey_renumber)
-                       bch2_bkey_renumber(btree_node_type(b), k, write);
+               if (write)
+                       bch2_bkey_compat(b->level, b->btree_id, version,
+                                   BSET_BIG_ENDIAN(i), write,
+                                   &b->format, k);
 
                /*
                 * with the separate whiteouts thing (used for extents), the
@@ -872,8 +896,6 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                prev = k;
                k = bkey_next_skip_noops(k, vstruct_last(i));
        }
-
-       SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
 fsck_err:
        return ret;
 }
@@ -941,8 +963,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                                set_btree_node_old_extent_overwrite(b);
 
                        sectors = vstruct_sectors(b->data, c->block_bits);
-
-                       btree_node_set_format(b, b->data->format);
                } else {
                        bne = write_block(b);
                        i = &bne->keys;
@@ -966,11 +986,21 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                        sectors = vstruct_sectors(bne, c->block_bits);
                }
 
-               ret = validate_bset(c, b, i, sectors, &whiteout_u64s,
+               ret = validate_bset(c, b, i, sectors,
                                    READ, have_retry);
                if (ret)
                        goto fsck_err;
 
+               if (!b->written)
+                       btree_node_set_format(b, b->data->format);
+
+               ret = validate_bset_keys(c, b, i, &whiteout_u64s,
+                                   READ, have_retry);
+               if (ret)
+                       goto fsck_err;
+
+               SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
                b->written += sectors;
 
                blacklisted = bch2_journal_seq_is_blacklisted(c,
@@ -1413,7 +1443,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
        if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
                return -1;
 
-       ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
+       ret = validate_bset(c, b, i, sectors, WRITE, false) ?:
+               validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
        if (ret)
                bch2_inconsistent_error(c);
 
@@ -1563,8 +1594,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
                validate_before_checksum = true;
 
        /* validate_bset will be modifying: */
-       if (le16_to_cpu(i->version) <
-           bcachefs_metadata_version_bkey_renumber)
+       if (le16_to_cpu(i->version) < bcachefs_metadata_version_max)
                validate_before_checksum = true;
 
        /* if we're going to be encrypting, check metadata validity first: */
index fd719dda7d91696efa4121a3ac4ed717352dfebb..1f16394fd5c35249b835ef5ce3116b70d5a4a1b7 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_BTREE_IO_H
 #define _BCACHEFS_BTREE_IO_H
 
+#include "bkey_methods.h"
 #include "bset.h"
 #include "btree_locking.h"
 #include "extents.h"
@@ -140,4 +141,50 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
 void bch2_btree_verify_flushed(struct bch_fs *);
 ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
 
+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
+                                unsigned version, unsigned big_endian,
+                                int write, struct bkey_format *f)
+{
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_id == BTREE_ID_INODES) {
+               swap(f->bits_per_field[BKEY_FIELD_INODE],
+                    f->bits_per_field[BKEY_FIELD_OFFSET]);
+               swap(f->field_offset[BKEY_FIELD_INODE],
+                    f->field_offset[BKEY_FIELD_OFFSET]);
+       }
+}
+
+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
+                              unsigned version, unsigned big_endian,
+                              int write, struct bpos *p)
+{
+       if (big_endian != CPU_BIG_ENDIAN)
+               bch2_bpos_swab(p);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_id == BTREE_ID_INODES)
+               swap(p->inode, p->offset);
+}
+
+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
+                                    unsigned version, unsigned big_endian,
+                                    int write,
+                                    struct btree_node *bn)
+{
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bn->min_key, POS_MIN) &&
+           write)
+               bn->min_key = bkey_predecessor(bn->min_key);
+
+       compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
+       compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bn->min_key, POS_MIN) &&
+           !write)
+               bn->min_key = bkey_successor(bn->min_key);
+}
+
 #endif /* _BCACHEFS_BTREE_IO_H */
index 6ed688cdcfdee2ca681801559cede96372f1bf6f..7345fec8a98ff2b49620b86055006febe23f745d 100644 (file)
@@ -39,7 +39,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
 static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
                                              struct btree *b)
 {
-       return bkey_cmp(iter->pos, b->data->min_key) < 0;
+       return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0;
 }
 
 static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
@@ -1284,10 +1284,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
                if (btree_node_read_locked(iter, iter->level))
                        btree_node_unlock(iter, iter->level);
 
-               /* ick: */
-               iter->pos       = iter->btree_id == BTREE_ID_INODES
-                       ? btree_type_successor(iter->btree_id, iter->pos)
-                       : bkey_successor(iter->pos);
+               iter->pos       = bkey_successor(iter->pos);
                iter->level     = iter->min_depth;
 
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
@@ -1395,8 +1392,8 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
        iter->k.p = iter->pos = l->b->key.k.p;
 
        ret = bkey_cmp(iter->pos, POS_MAX) != 0;
-       if (ret)
-               iter->k.p = iter->pos = btree_type_successor(iter->btree_id, iter->pos);
+       if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+               iter->k.p = iter->pos = bkey_successor(iter->pos);
 
        btree_iter_pos_changed(iter, 1);
        return ret;
@@ -1412,8 +1409,12 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
        iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
 
        ret = bkey_cmp(iter->pos, POS_MIN) != 0;
-       if (ret)
-               iter->k.p = iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
+       if (ret) {
+               iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+
+               if (iter->flags & BTREE_ITER_IS_EXTENTS)
+                       iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+       }
 
        btree_iter_pos_changed(iter, -1);
        return ret;
@@ -1500,7 +1501,9 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
                return bkey_s_c_null;
 
        bch2_btree_iter_set_pos(iter,
-               btree_type_successor(iter->btree_id, iter->k.p));
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
 
        return bch2_btree_iter_peek(iter);
 }
@@ -1553,7 +1556,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 
                if (k.k && bkey_deleted(k.k)) {
                        bch2_btree_iter_set_pos(iter,
-                               btree_type_successor(iter->btree_id, iter->k.p));
+                               (iter->flags & BTREE_ITER_IS_EXTENTS)
+                               ? iter->k.p
+                               : bkey_successor(iter->k.p));
                        continue;
                }
 
@@ -1582,7 +1587,9 @@ struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
                return bkey_s_c_null;
 
        bch2_btree_iter_set_pos(iter,
-               btree_type_successor(iter->btree_id, iter->k.p));
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
 
        return bch2_btree_iter_peek_with_updates(iter);
 }
@@ -1749,7 +1756,9 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
                return bkey_s_c_null;
 
        bch2_btree_iter_set_pos(iter,
-               btree_type_successor(iter->btree_id, iter->k.p));
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
 
        return bch2_btree_iter_peek_slot(iter);
 }
index 6f51ef35db75bbc03ea86582c05eb108c18a5e74..1a3672a23b861f630008660b1e4c1e0d1a7f159e 100644 (file)
@@ -172,32 +172,6 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
-static inline struct bpos btree_type_successor(enum btree_id id,
-                                              struct bpos pos)
-{
-       if (id == BTREE_ID_INODES) {
-               pos.inode++;
-               pos.offset = 0;
-       } else if (!btree_node_type_is_extents(id)) {
-               pos = bkey_successor(pos);
-       }
-
-       return pos;
-}
-
-static inline struct bpos btree_type_predecessor(enum btree_id id,
-                                              struct bpos pos)
-{
-       if (id == BTREE_ID_INODES) {
-               --pos.inode;
-               pos.offset = 0;
-       } else {
-               pos = bkey_predecessor(pos);
-       }
-
-       return pos;
-}
-
 static inline int __btree_iter_cmp(enum btree_id id,
                                   struct bpos pos,
                                   const struct btree_iter *r)
index 31a5c215ca3496eb77d2c2a929ca29ccd159c8c4..e2649503cc9bb193223d81deb7f4caa10e3f8ea4 100644 (file)
@@ -272,7 +272,11 @@ struct btree_insert_entry {
        struct btree_iter       *iter;
 };
 
+#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX         64
+#else
+#define BTREE_ITER_MAX         32
+#endif
 
 struct btree_trans {
        struct bch_fs           *c;
index bc7749c8060abfb94a8e641ab66e15920b91b8bc..c4235f72f517ab9984992d01c271e52afef5d472 100644 (file)
@@ -581,7 +581,7 @@ err_free:
 
 /* Asynchronous interior node update machinery */
 
-static void bch2_btree_update_free(struct btree_update *as)
+static void __bch2_btree_update_free(struct btree_update *as)
 {
        struct bch_fs *c = as->c;
 
@@ -596,28 +596,32 @@ static void bch2_btree_update_free(struct btree_update *as)
        if (as->reserve)
                bch2_btree_reserve_put(c, as->reserve);
 
-       mutex_lock(&c->btree_interior_update_lock);
        list_del(&as->list);
 
        closure_debug_destroy(&as->cl);
        mempool_free(as, &c->btree_interior_update_pool);
 
        closure_wake_up(&c->btree_interior_update_wait);
-       mutex_unlock(&c->btree_interior_update_lock);
 }
 
-static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+static void bch2_btree_update_free(struct btree_update *as)
 {
        struct bch_fs *c = as->c;
 
        mutex_lock(&c->btree_interior_update_lock);
+       __bch2_btree_update_free(as);
+       mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+{
+       struct bch_fs *c = as->c;
 
        while (as->nr_new_nodes) {
                struct btree *b = as->new_nodes[--as->nr_new_nodes];
 
                BUG_ON(b->will_make_reachable != (unsigned long) as);
                b->will_make_reachable = 0;
-               mutex_unlock(&c->btree_interior_update_lock);
 
                /*
                 * b->will_make_reachable prevented it from being written, so
@@ -626,14 +630,11 @@ static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
                btree_node_lock_type(c, b, SIX_LOCK_read);
                bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
                six_unlock_read(&b->lock);
-               mutex_lock(&c->btree_interior_update_lock);
        }
 
        while (as->nr_pending)
                bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
                                            seq);
-
-       mutex_unlock(&c->btree_interior_update_lock);
 }
 
 static void btree_update_nodes_written(struct closure *cl)
@@ -667,9 +668,12 @@ again:
                mutex_unlock(&c->btree_interior_update_lock);
                btree_node_lock_type(c, b, SIX_LOCK_intent);
                six_unlock_intent(&b->lock);
-               goto out;
+               mutex_lock(&c->btree_interior_update_lock);
+               goto again;
        }
 
+       list_del(&as->unwritten_list);
+
        journal_u64s = 0;
 
        if (as->mode != BTREE_INTERIOR_UPDATING_ROOT)
@@ -709,23 +713,10 @@ again:
 
                bch2_btree_add_journal_pin(c, b, res.seq);
                six_unlock_write(&b->lock);
-
-               list_del(&as->unwritten_list);
-               mutex_unlock(&c->btree_interior_update_lock);
-
-               /*
-                * b->write_blocked prevented it from being written, so
-                * write it now if it needs to be written:
-                */
-               btree_node_write_if_need(c, b, SIX_LOCK_intent);
-               six_unlock_intent(&b->lock);
                break;
 
        case BTREE_INTERIOR_UPDATING_AS:
                BUG_ON(b);
-
-               list_del(&as->unwritten_list);
-               mutex_unlock(&c->btree_interior_update_lock);
                break;
 
        case BTREE_INTERIOR_UPDATING_ROOT: {
@@ -739,9 +730,6 @@ again:
                r->alive = true;
                c->btree_roots_dirty = true;
                mutex_unlock(&c->btree_root_lock);
-
-               list_del(&as->unwritten_list);
-               mutex_unlock(&c->btree_interior_update_lock);
                break;
        }
        }
@@ -751,16 +739,24 @@ again:
        bch2_journal_res_put(&c->journal, &res);
        bch2_journal_preres_put(&c->journal, &as->journal_preres);
 
+       /* Do btree write after dropping journal res: */
+       if (b) {
+               /*
+                * b->write_blocked prevented it from being written, so
+                * write it now if it needs to be written:
+                */
+               btree_node_write_if_need(c, b, SIX_LOCK_intent);
+               six_unlock_intent(&b->lock);
+       }
+
        btree_update_nodes_reachable(as, res.seq);
 free_update:
-       bch2_btree_update_free(as);
+       __bch2_btree_update_free(as);
        /*
         * for flush_held_btree_writes() waiting on updates to flush or
         * nodes to be writeable:
         */
        closure_wake_up(&c->btree_interior_update_wait);
-out:
-       mutex_lock(&c->btree_interior_update_lock);
        goto again;
 }
 
@@ -1200,7 +1196,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
        BUG_ON(!prev);
 
        btree_set_max(n1, bkey_unpack_pos(n1, prev));
-       btree_set_min(n2, btree_type_successor(n1->btree_id, n1->key.k.p));
+       btree_set_min(n2, bkey_successor(n1->key.k.p));
 
        set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
        set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
index f94bc6a0b699c52a99a70c7a79c227606e6b4815..da2b93b58eed0e7189bdc538b1a440845f78fc45 100644 (file)
@@ -58,8 +58,11 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
        EBUG_ON(btree_node_just_written(b));
        EBUG_ON(bset_written(b, btree_bset_last(b)));
        EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
-       EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 ||
-               bkey_cmp(insert->k.p, b->data->max_key) > 0);
+       EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+               bkey_cmp(bkey_start_pos(&insert->k),
+                        bkey_predecessor(b->data->min_key)) < 0);
+       EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0);
+       EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0);
        EBUG_ON(insert->k.u64s >
                bch_btree_keys_u64s_remaining(iter->trans->c, b));
        EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
index 0959bb864dc55cd58474307f2a5fadff21fcdf4c..0713286d7999bbbcbb58fcfedd2e18b51192cfb0 100644 (file)
@@ -17,7 +17,6 @@ struct bbuf {
                BB_NONE,
                BB_VMAP,
                BB_KMALLOC,
-               BB_VMALLOC,
                BB_MEMPOOL,
        }               type;
        int             rw;
@@ -33,17 +32,7 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
        if (b)
                return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
 
-       b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
-       b = b ? page_address(b) : NULL;
-       if (b)
-               return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
-
-       b = vmalloc(size);
-       if (b)
-               return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };
-
        b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
-       b = b ? page_address(b) : NULL;
        if (b)
                return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
 
@@ -129,12 +118,8 @@ static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
        case BB_KMALLOC:
                kfree(buf.b);
                break;
-       case BB_VMALLOC:
-               vfree(buf.b);
-               break;
        case BB_MEMPOOL:
-               mempool_free(virt_to_page(buf.b),
-                            &c->compression_bounce[buf.rw]);
+               mempool_free(buf.b, &c->compression_bounce[buf.rw]);
                break;
        }
 }
@@ -561,15 +546,15 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 have_compressed:
 
        if (!mempool_initialized(&c->compression_bounce[READ])) {
-               ret = mempool_init_page_pool(&c->compression_bounce[READ],
-                                            1, order);
+               ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+                                                 1, order);
                if (ret)
                        goto out;
        }
 
        if (!mempool_initialized(&c->compression_bounce[WRITE])) {
-               ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
-                                            1, order);
+               ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+                                                 1, order);
                if (ret)
                        goto out;
        }
index bb0193876f436ebc5052db8995358ea45cf435f2..933945b659252866a26b7be656538ab558129eb1 100644 (file)
@@ -804,8 +804,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
                        continue;
                }
 
-               bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
-
                dev = s->key.v.ptrs[idx].dev;
 
                bkey_on_stack_reassemble(&sk, c, k);
@@ -820,6 +818,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
 
                extent_stripe_ptr_add(e, s, ec_ptr, idx);
 
+               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
                bch2_trans_update(&trans, iter, sk.k, 0);
 
                ret = bch2_trans_commit(&trans, NULL, NULL,
index 8e5070d5a39b4d72ddba9fcb8fa932c46fa57aa9..2a7d913bdda3f053d6df6612f9a37ecc7408a404 100644 (file)
@@ -115,7 +115,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
        b = iter->l[0].b;
        node_iter = iter->l[0].iter;
 
-       BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
+       BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+              bkey_cmp(bkey_start_pos(&insert->k),
+                       bkey_predecessor(b->data->min_key)) < 0);
 
        *end = bpos_min(insert->k.p, b->key.k.p);
 
index cb88dd15a86cc55399e359acdaaeac4895d04cac..792c9c1e50b1d1901db1f8ce90ad14a444827403 100644 (file)
@@ -9,6 +9,7 @@
 #include "bcachefs.h"
 #include "bkey_methods.h"
 #include "btree_gc.h"
+#include "btree_io.h"
 #include "btree_iter.h"
 #include "buckets.h"
 #include "checksum.h"
@@ -214,6 +215,22 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
        bch2_bkey_ptrs_to_text(out, c, k);
 }
 
+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
+                             unsigned big_endian, int write,
+                             struct bkey_s k)
+{
+       struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
+
+       compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bp.v->min_key, POS_MIN))
+               bp.v->min_key = write
+                       ? bkey_predecessor(bp.v->min_key)
+                       : bkey_successor(bp.v->min_key);
+}
+
 /* KEY_TYPE_extent: */
 
 const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
index 70b7d70269dc14af02bd76c52100d42a4c9d6104..8ff2eac3ee2b1c1476c0f875e379634e8919f416 100644 (file)
@@ -371,6 +371,8 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
 void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
+                             int, struct bkey_s);
 
 #define bch2_bkey_ops_btree_ptr (struct bkey_ops) {            \
        .key_invalid    = bch2_btree_ptr_invalid,               \
@@ -384,6 +386,7 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
        .key_debugcheck = bch2_btree_ptr_debugcheck,            \
        .val_to_text    = bch2_btree_ptr_to_text,               \
        .swab           = bch2_ptr_swab,                        \
+       .compat         = bch2_btree_ptr_v2_compat,             \
 }
 
 /* KEY_TYPE_extent: */
index 822541e6adfc8b999adbb42c079442d50995c1bc..3ab621c62c43ddc70998ac8da34c6d275b650861 100644 (file)
@@ -1038,12 +1038,12 @@ retry:
                if (!ret)
                        continue;
 
-               if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
+               if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c,
                                "unreachable directory found (inum %llu)",
-                               k.k->p.inode)) {
+                               k.k->p.offset)) {
                        bch2_trans_unlock(&trans);
 
-                       ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
+                       ret = reattach_inode(c, lostfound_inode, k.k->p.offset);
                        if (ret) {
                                goto err;
                        }
@@ -1353,18 +1353,18 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
-                                  POS(range_start, 0), 0);
+                                  POS(0, range_start), 0);
        nlinks_iter = genradix_iter_init(links, 0);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret2 = bkey_err(k))) {
 peek_nlinks:   link = genradix_iter_peek(&nlinks_iter, links);
 
-               if (!link && (!k.k || iter->pos.inode >= range_end))
+               if (!link && (!k.k || iter->pos.offset >= range_end))
                        break;
 
                nlinks_pos = range_start + nlinks_iter.pos;
-               if (iter->pos.inode > nlinks_pos) {
+               if (iter->pos.offset > nlinks_pos) {
                        /* Should have been caught by dirents pass: */
                        need_fsck_err_on(link && link->count, c,
                                "missing inode %llu (nlink %u)",
@@ -1373,7 +1373,7 @@ peek_nlinks:      link = genradix_iter_peek(&nlinks_iter, links);
                        goto peek_nlinks;
                }
 
-               if (iter->pos.inode < nlinks_pos || !link)
+               if (iter->pos.offset < nlinks_pos || !link)
                        link = &zero_links;
 
                if (k.k && k.k->type == KEY_TYPE_inode) {
@@ -1389,7 +1389,7 @@ peek_nlinks:      link = genradix_iter_peek(&nlinks_iter, links);
                                nlinks_pos, link->count);
                }
 
-               if (nlinks_pos == iter->pos.inode)
+               if (nlinks_pos == iter->pos.offset)
                        genradix_iter_advance(&nlinks_iter, links);
 
                bch2_btree_iter_next(iter);
index 26171ff754a686d655affca1431ef1c389989792..7d20f082ad45a48d67fded8629aaad613cddbdee 100644 (file)
@@ -98,7 +98,7 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
        unsigned bytes;
 
        bkey_inode_init(&packed->inode.k_i);
-       packed->inode.k.p.inode         = inode->bi_inum;
+       packed->inode.k.p.offset        = inode->bi_inum;
        packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
        packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
        packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
@@ -149,7 +149,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
        unsigned fieldnr = 0, field_bits;
        int ret;
 
-       unpacked->bi_inum       = inode.k->p.inode;
+       unpacked->bi_inum       = inode.k->p.offset;
        unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
        unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
        unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
@@ -188,7 +188,7 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0),
+       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
                                   BTREE_ITER_SLOTS|flags);
        if (IS_ERR(iter))
                return iter;
@@ -232,13 +232,13 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
                struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
                struct bch_inode_unpacked unpacked;
 
-       if (k.k->p.offset)
-               return "nonzero offset";
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
 
        if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
                return "incorrect value size";
 
-       if (k.k->p.inode < BLOCKDEV_INODE_MAX)
+       if (k.k->p.offset < BLOCKDEV_INODE_MAX)
                return "fs inode in blockdev range";
 
        if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
@@ -280,8 +280,8 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
 const char *bch2_inode_generation_invalid(const struct bch_fs *c,
                                          struct bkey_s_c k)
 {
-       if (k.k->p.offset)
-               return "nonzero offset";
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
 
        if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
                return "incorrect value size";
@@ -383,9 +383,9 @@ int bch2_inode_create(struct btree_trans *trans,
        if (IS_ERR(inode_p))
                return PTR_ERR(inode_p);
 again:
-       for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(start, 0),
+       for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (iter->pos.inode > max)
+               if (bkey_cmp(iter->pos, POS(0, max)) > 0)
                        break;
 
                if (k.k->type != KEY_TYPE_inode)
@@ -405,8 +405,8 @@ again:
 
        return -ENOSPC;
 found_slot:
-       *hint                   = k.k->p.inode;
-       inode_u->bi_inum        = k.k->p.inode;
+       *hint                   = k.k->p.offset;
+       inode_u->bi_inum        = k.k->p.offset;
        inode_u->bi_generation  = bkey_generation(k);
 
        bch2_inode_pack(inode_p, inode_u);
@@ -443,7 +443,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        do {
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
@@ -475,10 +475,10 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 
                if (!bi_generation) {
                        bkey_init(&delete.k);
-                       delete.k.p.inode = inode_nr;
+                       delete.k.p.offset = inode_nr;
                } else {
                        bkey_inode_generation_init(&delete.k_i);
-                       delete.k.p.inode = inode_nr;
+                       delete.k.p.offset = inode_nr;
                        delete.v.bi_generation = cpu_to_le32(bi_generation);
                }
 
@@ -500,7 +500,7 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
        int ret;
 
        iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
-                       POS(inode_nr, 0), BTREE_ITER_SLOTS);
+                       POS(0, inode_nr), BTREE_ITER_SLOTS);
        if (IS_ERR(iter))
                return PTR_ERR(iter);
 
index 9f03a479c9a2f01342607987ec712ed5df2d553b..0a4538b3dc604e206ba5057d17a5517fc35e53af 100644 (file)
@@ -376,7 +376,8 @@ unlock:
                goto retry;
 
        if (ret == -ENOSPC) {
-               BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+               WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED),
+                         "JOURNAL_RES_GET_RESERVED set but journal full");
 
                /*
                 * Journal is full - can't rely on reclaim from work item due to
index 0974805c892374f5e6c7e04b285fc37d592bf393..39bb2154cce19df860823fb32f657ffef644e2dd 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "btree_io.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "error.h"
@@ -138,7 +139,8 @@ static void journal_entry_null_range(void *start, void *end)
 
 static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                                struct jset_entry *entry,
-                               struct bkey_i *k, enum btree_node_type key_type,
+                               unsigned level, enum btree_id btree_id,
+                               struct bkey_i *k,
                                const char *type, int write)
 {
        void *next = vstruct_next(entry);
@@ -171,16 +173,13 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                return 0;
        }
 
-       if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) {
-               bch2_bkey_swab_key(NULL, bkey_to_packed(k));
-               bch2_bkey_swab_val(bkey_i_to_s(k));
-       }
-
-       if (!write &&
-           version < bcachefs_metadata_version_bkey_renumber)
-               bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
+       if (!write)
+               bch2_bkey_compat(level, btree_id, version,
+                           JSET_BIG_ENDIAN(jset), write,
+                           NULL, bkey_to_packed(k));
 
-       invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type);
+       invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k),
+                                   __btree_node_type(level, btree_id));
        if (invalid) {
                char buf[160];
 
@@ -194,9 +193,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                return 0;
        }
 
-       if (write &&
-           version < bcachefs_metadata_version_bkey_renumber)
-               bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
+       if (write)
+               bch2_bkey_compat(level, btree_id, version,
+                           JSET_BIG_ENDIAN(jset), write,
+                           NULL, bkey_to_packed(k));
 fsck_err:
        return ret;
 }
@@ -209,10 +209,10 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
        struct bkey_i *k;
 
        vstruct_for_each(entry, k) {
-               int ret = journal_validate_key(c, jset, entry, k,
-                               __btree_node_type(entry->level,
-                                                 entry->btree_id),
-                               "key", write);
+               int ret = journal_validate_key(c, jset, entry,
+                                              entry->level,
+                                              entry->btree_id,
+                                              k, "key", write);
                if (ret)
                        return ret;
        }
@@ -242,7 +242,7 @@ static int journal_entry_validate_btree_root(struct bch_fs *c,
                return 0;
        }
 
-       return journal_validate_key(c, jset, entry, k, BKEY_TYPE_BTREE,
+       return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
                                    "btree root", write);
 fsck_err:
        return ret;
@@ -1018,8 +1018,7 @@ void bch2_journal_write(struct closure *cl)
        if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
                validate_before_checksum = true;
 
-       if (le32_to_cpu(jset->version) <
-           bcachefs_metadata_version_bkey_renumber)
+       if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max)
                validate_before_checksum = true;
 
        if (validate_before_checksum &&
index 5f27b8402d716322c065a08b28065cae45f4f09b..8cfae639e23fbf8e14430d595bd57f45d9833bc6 100644 (file)
@@ -820,7 +820,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
        }
 
-       if (!c->sb.clean || c->opts.fsck) {
+       if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
                struct jset *j;
 
                ret = bch2_journal_read(c, &c->journal_entries);