Merge pull request #26 from unquietwiki/master
author koverstreet <kent.overstreet@gmail.com>
Wed, 26 Feb 2020 18:22:30 +0000 (13:22 -0500)
committer GitHub <noreply@github.com>
Wed, 26 Feb 2020 18:22:30 +0000 (13:22 -0500)
RPM packaging support

45 files changed:
.bcachefs_revision
include/linux/kobject.h
libbcachefs/alloc_background.c
libbcachefs/bcachefs_format.h
libbcachefs/bkey.h
libbcachefs/bkey_methods.c
libbcachefs/bkey_methods.h
libbcachefs/bkey_sort.c
libbcachefs/bset.c
libbcachefs/bset.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/checksum.c
libbcachefs/checksum.h
libbcachefs/compress.c
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/fs-common.c
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/io.h
libbcachefs/io_types.h
libbcachefs/journal_io.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/recovery.h
libbcachefs/reflink.c
libbcachefs/reflink.h
libbcachefs/replicas.c
libbcachefs/str_hash.h
libbcachefs/sysfs.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index fba12e904c03a96659e77522a8284407717b7cc8..337949f29b9a3e0e4ac514535163a2d329c67c24 100644
@@ -1 +1 @@
-d763e8ab17ff1f5bdd9c5474ac15eb8791d31582
+9017d858547faedabdef6ca21317e317791526bd
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index bba5c638631d9dac0ee657fa7b6dd7bd5407d345..d5241786723b82af17ecd12d63573c985ae0ff2b 100644
@@ -121,7 +121,10 @@ static inline struct kobject *kobject_get(struct kobject *kobj)
        return kobj;
 }
 
-static inline void kset_unregister(struct kset *kset) {}
+static inline void kset_unregister(struct kset *kset)
+{
+       kfree(kset);
+}
 
 #define kset_create_and_add(_name, _u, _parent)                                \
        ((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL))
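
Annotation (not part of the commit): in this userspace shim, kset_create_and_add() is just a kzalloc(), as the macro above shows, so the old empty kset_unregister() stub leaked one allocation per create/unregister cycle. A minimal standalone sketch of the fix, with hypothetical fake_* names and libc calloc()/free() standing in for the kernel allocators:

#include <stdlib.h>

struct kset { int dummy; };

/* What the shim's kset_create_and_add() boils down to: a bare allocation. */
static inline struct kset *fake_kset_create_and_add(void)
{
	return calloc(1, sizeof(struct kset));
}

/* Before this change the stub body was empty, so the allocation above was
 * never freed; now it frees the kset, matching the shim's allocator. */
static inline void fake_kset_unregister(struct kset *kset)
{
	free(kset);
}

int main(void)
{
	fake_kset_unregister(fake_kset_create_and_add());
	return 0;
}
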
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index c57df50168e05f91045371c82109bacc7f953571..b2d1b8f9c9b8e86d477fe2b219ad77aa854d3ff8 100644
@@ -211,33 +211,31 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_and_journal_iter iter;
        struct bkey_s_c k;
        struct bch_dev *ca;
-       struct journal_key *j;
        unsigned i;
-       int ret;
+       int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
+       bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
+                                        BTREE_ID_ALLOC, POS_MIN);
+
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                bch2_mark_key(c, k, 0, 0, NULL, 0,
                              BTREE_TRIGGER_ALLOC_READ|
                              BTREE_TRIGGER_NOATOMIC);
 
+               bch2_btree_and_journal_iter_advance(&iter);
+       }
+
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret) {
                bch_err(c, "error reading alloc info: %i", ret);
                return ret;
        }
 
-       for_each_journal_key(*journal_keys, j)
-               if (j->btree_id == BTREE_ID_ALLOC)
-                       bch2_mark_key(c, bkey_i_to_s_c(j->k),
-                                     0, 0, NULL, 0,
-                                     BTREE_TRIGGER_ALLOC_READ|
-                                     BTREE_TRIGGER_NOATOMIC);
-
        percpu_down_write(&c->mark_lock);
        bch2_dev_usage_from_buckets(c);
        percpu_up_write(&c->mark_lock);
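
Annotation (not part of the commit): bch2_alloc_read() previously walked the alloc btree and then re-marked alloc keys from the journal in a separate for_each_journal_key() pass; the new bch2_btree_and_journal_iter yields a single ordered stream in which journal keys override btree keys. A standalone sketch of that merge pattern over two sorted int arrays (the real iterator does the analogous thing over bkeys; all names here are hypothetical):

#include <stdio.h>

struct merge_iter {
	const int *btree, *btree_end;
	const int *journal, *journal_end;
};

/* Peek the next key in merged order; on equal keys the journal wins,
 * mirroring how journal keys override on-disk keys before replay. */
static const int *merge_iter_peek(struct merge_iter *it)
{
	int have_b = it->btree < it->btree_end;
	int have_j = it->journal < it->journal_end;

	if (!have_b && !have_j)
		return NULL;
	if (have_j && (!have_b || *it->journal <= *it->btree))
		return it->journal;
	return it->btree;
}

static void merge_iter_advance(struct merge_iter *it)
{
	const int *k = merge_iter_peek(it);

	/* Skip a shadowed btree key so it isn't emitted twice: */
	if (k == it->journal && it->btree < it->btree_end &&
	    *it->btree == *it->journal)
		it->btree++;
	if (k == it->journal)
		it->journal++;
	else
		it->btree++;
}

int main(void)
{
	int b[] = { 1, 3, 5 }, j[] = { 3, 4 };
	struct merge_iter it = { b, b + 3, j, j + 2 };
	const int *k;

	while ((k = merge_iter_peek(&it))) {	/* prints 1 3 4 5 */
		printf("%d ", *k);
		merge_iter_advance(&it);
	}
	printf("\n");
	return 0;
}
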
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index f6141fde830b9c5d43d91ae291ef617c8135601d..bb251fcb4bb0a326d90105bbee7b5bf0db868eb8 100644
@@ -339,7 +339,8 @@ static inline void bkey_init(struct bkey *k)
        x(stripe,               14)                     \
        x(reflink_p,            15)                     \
        x(reflink_v,            16)                     \
-       x(inline_data,          17)
+       x(inline_data,          17)                     \
+       x(btree_ptr_v2,         18)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -595,6 +596,19 @@ struct bch_btree_ptr {
        __u64                   _data[0];
 } __attribute__((packed, aligned(8)));
 
+struct bch_btree_ptr_v2 {
+       struct bch_val          v;
+
+       __u64                   mem_ptr;
+       __le64                  seq;
+       __le16                  sectors_written;
+       /* In case we ever decide to do variable size btree nodes: */
+       __le16                  sectors;
+       struct bpos             min_key;
+       struct bch_extent_ptr   start[0];
+       __u64                   _data[0];
+} __attribute__((packed, aligned(8)));
+
 struct bch_extent {
        struct bch_val          v;
 
@@ -626,7 +640,8 @@ struct bch_reservation {
 
 /* Btree pointers don't carry around checksums: */
 #define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
-       ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
+       ((sizeof(struct bch_btree_ptr_v2) +                     \
+         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
 #define BKEY_BTREE_PTR_U64s_MAX                                        \
        (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
 
@@ -1294,7 +1309,9 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3],  0, 16);
        x(reflink,                      6)      \
        x(new_siphash,                  7)      \
        x(inline_data,                  8)      \
-       x(new_extent_overwrite,         9)
+       x(new_extent_overwrite,         9)      \
+       x(incompressible,               10)     \
+       x(btree_ptr_v2,                 11)
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
@@ -1374,11 +1391,12 @@ enum bch_csum_opts {
 };
 
 #define BCH_COMPRESSION_TYPES()                \
-       x(none,         0)              \
-       x(lz4_old,      1)              \
-       x(gzip,         2)              \
-       x(lz4,          3)              \
-       x(zstd,         4)
+       x(none,                 0)      \
+       x(lz4_old,              1)      \
+       x(gzip,                 2)      \
+       x(lz4,                  3)      \
+       x(zstd,                 4)      \
+       x(incompressible,       5)
 
 enum bch_compression_type {
 #define x(t, n) BCH_COMPRESSION_TYPE_##t,
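
Annotation (not part of the commit): since btree_ptr_v2 carries a fixed header (mem_ptr, seq, sectors, min_key) ahead of its pointers, the worst-case btree-pointer value size becomes that header plus BCH_REPLICAS_MAX pointers, which is what the revised BKEY_BTREE_PTR_VAL_U64s_MAX computes. A standalone sketch mirroring the calculation, with struct layouts copied from this diff, a simplified bpos, and BCH_REPLICAS_MAX assumed to be 4 as in bcachefs_format.h:

#include <stdio.h>
#include <stdint.h>

#define BCH_REPLICAS_MAX 4	/* assumption: value from bcachefs_format.h */

struct bch_val { uint64_t __nothing[0]; };
/* Simplified: the real bpos orders its fields by endianness, but is the
 * same 20 packed bytes. */
struct bpos { uint64_t inode, offset; uint32_t snapshot; } __attribute__((packed, aligned(4)));
struct bch_extent_ptr { uint64_t bits; };	/* stand-in for the real 8-byte bitfield */

struct bch_btree_ptr_v2 {
	struct bch_val		v;
	uint64_t		mem_ptr;
	uint64_t		seq;	/* __le64 on disk */
	uint16_t		sectors_written;
	uint16_t		sectors;
	struct bpos		min_key;
	struct bch_extent_ptr	start[0];
	uint64_t		_data[0];
} __attribute__((packed, aligned(8)));

#define BKEY_BTREE_PTR_VAL_U64s_MAX					\
	((sizeof(struct bch_btree_ptr_v2) +				\
	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(uint64_t))

int main(void)
{
	/* header is 8+8+2+2+20 = 40 bytes; + 4 pointers * 8 = 72 bytes = 9 u64s */
	printf("val u64s max = %zu\n", BKEY_BTREE_PTR_VAL_U64s_MAX);
	return 0;
}
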
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index f2d5f3009b210e440b639d27b3abd996720e4869..9106bea9ac067d2a7eb4c4908ce461f037d5e3fe 100644
@@ -565,6 +565,7 @@ BKEY_VAL_ACCESSORS(stripe);
 BKEY_VAL_ACCESSORS(reflink_p);
 BKEY_VAL_ACCESSORS(reflink_v);
 BKEY_VAL_ACCESSORS(inline_data);
+BKEY_VAL_ACCESSORS(btree_ptr_v2);
 
 /* byte order helpers */
 
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 320e17d108d2660b93ecab1a119e90113ff82f1a..c064cf468a9b9a2d0570c95ce210e0f3afed75b5 100644
@@ -202,15 +202,12 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
        bch2_val_to_text(out, c, k);
 }
 
-void bch2_bkey_swab(const struct bkey_format *f,
-                   struct bkey_packed *k)
+void bch2_bkey_swab_val(struct bkey_s k)
 {
-       const struct bkey_ops *ops = &bch2_bkey_ops[k->type];
-
-       bch2_bkey_swab_key(f, k);
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
 
        if (ops->swab)
-               ops->swab(f, k);
+               ops->swab(k);
 }
 
 bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index 8568b65c1ed2e9ac1a15f8a2f437961f678698ef..d36468b752237eb8935a4dae7774cc6c54ac6bee 100644
@@ -29,7 +29,7 @@ struct bkey_ops {
        void            (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
        void            (*val_to_text)(struct printbuf *, struct bch_fs *,
                                       struct bkey_s_c);
-       void            (*swab)(const struct bkey_format *, struct bkey_packed *);
+       void            (*swab)(struct bkey_s);
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
        enum merge_result (*key_merge)(struct bch_fs *,
                                       struct bkey_s, struct bkey_s);
@@ -51,7 +51,7 @@ void bch2_val_to_text(struct printbuf *, struct bch_fs *,
 void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
                           struct bkey_s_c);
 
-void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *);
+void bch2_bkey_swab_val(struct bkey_s);
 
 bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
 
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index 18f842012f05ab1afe57a630c5c2c898c281669a..7cbb57042af10de4cf8fc0241e77d1bcdda4b06b 100644
@@ -210,28 +210,38 @@ bch2_sort_repack_merge(struct bch_fs *c,
                       bool filter_whiteouts)
 {
        struct bkey_packed *prev = NULL, *k_packed;
-       struct bkey_s k;
+       struct bkey_on_stack k;
        struct btree_nr_keys nr;
-       struct bkey unpacked;
 
        memset(&nr, 0, sizeof(nr));
+       bkey_on_stack_init(&k);
 
        while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
                if (filter_whiteouts && bkey_whiteout(k_packed))
                        continue;
 
-               k = __bkey_disassemble(src, k_packed, &unpacked);
+               /*
+                * NOTE:
+                * bch2_bkey_normalize may modify the key we pass it (dropping
+                * stale pointers) and we don't have a write lock on the src
+                * node; we have to make a copy of the entire key before calling
+                * normalize
+                */
+               bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
+               bch2_bkey_unpack(src, k.k, k_packed);
 
                if (filter_whiteouts &&
-                   bch2_bkey_normalize(c, k))
+                   bch2_bkey_normalize(c, bkey_i_to_s(k.k)))
                        continue;
 
-               extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
+               extent_sort_append(c, out_f, &nr, vstruct_last(dst),
+                                  &prev, bkey_i_to_s(k.k));
        }
 
        extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
 
        dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
+       bkey_on_stack_exit(&k, c);
        return nr;
 }
 
@@ -254,23 +264,18 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
        sort_iter_sort(iter, sort_keys_cmp);
 
        while ((in = sort_iter_next(iter, sort_keys_cmp))) {
+               bool needs_whiteout = false;
+
                if (bkey_whiteout(in) &&
                    (filter_whiteouts || !in->needs_whiteout))
                        continue;
 
-               if (bkey_whiteout(in) &&
-                   (next = sort_iter_peek(iter)) &&
-                   !bkey_cmp_packed(iter->b, in, next)) {
+               while ((next = sort_iter_peek(iter)) &&
+                      !bkey_cmp_packed(iter->b, in, next)) {
                        BUG_ON(in->needs_whiteout &&
                               next->needs_whiteout);
-                       /*
-                        * XXX racy, called with read lock from write path
-                        *
-                        * leads to spurious BUG_ON() in bkey_unpack_key() in
-                        * debug mode
-                        */
-                       next->needs_whiteout |= in->needs_whiteout;
-                       continue;
+                       needs_whiteout |= in->needs_whiteout;
+                       in = sort_iter_next(iter, sort_keys_cmp);
                }
 
                if (bkey_whiteout(in)) {
@@ -279,6 +284,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
                } else {
                        bkey_copy(out, in);
                }
+               out->needs_whiteout |= needs_whiteout;
                out = bkey_next(out);
        }
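
Annotation (not part of the commit): bch2_sort_keys() used to OR in->needs_whiteout into the next duplicate key in place, mutating a node it may only hold a read lock on (the race the removed XXX comment described); it now walks the whole run of equal keys, accumulates the flag in a local, and applies it to the key it emits. A standalone sketch of that fold-over-duplicates pattern on sorted (key, flag) pairs, with hypothetical names:

#include <stdio.h>

struct entry { int key; int needs_whiteout; };

/* Emit one entry per distinct key, OR-ing the flag across the run of
 * duplicates instead of writing it back into the (shared) input. */
static int dedup(const struct entry *in, int n, struct entry *out)
{
	int nr = 0, i = 0;

	while (i < n) {
		int needs_whiteout = 0;
		struct entry e = in[i++];

		while (i < n && in[i].key == e.key) {
			needs_whiteout |= e.needs_whiteout;
			e = in[i++];	/* later duplicates supersede earlier ones */
		}

		e.needs_whiteout |= needs_whiteout;
		out[nr++] = e;
	}
	return nr;
}

int main(void)
{
	struct entry in[] = { {1, 1}, {1, 0}, {2, 0}, {3, 1}, {3, 0} };
	struct entry out[5];
	int nr = dedup(in, 5, out), i;

	for (i = 0; i < nr; i++)	/* prints 1:1 2:0 3:1 */
		printf("%d:%d ", out[i].key, out[i].needs_whiteout);
	printf("\n");
	return 0;
}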
 
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index d6792d276ecbbd0e584e5b1d635b3f4d2ead712b..abf87ebd4906bda0b6354911ab7ba2d281617cec 100644
@@ -1206,7 +1206,8 @@ void bch2_bset_insert(struct btree *b,
        memcpy_u64s(bkeyp_val(f, where), &insert->v,
                    bkeyp_val_u64s(f, src));
 
-       bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
+       if (src->u64s != clobber_u64s)
+               bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
 
        bch2_verify_btree_nr_keys(b);
 }
@@ -1397,21 +1398,21 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
 {
        if (lossy_packed_search)
                while (m != btree_bkey_last(b, t) &&
-                      bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
-                                             m) > 0)
+                      bkey_iter_cmp_p_or_unp(b, m,
+                                       lossy_packed_search, search) < 0)
                        m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
 
        if (!packed_search)
                while (m != btree_bkey_last(b, t) &&
-                      bkey_iter_pos_cmp(b, search, m) > 0)
+                      bkey_iter_pos_cmp(b, m, search) < 0)
                        m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
 
        if (btree_keys_expensive_checks(b)) {
                struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
 
                BUG_ON(prev &&
-                      bkey_iter_cmp_p_or_unp(b, search, packed_search,
-                                             prev) <= 0);
+                      bkey_iter_cmp_p_or_unp(b, prev,
+                                       packed_search, search) >= 0);
        }
 
        return m;
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 2653a74b3b14ad5741d0ab15af4ef685c2c4455d..7338ccbc8cbd5ba5e32a5a837b9f1df84b61a8ce 100644
@@ -199,12 +199,6 @@ __bkey_unpack_key_format_checked(const struct btree *b,
                if (btree_keys_expensive_checks(b)) {
                        struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
 
-                       /*
-                        * hack around a harmless race when compacting whiteouts
-                        * for a write:
-                        */
-                       dst2.needs_whiteout = dst->needs_whiteout;
-
                        BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
                }
        }
@@ -360,7 +354,7 @@ void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned);
 static inline int bkey_cmp_p_or_unp(const struct btree *b,
                                    const struct bkey_packed *l,
                                    const struct bkey_packed *r_packed,
-                                   struct bpos *r)
+                                   const struct bpos *r)
 {
        EBUG_ON(r_packed && !bkey_packed(r_packed));
 
@@ -449,7 +443,7 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
  * XXX: only need to compare pointers for keys that are both within a
  * btree_node_iterator - we need to break ties for prev() to work correctly
  */
-static inline int bkey_iter_cmp(struct btree *b,
+static inline int bkey_iter_cmp(const struct btree *b,
                                const struct bkey_packed *l,
                                const struct bkey_packed *r)
 {
@@ -458,7 +452,7 @@ static inline int bkey_iter_cmp(struct btree *b,
                ?: cmp_int(l, r);
 }
 
-static inline int btree_node_iter_cmp(struct btree *b,
+static inline int btree_node_iter_cmp(const struct btree *b,
                                      struct btree_node_iter_set l,
                                      struct btree_node_iter_set r)
 {
@@ -467,22 +461,22 @@ static inline int btree_node_iter_cmp(struct btree *b,
                        __btree_node_offset_to_key(b, r.k));
 }
 
-/* These assume l (the search key) is not a deleted key: */
-static inline int bkey_iter_pos_cmp(struct btree *b,
-                       struct bpos *l,
-                       const struct bkey_packed *r)
+/* These assume r (the search key) is not a deleted key: */
+static inline int bkey_iter_pos_cmp(const struct btree *b,
+                       const struct bkey_packed *l,
+                       const struct bpos *r)
 {
-       return -bkey_cmp_left_packed(b, r, l)
-               ?: (int) bkey_deleted(r);
+       return bkey_cmp_left_packed(b, l, r)
+               ?: -((int) bkey_deleted(l));
 }
 
-static inline int bkey_iter_cmp_p_or_unp(struct btree *b,
-                       struct bpos *l,
-                       const struct bkey_packed *l_packed,
-                       const struct bkey_packed *r)
+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
+                                   const struct bkey_packed *l,
+                                   const struct bkey_packed *r_packed,
+                                   const struct bpos *r)
 {
-       return -bkey_cmp_p_or_unp(b, r, l_packed, l)
-               ?: (int) bkey_deleted(r);
+       return bkey_cmp_p_or_unp(b, l, r_packed, r)
+               ?: -((int) bkey_deleted(l));
 }
 
 static inline struct bkey_packed *
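
Annotation (not part of the commit): these search comparators flip their argument order from "search key on the left" to the conventional "node key on the left, search key on the right", and the call sites in bset.c and btree_iter.c change from "> 0" tests to "< 0" accordingly. The forms are equivalent because the new function returns exactly the negation of the old one. A standalone check of that identity on simplified keys:

#include <assert.h>
#include <stdio.h>

struct key { int pos; int deleted; };

static int cmp_int(int l, int r) { return (l > r) - (l < r); }

/* Old convention: search key is the left argument. */
static int iter_pos_cmp_old(const struct key *search, const struct key *k)
{
	int c = -cmp_int(k->pos, search->pos);
	return c ? c : (int) k->deleted;
}

/* New convention: node key left, search key right. */
static int iter_pos_cmp_new(const struct key *k, const struct key *search)
{
	int c = cmp_int(k->pos, search->pos);
	return c ? c : -((int) k->deleted);
}

int main(void)
{
	struct key ks[] = { {1, 0}, {1, 1}, {2, 0}, {2, 1} };
	int i, j;

	/* new(k, search) is exactly -old(search, k), so every call site's
	 * "old(...) > 0" test rewrites to "new(...) < 0": */
	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++) {
			int o = iter_pos_cmp_old(&ks[i], &ks[j]);
			int n = iter_pos_cmp_new(&ks[j], &ks[i]);
			assert((o > 0) == (n < 0) && (o < 0) == (n > 0));
		}
	printf("comparator flip is sign-preserving\n");
	return 0;
}
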
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 0c737f35f430aa21af52a5832936279da97be716..2c9c3c18defecb5445baa1762519c90ae6346be2 100644
@@ -62,13 +62,13 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
        const struct btree *b = obj;
        const u64 *v = arg->key;
 
-       return PTR_HASH(&b->key) == *v ? 0 : 1;
+       return b->hash_val == *v ? 0 : 1;
 }
 
 static const struct rhashtable_params bch_btree_cache_params = {
        .head_offset    = offsetof(struct btree, hash),
-       .key_offset     = offsetof(struct btree, key.v),
-       .key_len        = sizeof(struct bch_extent_ptr),
+       .key_offset     = offsetof(struct btree, hash_val),
+       .key_len        = sizeof(u64),
        .obj_cmpfn      = bch2_btree_cache_cmp_fn,
 };
 
@@ -114,11 +114,14 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
        rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
 
        /* Cause future lookups for this node to fail: */
-       PTR_HASH(&b->key) = 0;
+       b->hash_val = 0;
 }
 
 int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
 {
+       BUG_ON(b->hash_val);
+       b->hash_val = btree_ptr_hash_val(&b->key);
+
        return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
                                             bch_btree_cache_params);
 }
@@ -144,8 +147,9 @@ __flatten
 static inline struct btree *btree_cache_find(struct btree_cache *bc,
                                     const struct bkey_i *k)
 {
-       return rhashtable_lookup_fast(&bc->table, &PTR_HASH(k),
-                                     bch_btree_cache_params);
+       u64 v = btree_ptr_hash_val(k);
+
+       return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
 }
 
 /*
@@ -199,7 +203,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
                btree_node_wait_on_io(b);
        }
 out:
-       if (PTR_HASH(&b->key) && !ret)
+       if (b->hash_val && !ret)
                trace_btree_node_reap(c, b);
        return ret;
 out_unlock:
@@ -607,7 +611,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                /* raced with another fill: */
 
                /* mark as unhashed... */
-               PTR_HASH(&b->key) = 0;
+               b->hash_val = 0;
 
                mutex_lock(&bc->lock);
                list_add(&b->list, &bc->freeable);
@@ -710,7 +714,7 @@ retry:
                 * free it:
                 *
                 * To guard against this, btree nodes are evicted from the cache
-                * when they're freed - and PTR_HASH() is zeroed out, which we
+                * when they're freed - and b->hash_val is zeroed out, which we
                 * check for after we lock the node.
                 *
                 * Then, bch2_btree_node_relock() on the parent will fail - because
@@ -723,7 +727,7 @@ retry:
                if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
                        return ERR_PTR(-EINTR);
 
-               if (unlikely(PTR_HASH(&b->key) != PTR_HASH(k) ||
+               if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
                             b->level != level ||
                             race_fault())) {
                        six_unlock_type(&b->lock, lock_type);
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index c5873c58439cf8d39927cb699386592dd2642a3b..d27acd87e4b8c92241b96482b6cf733e8360a639 100644
@@ -35,13 +35,22 @@ void bch2_fs_btree_cache_exit(struct bch_fs *);
 int bch2_fs_btree_cache_init(struct bch_fs *);
 void bch2_fs_btree_cache_init_early(struct btree_cache *);
 
-#define PTR_HASH(_k)   *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v)
+static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
+{
+       switch (k->k.type) {
+       case KEY_TYPE_btree_ptr:
+               return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
+       case KEY_TYPE_btree_ptr_v2:
+               return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
+       default:
+               return 0;
+       }
+}
 
 /* is btree node in hash table? */
 static inline bool btree_node_hashed(struct btree *b)
 {
-       return b->key.k.type == KEY_TYPE_btree_ptr &&
-               PTR_HASH(&b->key);
+       return b->hash_val != 0;
 }
 
 #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos)               \
@@ -75,7 +84,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
        return c->opts.btree_node_size >> c->block_bits;
 }
 
-#define BTREE_SPLIT_THRESHOLD(c)               (btree_blocks(c) * 3 / 4)
+#define BTREE_SPLIT_THRESHOLD(c)               (btree_max_u64s(c) * 3 / 4)
 
 #define BTREE_FOREGROUND_MERGE_THRESHOLD(c)    (btree_max_u64s(c) * 1 / 3)
 #define BTREE_FOREGROUND_MERGE_HYSTERESIS(c)                   \
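
Annotation (not part of the commit): PTR_HASH() hashed btree nodes by their first bch_extent_ptr and only worked for KEY_TYPE_btree_ptr; btree_ptr_hash_val() dispatches on key type (first pointer word for v1, the embedded seq for v2), and the result is cached in b->hash_val so the rhashtable lookup and compare paths touch a single u64. A simplified standalone sketch of the dispatch, with stand-in key layouts:

#include <stdio.h>
#include <stdint.h>

/* Stand-ins for the real bkey types; values chosen for illustration only. */
enum key_type { KEY_TYPE_btree_ptr, KEY_TYPE_btree_ptr_v2, KEY_TYPE_other };

struct bkey_i {
	enum key_type type;
	uint64_t first_ptr;	/* v1: hash the first pointer word */
	uint64_t seq;		/* v2: hash the node's sequence number */
};

static uint64_t btree_ptr_hash_val(const struct bkey_i *k)
{
	switch (k->type) {
	case KEY_TYPE_btree_ptr:
		return k->first_ptr;
	case KEY_TYPE_btree_ptr_v2:
		return k->seq;
	default:
		return 0;	/* 0 doubles as "not hashed / evicted" */
	}
}

int main(void)
{
	struct bkey_i v1 = { KEY_TYPE_btree_ptr, 0xabcd, 0 };
	struct bkey_i v2 = { KEY_TYPE_btree_ptr_v2, 0xabcd, 42 };

	/* The computed value would be cached in btree->hash_val at hash
	 * insert time, and zeroed to invalidate the node on eviction. */
	printf("%llx %llx\n",
	       (unsigned long long) btree_ptr_hash_val(&v1),
	       (unsigned long long) btree_ptr_hash_val(&v2));
	return 0;
}
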
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 05879b66d6af03be86ed2ab7fa8483e8be5c5d35..3705c41f515121525059c87445174ba53b8be49f 100644
@@ -124,7 +124,11 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                BUG_ON(journal_seq_verify(c) &&
                       k.k->version.lo > journal_cur_seq(&c->journal));
 
-               if (k.k->version.lo > atomic64_read(&c->key_version))
+               /* XXX change to fsck check */
+               if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
+                               "key version number higher than recorded: %llu > %llu",
+                               k.k->version.lo,
+                               atomic64_read(&c->key_version)))
                        atomic64_set(&c->key_version, k.k->version.lo);
 
                if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 5f1c3183fa857ce8e37bad32828f2825fac63524..84fbceea5027f4b1f29ad9927792f05d055bbbaf 100644
@@ -735,6 +735,15 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                        bch2_bpos_swab(&b->data->max_key);
                }
 
+               if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+                       struct bch_btree_ptr_v2 *bp =
+                               &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+                       btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
+                                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                                    "incorrect min_key");
+               }
+
                btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
                             BTREE_ERR_MUST_RETRY, c, b, i,
                             "incorrect max key");
@@ -784,7 +793,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 
        for (k = i->start;
             k != vstruct_last(i);) {
-               struct bkey_s_c u;
+               struct bkey_s u;
                struct bkey tmp;
                const char *invalid;
 
@@ -805,21 +814,24 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                }
 
                if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
-                       bch2_bkey_swab(&b->format, k);
+                       bch2_bkey_swab_key(&b->format, k);
 
                if (!write &&
                    version < bcachefs_metadata_version_bkey_renumber)
                        bch2_bkey_renumber(btree_node_type(b), k, write);
 
-               u = bkey_disassemble(b, k, &tmp);
+               u = __bkey_disassemble(b, k, &tmp);
 
-               invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?:
-                       bch2_bkey_in_btree_node(b, u) ?:
-                       (write ? bch2_bkey_val_invalid(c, u) : NULL);
+               if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
+                       bch2_bkey_swab_val(u);
+
+               invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?:
+                       bch2_bkey_in_btree_node(b, u.s_c) ?:
+                       (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL);
                if (invalid) {
                        char buf[160];
 
-                       bch2_bkey_val_to_text(&PBUF(buf), c, u);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
                                  "invalid bkey:\n%s\n%s", invalid, buf);
 
@@ -895,6 +907,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                     BTREE_ERR_MUST_RETRY, c, b, NULL,
                     "bad btree header");
 
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bch_btree_ptr_v2 *bp =
+                       &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+               btree_err_on(b->data->keys.seq != bp->seq,
+                            BTREE_ERR_MUST_RETRY, c, b, NULL,
+                            "got wrong btree node");
+       }
+
        while (b->written < c->opts.btree_node_size) {
                unsigned sectors, whiteout_u64s = 0;
                struct nonce nonce;
@@ -1002,15 +1023,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
        i = &b->data->keys;
        for (k = i->start; k != vstruct_last(i);) {
                struct bkey tmp;
-               struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
-               const char *invalid = bch2_bkey_val_invalid(c, u);
+               struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+               const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
 
                if (invalid ||
                    (inject_invalid_keys(c) &&
                     !bversion_cmp(u.k->version, MAX_VERSION))) {
                        char buf[160];
 
-                       bch2_bkey_val_to_text(&PBUF(buf), c, u);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
                                  "invalid bkey %s: %s", buf, invalid);
 
@@ -1023,6 +1044,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                        continue;
                }
 
+               if (u.k->type == KEY_TYPE_btree_ptr_v2) {
+                       struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
+
+                       bp.v->mem_ptr = 0;
+               }
+
                k = bkey_next_skip_noops(k, vstruct_last(i));
        }
 
@@ -1252,8 +1279,6 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
 {
        struct btree *b         = wbio->wbio.bio.bi_private;
        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-       struct bkey_i_btree_ptr *new_key;
-       struct bkey_s_btree_ptr bp;
        struct bch_extent_ptr *ptr;
        struct btree_trans trans;
        struct btree_iter *iter;
@@ -1279,16 +1304,13 @@ retry:
 
        bkey_copy(&tmp.k, &b->key);
 
-       new_key = bkey_i_to_btree_ptr(&tmp.k);
-       bp = btree_ptr_i_to_s(new_key);
-
        bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
                bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
-       if (!bch2_bkey_nr_ptrs(bp.s_c))
+       if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k)))
                goto err;
 
-       ret = bch2_btree_node_update_key(c, iter, b, new_key);
+       ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
        if (ret == -EINTR)
                goto retry;
        if (ret)
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 988c550c85ebb59e9fbfe117914016d3b91835b7..c365a2aff4464b98f3970ecf1483c78ec0f041e1 100644
 #include <linux/prefetch.h>
 #include <trace/events/bcachefs.h>
 
-static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
-                                                   struct btree_iter_level *,
-                                                   struct bkey *);
-
 #define BTREE_ITER_NO_NODE_GET_LOCKS   ((struct btree *) 1)
 #define BTREE_ITER_NO_NODE_DROP                ((struct btree *) 2)
 #define BTREE_ITER_NO_NODE_LOCK_ROOT   ((struct btree *) 3)
@@ -29,37 +25,14 @@ static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
                (unsigned long) iter->l[l].b >= 128;
 }
 
-/* Returns < 0 if @k is before iter pos, > 0 if @k is after */
-static inline int __btree_iter_pos_cmp(struct btree_iter *iter,
-                                      const struct btree *b,
-                                      const struct bkey_packed *k,
-                                      bool interior_node)
+static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
 {
-       int cmp = bkey_cmp_left_packed(b, k, &iter->pos);
-
-       if (cmp)
-               return cmp;
-       if (bkey_deleted(k))
-               return -1;
+       struct bpos pos = iter->pos;
 
-       /*
-        * Normally, for extents we want the first key strictly greater than
-        * the iterator position - with the exception that for interior nodes,
-        * we don't want to advance past the last key if the iterator position
-        * is POS_MAX:
-        */
-       if (iter->flags & BTREE_ITER_IS_EXTENTS &&
-           (!interior_node ||
-            bkey_cmp_left_packed_byval(b, k, POS_MAX)))
-               return -1;
-       return 1;
-}
-
-static inline int btree_iter_pos_cmp(struct btree_iter *iter,
-                                    const struct btree *b,
-                                    const struct bkey_packed *k)
-{
-       return __btree_iter_pos_cmp(iter, b, k, b->level != 0);
+       if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+           bkey_cmp(pos, POS_MAX))
+               pos = bkey_successor(pos);
+       return pos;
 }
 
 /* Btree node locking: */
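
Annotation (not part of the commit): the removed __btree_iter_pos_cmp() special-cased extent iterators inside every comparison to get "first key strictly greater than pos"; btree_iter_search_key() folds that into the search key itself, searching at the successor of pos for extents so one ordinary comparator serves both modes. A standalone sketch, assuming bkey_successor() bumps offset and carries into inode on overflow, and using a bpos simplified to inode:offset:

#include <stdint.h>
#include <stdio.h>

struct bpos { uint64_t inode, offset; };

/* Assumption: mirrors bcachefs bkey_successor(): bump offset, carrying
 * into inode when offset wraps. */
static struct bpos bkey_successor(struct bpos p)
{
	if (++p.offset == 0)
		p.inode++;
	return p;
}

static int bpos_cmp(struct bpos l, struct bpos r)
{
	if (l.inode != r.inode)
		return l.inode < r.inode ? -1 : 1;
	if (l.offset != r.offset)
		return l.offset < r.offset ? -1 : 1;
	return 0;
}

int main(void)
{
	struct bpos pos = { 1, 10 };
	struct bpos keys[] = { {1, 9}, {1, 10}, {1, 11} };
	int i;

	/* Non-extents search at pos (first key >= pos); extents search at
	 * successor(pos), which lands on the first key strictly > pos: */
	for (i = 0; i < 3; i++)
		printf("key %llu:%llu  >=pos %d  >=succ(pos) %d\n",
		       (unsigned long long) keys[i].inode,
		       (unsigned long long) keys[i].offset,
		       bpos_cmp(keys[i], pos) >= 0,
		       bpos_cmp(keys[i], bkey_successor(pos)) >= 0);
	return 0;
}
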
@@ -415,6 +388,7 @@ void bch2_trans_unlock(struct btree_trans *trans)
 static void __bch2_btree_iter_verify(struct btree_iter *iter,
                                     struct btree *b)
 {
+       struct bpos pos = btree_iter_search_key(iter);
        struct btree_iter_level *l = &iter->l[b->level];
        struct btree_node_iter tmp = l->iter;
        struct bkey_packed *k;
@@ -434,20 +408,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
         * For extents, the iterator may have skipped past deleted keys (but not
         * whiteouts)
         */
-       k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
+       k = b->level || btree_node_type_is_extents(iter->btree_id)
                ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard)
                : bch2_btree_node_iter_prev_all(&tmp, b);
-       if (k && btree_iter_pos_cmp(iter, b, k) > 0) {
+       if (k && bkey_iter_pos_cmp(b, k, &pos) >= 0) {
                char buf[100];
                struct bkey uk = bkey_unpack_key(b, k);
 
                bch2_bkey_to_text(&PBUF(buf), &uk);
-               panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
+               panic("iterator should be before prev key:\n%s\n%llu:%llu\n",
                      buf, iter->pos.inode, iter->pos.offset);
        }
 
        k = bch2_btree_node_iter_peek_all(&l->iter, b);
-       if (k && btree_iter_pos_cmp(iter, b, k) < 0) {
+       if (k && bkey_iter_pos_cmp(b, k, &pos) < 0) {
                char buf[100];
                struct bkey uk = bkey_unpack_key(b, k);
 
@@ -495,15 +469,19 @@ static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
 }
 
 static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
-                                                   struct btree *b,
-                                                   struct bkey_packed *where)
+                                              struct btree *b,
+                                              struct bkey_packed *where)
 {
-       struct btree_node_iter *node_iter = &iter->l[0].iter;
+       struct btree_iter_level *l = &iter->l[b->level];
+       struct bpos pos = btree_iter_search_key(iter);
 
-       if (where == bch2_btree_node_iter_peek_all(node_iter, b)) {
-               bkey_disassemble(b, where, &iter->k);
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-       }
+       if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
+               return;
+
+       if (bkey_iter_pos_cmp(l->b, where, &pos) < 0)
+               bch2_btree_node_iter_advance(&l->iter, l->b);
+
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
 
 void bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
@@ -535,6 +513,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
        bool iter_current_key_modified =
                orig_iter_pos >= offset &&
                orig_iter_pos <= offset + clobber_u64s;
+       struct bpos iter_pos = btree_iter_search_key(iter);
 
        btree_node_iter_for_each(node_iter, set)
                if (set->end == old_end)
@@ -542,7 +521,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 
        /* didn't find the bset in the iterator - might have to readd it: */
        if (new_u64s &&
-           btree_iter_pos_cmp(iter, b, where) > 0) {
+           bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
                bch2_btree_node_iter_push(node_iter, b, where, end);
                goto fixup_done;
        } else {
@@ -557,7 +536,7 @@ found:
                return;
 
        if (new_u64s &&
-           btree_iter_pos_cmp(iter, b, where) > 0) {
+           bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
                set->k = offset;
        } else if (set->k < offset + clobber_u64s) {
                set->k = offset + new_u64s;
@@ -584,7 +563,7 @@ fixup_done:
        if (!bch2_btree_node_iter_end(node_iter) &&
            iter_current_key_modified &&
            (b->level ||
-            (iter->flags & BTREE_ITER_IS_EXTENTS))) {
+            btree_node_type_is_extents(iter->btree_id))) {
                struct bset_tree *t;
                struct bkey_packed *k, *k2, *p;
 
@@ -702,11 +681,12 @@ static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
                                             struct btree_iter_level *l,
                                             int max_advance)
 {
+       struct bpos pos = btree_iter_search_key(iter);
        struct bkey_packed *k;
        int nr_advanced = 0;
 
        while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
-              btree_iter_pos_cmp(iter, l->b, k) < 0) {
+              bkey_iter_pos_cmp(l->b, k, &pos) < 0) {
                if (max_advance > 0 && nr_advanced >= max_advance)
                        return false;
 
@@ -765,13 +745,7 @@ static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
 static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
                                             struct btree *b)
 {
-       int cmp = bkey_cmp(b->key.k.p, iter->pos);
-
-       if (!cmp &&
-           (iter->flags & BTREE_ITER_IS_EXTENTS) &&
-           bkey_cmp(b->key.k.p, POS_MAX))
-               cmp = -1;
-       return cmp < 0;
+       return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0;
 }
 
 static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
@@ -785,16 +759,10 @@ static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
 static inline void __btree_iter_init(struct btree_iter *iter,
                                     unsigned level)
 {
+       struct bpos pos = btree_iter_search_key(iter);
        struct btree_iter_level *l = &iter->l[level];
 
-       bch2_btree_node_iter_init(&l->iter, l->b, &iter->pos);
-
-       if (iter->flags & BTREE_ITER_IS_EXTENTS)
-               btree_iter_advance_to_pos(iter, l, -1);
-
-       /* Skip to first non whiteout: */
-       if (level)
-               bch2_btree_node_iter_peek(&l->iter, l->b);
+       bch2_btree_node_iter_init(&l->iter, l->b, &pos);
 
        btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
@@ -1303,6 +1271,29 @@ static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
        return l;
 }
 
+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
+                              bool strictly_greater)
+{
+       struct bpos old = btree_iter_search_key(iter);
+       unsigned l;
+       int cmp;
+
+       iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+       iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
+       iter->pos = new_pos;
+
+       cmp = bkey_cmp(btree_iter_search_key(iter), old);
+       if (!cmp)
+               return;
+
+       l = btree_iter_pos_changed(iter, cmp);
+
+       if (l != iter->level)
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+       else
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
 void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
 {
        int cmp = bkey_cmp(new_pos, iter->pos);
@@ -1371,12 +1362,6 @@ static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
                if (debug_check_iterators(iter->trans->c)) {
                        struct bkey k = bkey_unpack_key(l->b, _k);
 
-                       /*
-                        * this flag is internal to the btree code,
-                        * we don't care if it doesn't match - if it's now set
-                        * it just means the key has been written out to disk:
-                        */
-                       k.needs_whiteout = iter->k.needs_whiteout;
                        BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
                }
 
@@ -1564,9 +1549,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
        int ret;
 
 recheck:
-       while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
-              bkey_cmp(k.k->p, iter->pos) <= 0)
-               bch2_btree_node_iter_advance(&l->iter, l->b);
+       btree_iter_advance_to_pos(iter, l, -1);
 
        /*
         * iterator is now at the correct position for inserting at iter->pos,
@@ -1575,9 +1558,27 @@ recheck:
         */
 
        node_iter = l->iter;
-       if (k.k && bkey_whiteout(k.k))
-               k = __btree_iter_unpack(iter, l, &iter->k,
-                       bch2_btree_node_iter_peek(&node_iter, l->b));
+       k = __btree_iter_unpack(iter, l, &iter->k,
+               bch2_btree_node_iter_peek(&node_iter, l->b));
+
+       if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
+               /*
+                * If there wasn't actually a hole, want the iterator to be
+                * pointed at the key we found:
+                *
+                * XXX: actually, we shouldn't be changing the iterator here:
+                * the iterator needs to be correct for inserting at iter->pos,
+                * and there may be whiteouts between iter->pos and what this
+                * iterator points at:
+                */
+               l->iter = node_iter;
+
+               EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0);
+               iter->uptodate = BTREE_ITER_UPTODATE;
+
+               __bch2_btree_iter_verify(iter, l->b);
+               return k;
+       }
 
        /*
         * If we got to the end of the node, check if we need to traverse to the
@@ -1592,24 +1593,6 @@ recheck:
                goto recheck;
        }
 
-       if (k.k &&
-           !bkey_whiteout(k.k) &&
-           bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
-               /*
-                * if we skipped forward to find the first non whiteout and
-                * there _wasn't_ actually a hole, we want the iterator to be
-                * pointed at the key we found:
-                */
-               l->iter = node_iter;
-
-               EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
-               EBUG_ON(bkey_deleted(k.k));
-               iter->uptodate = BTREE_ITER_UPTODATE;
-
-               __bch2_btree_iter_verify(iter, l->b);
-               return k;
-       }
-
        /* hole */
 
        /* holes can't span inode numbers: */
@@ -1749,8 +1732,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
        iter->nodes_locked              = 0;
        iter->nodes_intent_locked       = 0;
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
-               iter->l[i].b            = NULL;
-       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
+               iter->l[i].b            = BTREE_ITER_NO_NODE_INIT;
 
        prefetch(c->btree_roots[btree_id].b);
 }
@@ -1769,7 +1751,12 @@ static inline void __bch2_trans_iter_free(struct btree_trans *trans,
 int bch2_trans_iter_put(struct btree_trans *trans,
                        struct btree_iter *iter)
 {
-       int ret = btree_iter_err(iter);
+       int ret;
+
+       if (IS_ERR_OR_NULL(iter))
+               return 0;
+
+       ret = btree_iter_err(iter);
 
        if (!(trans->iters_touched & (1ULL << iter->idx)) &&
            !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT))
@@ -1782,6 +1769,9 @@ int bch2_trans_iter_put(struct btree_trans *trans,
 int bch2_trans_iter_free(struct btree_trans *trans,
                         struct btree_iter *iter)
 {
+       if (IS_ERR_OR_NULL(iter))
+               return 0;
+
        trans->iters_touched &= ~(1ULL << iter->idx);
 
        return bch2_trans_iter_put(trans, iter);
@@ -1979,7 +1969,8 @@ struct btree_iter *bch2_trans_get_iter(struct btree_trans *trans,
                __btree_trans_get_iter(trans, btree_id, pos, flags);
 
        if (!IS_ERR(iter))
-               bch2_btree_iter_set_pos(iter, pos);
+               __bch2_btree_iter_set_pos(iter, pos,
+                       btree_node_type_is_extents(btree_id));
        return iter;
 }
 
@@ -2021,8 +2012,8 @@ struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans,
 
        trans->iters_live |= 1ULL << iter->idx;
        /*
-        * Don't mark it as touched, we don't need to preserve this iter since
-        * it's cheap to copy it again:
+        * We don't need to preserve this iter since it's cheap to copy it
+        * again - this will cause trans_iter_put() to free it right away:
         */
        trans->iters_touched &= ~(1ULL << iter->idx);
 
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 96238092551136eb532278890d6ef746a91ca895..336901f9780b5665e9677893b162ccfe77e18c8f 100644
@@ -166,6 +166,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
 
 void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
+void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
 static inline struct bpos btree_type_successor(enum btree_id id,
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 86e52468c1aaf7c6a6da833f9a3cf684bfe7cd0f..20757d0c3e536d9b9b8cf1e6a3c39bc9d6d555a4 100644
@@ -64,9 +64,7 @@ struct btree_alloc {
 struct btree {
        /* Hottest entries first */
        struct rhash_head       hash;
-
-       /* Key/pointer for this btree node */
-       __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+       u64                     hash_val;
 
        struct six_lock         lock;
 
@@ -133,6 +131,9 @@ struct btree {
 #ifdef CONFIG_BCACHEFS_DEBUG
        bool                    *expensive_debug_checks;
 #endif
+
+       /* Key/pointer for this btree node */
+       __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
 };
 
 struct btree_cache {
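
Annotation (not part of the commit): struct btree opens with "Hottest entries first", and this change follows through on it: the large padded lookup key moves to the end of the struct while the new 8-byte hash_val sits up front, keeping hash lookup and locking traffic within the leading cache lines. A standalone offsetof() sketch of the effect, with made-up field sizes:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified stand-ins: a ~100-byte padded key vs. the hot fields. */
struct padded_key { uint64_t u64s[13]; };
struct lock { uint64_t state; };

struct btree_old {		/* key near the front */
	uint64_t hash;
	struct padded_key key;
	struct lock lock;
};

struct btree_new {		/* cached hash_val in front, key last */
	uint64_t hash;
	uint64_t hash_val;
	struct lock lock;
	struct padded_key key;
};

int main(void)
{
	/* With the old layout the lock starts past the first cache line;
	 * with the new one every hot field fits in the first 64 bytes. */
	printf("old lock offset: %zu\n", offsetof(struct btree_old, lock));
	printf("new lock offset: %zu\n", offsetof(struct btree_new, lock));
	return 0;
}
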
@@ -184,9 +185,25 @@ enum btree_iter_type {
 
 #define BTREE_ITER_TYPE                        ((1 << 2) - 1)
 
+/*
+ * Iterate over all possible positions, synthesizing deleted keys for holes:
+ */
 #define BTREE_ITER_SLOTS               (1 << 2)
+/*
+ * Indicates that intent locks should be taken on leaf nodes, because we expect
+ * to be doing updates:
+ */
 #define BTREE_ITER_INTENT              (1 << 3)
+/*
+ * Causes the btree iterator code to prefetch additional btree nodes from disk:
+ */
 #define BTREE_ITER_PREFETCH            (1 << 4)
+/*
+ * Indicates that this iterator should not be reused until transaction commit,
+ * either because a pending update references it or because the update depends
+ * on that particular key being locked (e.g. by the str_hash code, for hash
+ * table consistency)
+ */
 #define BTREE_ITER_KEEP_UNTIL_COMMIT   (1 << 5)
 /*
  * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 2c34bae6428101ef3cc8404b076a7da024413ba2..be4fe818eac8966fea24b82b022afbf1bbb26a48 100644
@@ -70,7 +70,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
 int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
                            __le64, unsigned);
 int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
-                              struct btree *, struct bkey_i_btree_ptr *);
+                              struct btree *, struct bkey_i *);
 
 int bch2_trans_update(struct btree_trans *, struct btree_iter *,
                      struct bkey_i *, enum btree_trigger_flags);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index d84bb6806683ae8ba04efcd3532d4cdc9934946f..3cb3a0fff9a81d454802ab6c31f348c739acee53 100644
@@ -332,7 +332,11 @@ retry:
                goto retry;
        }
 
-       bkey_btree_ptr_init(&tmp.k);
+       if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
+               bkey_btree_ptr_v2_init(&tmp.k);
+       else
+               bkey_btree_ptr_init(&tmp.k);
+
        bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
 
        bch2_open_bucket_get(c, wp, &ob);
@@ -354,14 +358,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 {
        struct bch_fs *c = as->c;
        struct btree *b;
+       int ret;
 
        BUG_ON(level >= BTREE_MAX_DEPTH);
        BUG_ON(!as->reserve->nr);
 
        b = as->reserve->b[--as->reserve->nr];
 
-       BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
-
        set_btree_node_accessed(b);
        set_btree_node_dirty(b);
        set_btree_node_need_write(b);
@@ -372,7 +375,16 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
        b->data->flags = 0;
        SET_BTREE_NODE_ID(b->data, as->btree_id);
        SET_BTREE_NODE_LEVEL(b->data, level);
-       b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
+       b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
+
+               bp->v.mem_ptr           = 0;
+               bp->v.seq               = b->data->keys.seq;
+               bp->v.sectors_written   = 0;
+               bp->v.sectors           = cpu_to_le16(c->opts.btree_node_size);
+       }
 
        if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
                SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
@@ -385,10 +397,26 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 
        btree_node_will_make_reachable(as, b);
 
+       ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
+       BUG_ON(ret);
+
        trace_btree_node_alloc(c, b);
        return b;
 }
 
+static void btree_set_min(struct btree *b, struct bpos pos)
+{
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
+               bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
+       b->data->min_key = pos;
+}
+
+static void btree_set_max(struct btree *b, struct bpos pos)
+{
+       b->key.k.p = pos;
+       b->data->max_key = pos;
+}
+
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
                                                  struct btree *b,
                                                  struct bkey_format format)
@@ -397,11 +425,12 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
 
        n = bch2_btree_node_alloc(as, b->level);
 
-       n->data->min_key        = b->data->min_key;
-       n->data->max_key        = b->data->max_key;
-       n->data->format         = format;
        SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
+       btree_set_min(n, b->data->min_key);
+       btree_set_max(n, b->data->max_key);
+
+       n->data->format         = format;
        btree_node_set_format(n, format);
 
        bch2_btree_sort_into(as->c, n, b);
@@ -431,10 +460,9 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
 {
        struct btree *b = bch2_btree_node_alloc(as, level);
 
-       b->data->min_key = POS_MIN;
-       b->data->max_key = POS_MAX;
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
        b->data->format = bch2_btree_calc_format(b);
-       b->key.k.p = POS_MAX;
 
        btree_node_set_format(b, b->data->format);
        bch2_btree_build_aux_trees(b);
@@ -1191,7 +1219,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
                                     BTREE_TRIGGER_GC);
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
-              bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
+              bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
                bch2_btree_node_iter_advance(node_iter, b);
 
        /*
@@ -1263,10 +1291,8 @@ static struct btree *__btree_split_node(struct btree_update *as,
 
        BUG_ON(!prev);
 
-       n1->key.k.p = bkey_unpack_pos(n1, prev);
-       n1->data->max_key = n1->key.k.p;
-       n2->data->min_key =
-               btree_type_successor(n1->btree_id, n1->key.k.p);
+       btree_set_max(n1, bkey_unpack_pos(n1, prev));
+       btree_set_min(n2, btree_type_successor(n1->btree_id, n1->key.k.p));
 
        set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
        set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
@@ -1385,7 +1411,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
        if (keys)
                btree_split_insert_keys(as, n1, iter, keys);
 
-       if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) {
+       if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
                trace_btree_split(c, b);
 
                n2 = __btree_split_node(as, n1, iter);
@@ -1749,10 +1775,9 @@ retry:
 
        n = bch2_btree_node_alloc(as, b->level);
 
-       n->data->min_key        = prev->data->min_key;
-       n->data->max_key        = next->data->max_key;
+       btree_set_min(n, prev->data->min_key);
+       btree_set_max(n, next->data->max_key);
        n->data->format         = new_f;
-       n->key.k.p              = next->key.k.p;
 
        btree_node_set_format(n, new_f);
 
@@ -1944,7 +1969,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                                         struct btree_update *as,
                                         struct btree_iter *iter,
                                         struct btree *b, struct btree *new_hash,
-                                        struct bkey_i_btree_ptr *new_key)
+                                        struct bkey_i *new_key)
 {
        struct btree *parent;
        int ret;
@@ -1989,20 +2014,20 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
         */
        ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
                        c->opts.btree_node_size *
-                       bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)),
+                       bch2_bkey_nr_ptrs(bkey_i_to_s_c(new_key)),
                        BCH_DISK_RESERVATION_NOFAIL);
        BUG_ON(ret);
 
        parent = btree_node_parent(iter, b);
        if (parent) {
                if (new_hash) {
-                       bkey_copy(&new_hash->key, &new_key->k_i);
+                       bkey_copy(&new_hash->key, new_key);
                        ret = bch2_btree_node_hash_insert(&c->btree_cache,
                                        new_hash, b->level, b->btree_id);
                        BUG_ON(ret);
                }
 
-               bch2_keylist_add(&as->parent_keys, &new_key->k_i);
+               bch2_keylist_add(&as->parent_keys, new_key);
                bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0);
 
                if (new_hash) {
@@ -2011,12 +2036,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
                        bch2_btree_node_hash_remove(&c->btree_cache, b);
 
-                       bkey_copy(&b->key, &new_key->k_i);
+                       bkey_copy(&b->key, new_key);
                        ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
                        BUG_ON(ret);
                        mutex_unlock(&c->btree_cache.lock);
                } else {
-                       bkey_copy(&b->key, &new_key->k_i);
+                       bkey_copy(&b->key, new_key);
                }
        } else {
                struct bch_fs_usage *fs_usage;
@@ -2029,11 +2054,11 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                percpu_down_read(&c->mark_lock);
                fs_usage = bch2_fs_usage_scratch_get(c);
 
-               bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
+               bch2_mark_key_locked(c, bkey_i_to_s_c(new_key),
                              0, 0, fs_usage, 0,
                              BTREE_TRIGGER_INSERT);
                if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
-                       bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
+                       bch2_mark_key_locked(c, bkey_i_to_s_c(new_key),
                                             0, 0, NULL, 0,
                                             BTREE_TRIGGER_INSERT||
                                             BTREE_TRIGGER_GC);
@@ -2047,16 +2072,16 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                percpu_up_read(&c->mark_lock);
                mutex_unlock(&c->btree_interior_update_lock);
 
-               if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
+               if (btree_ptr_hash_val(new_key) != b->hash_val) {
                        mutex_lock(&c->btree_cache.lock);
                        bch2_btree_node_hash_remove(&c->btree_cache, b);
 
-                       bkey_copy(&b->key, &new_key->k_i);
+                       bkey_copy(&b->key, new_key);
                        ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
                        BUG_ON(ret);
                        mutex_unlock(&c->btree_cache.lock);
                } else {
-                       bkey_copy(&b->key, &new_key->k_i);
+                       bkey_copy(&b->key, new_key);
                }
 
                btree_update_updated_root(as);
@@ -2068,7 +2093,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
 int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                               struct btree *b,
-                              struct bkey_i_btree_ptr *new_key)
+                              struct bkey_i *new_key)
 {
        struct btree *parent = btree_node_parent(iter, b);
        struct btree_update *as = NULL;
@@ -2091,8 +2116,11 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                }
        }
 
-       /* check PTR_HASH() after @b is locked by btree_iter_traverse(): */
-       if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
+       /*
+        * check btree_ptr_hash_val() after @b is locked by
+        * btree_iter_traverse():
+        */
+       if (btree_ptr_hash_val(new_key) != b->hash_val) {
                /* bch2_btree_reserve_get will unlock */
                ret = bch2_btree_cache_cannibalize_lock(c, &cl);
                if (ret) {
@@ -2134,7 +2162,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                        goto err;
        }
 
-       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i));
+       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
        if (ret)
                goto err_free_update;
 
@@ -2193,14 +2221,14 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 
        bkey_btree_ptr_init(&b->key);
        b->key.k.p = POS_MAX;
-       PTR_HASH(&b->key) = U64_MAX - id;
+       *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id;
 
        bch2_bset_init_first(b, &b->data->keys);
        bch2_btree_build_aux_trees(b);
 
        b->data->flags = 0;
-       b->data->min_key = POS_MIN;
-       b->data->max_key = POS_MAX;
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
        b->data->format = bch2_btree_calc_format(b);
        btree_node_set_format(b, b->data->format);
 
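The hunks above replace the old PTR_HASH() macro, which recomputed a hash from the raw key bytes, with a hash value cached in b->hash_val and computed once by btree_ptr_hash_val(); they also widen bch2_btree_node_update_key() from bkey_i_btree_ptr to plain bkey_i so it can carry KEY_TYPE_btree_ptr_v2 keys. A minimal sketch of what btree_ptr_hash_val() plausibly looks like, inferred from these hunks (the bch2_btree_root_alloc() hunk stores the hash input as the first u64 of the value; the v2 accessor and its seq field are assumptions, not shown in this diff):

static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
{
        switch (k->k.type) {
        case KEY_TYPE_btree_ptr:
                /* first u64 of the value, i.e. the first pointer: */
                return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
        case KEY_TYPE_btree_ptr_v2:
                /* assumed: v2 values carry a stable per-node seq: */
                return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
        default:
                return 0;
        }
}

Caching the value also means the rekey paths above only need a remove/reinsert in the btree node cache when the hash input actually changed, which is what the btree_ptr_hash_val(new_key) != b->hash_val tests check.
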
index dfbe5dcd2b7759d3d999173f4c1127e8658bd3f1..a036c7dd1fc1aa20219e00a8a41787865ba9decd 100644 (file)
@@ -53,9 +53,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                                struct btree_node_iter *node_iter,
                                struct bkey_i *insert)
 {
-       const struct bkey_format *f = &b->format;
        struct bkey_packed *k;
-       unsigned clobber_u64s;
+       unsigned clobber_u64s = 0, new_u64s = 0;
 
        EBUG_ON(btree_node_just_written(b));
        EBUG_ON(bset_written(b, btree_bset_last(b)));
@@ -64,40 +63,40 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                bkey_cmp(insert->k.p, b->data->max_key) > 0);
 
        k = bch2_btree_node_iter_peek_all(node_iter, b);
-       if (k && !bkey_cmp_packed(b, k, &insert->k)) {
-               BUG_ON(bkey_whiteout(k));
-
-               if (!bkey_written(b, k) &&
-                   bkey_val_u64s(&insert->k) == bkeyp_val_u64s(f, k) &&
-                   !bkey_whiteout(&insert->k)) {
-                       k->type = insert->k.type;
-                       memcpy_u64s(bkeyp_val(f, k), &insert->v,
-                                   bkey_val_u64s(&insert->k));
-                       return true;
-               }
+       if (k && bkey_cmp_packed(b, k, &insert->k))
+               k = NULL;
 
-               btree_account_key_drop(b, k);
+       /* @k is the key being overwritten/deleted, if any: */
+       EBUG_ON(k && bkey_whiteout(k));
 
-               if (bkey_whiteout(&insert->k)) {
-                       unsigned clobber_u64s = k->u64s, new_u64s = k->u64s;
+       /* Deleting, but not found? nothing to do: */
+       if (bkey_whiteout(&insert->k) && !k)
+               return false;
 
-                       k->type = KEY_TYPE_deleted;
+       if (bkey_whiteout(&insert->k)) {
+               /* Deleting: */
+               btree_account_key_drop(b, k);
+               k->type = KEY_TYPE_deleted;
 
-                       if (k->needs_whiteout) {
-                               push_whiteout(iter->trans->c, b, k);
-                               k->needs_whiteout = false;
-                       }
+               if (k->needs_whiteout)
+                       push_whiteout(iter->trans->c, b, k);
+               k->needs_whiteout = false;
 
-                       if (k >= btree_bset_last(b)->start) {
-                               bch2_bset_delete(b, k, clobber_u64s);
-                               new_u64s = 0;
-                       }
+               if (k >= btree_bset_last(b)->start) {
+                       clobber_u64s = k->u64s;
+                       bch2_bset_delete(b, k, clobber_u64s);
+                       goto fix_iter;
+               } else {
+                       bch2_btree_iter_fix_key_modified(iter, b, k);
+               }
 
-                       bch2_btree_node_iter_fix(iter, b, node_iter, k,
-                                                clobber_u64s, new_u64s);
-                       return true;
+               return true;
+       }
 
-               }
+       if (k) {
+               /* Overwriting: */
+               btree_account_key_drop(b, k);
+               k->type = KEY_TYPE_deleted;
 
                insert->k.needs_whiteout = k->needs_whiteout;
                k->needs_whiteout = false;
@@ -105,31 +104,19 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                if (k >= btree_bset_last(b)->start) {
                        clobber_u64s = k->u64s;
                        goto overwrite;
+               } else {
+                       bch2_btree_iter_fix_key_modified(iter, b, k);
                }
-
-               k->type = KEY_TYPE_deleted;
-               /*
-                * XXX: we should be able to do this without two calls to
-                * bch2_btree_node_iter_fix:
-                */
-               bch2_btree_node_iter_fix(iter, b, node_iter, k,
-                                        k->u64s, k->u64s);
-       } else {
-               /*
-                * Deleting, but the key to delete wasn't found - nothing to do:
-                */
-               if (bkey_whiteout(&insert->k))
-                       return false;
-
-               insert->k.needs_whiteout = false;
        }
 
        k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
-       clobber_u64s = 0;
 overwrite:
        bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
-       bch2_btree_node_iter_fix(iter, b, node_iter, k,
-                                clobber_u64s, k->u64s);
+       new_u64s = k->u64s;
+fix_iter:
+       if (clobber_u64s != new_u64s)
+               bch2_btree_node_iter_fix(iter, b, node_iter, k,
+                                        clobber_u64s, new_u64s);
        return true;
 }
 
@@ -707,10 +694,18 @@ int __bch2_trans_commit(struct btree_trans *trans)
                trans_trigger_run = false;
 
                trans_for_each_update(trans, i) {
-                       /* we know trans->nounlock won't be set here: */
-                       if (unlikely(!(i->iter->locks_want < 1
-                                      ? __bch2_btree_iter_upgrade(i->iter, 1)
-                                      : i->iter->uptodate <= BTREE_ITER_NEED_PEEK))) {
+                       if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK)) {
+                               trace_trans_restart_traverse(trans->ip);
+                               ret = -EINTR;
+                               goto out;
+                       }
+
+                       /*
+                        * We're not using bch2_btree_iter_upgrade here because
+                        * we know trans->nounlock can't be set:
+                        */
+                       if (unlikely(i->iter->locks_want < 1 &&
+                                    !__bch2_btree_iter_upgrade(i->iter, 1))) {
                                trace_trans_restart_upgrade(trans->ip);
                                ret = -EINTR;
                                goto out;
@@ -785,7 +780,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 
        iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
 
-       if (iter->flags & BTREE_ITER_IS_EXTENTS) {
+       if (btree_node_type_is_extents(iter->btree_id)) {
                iter->pos_after_commit = k->k.p;
                iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
        }
@@ -903,7 +898,7 @@ retry:
                 */
                delete.k.p = iter->pos;
 
-               if (iter->flags & BTREE_ITER_IS_EXTENTS) {
+               if (btree_node_type_is_extents(iter->btree_id)) {
                        unsigned max_sectors =
                                KEY_SIZE_MAX & (~0 << trans->c->block_bits);
 
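Two hunks above change extent detection in the transaction-commit path from the per-iterator flag BTREE_ITER_IS_EXTENTS to a property of the btree itself, btree_node_type_is_extents(iter->btree_id). A sketch of such a predicate, assuming (as the read path below does for promotes) that extents and reflink are the extent-style btrees; the real definition is not part of this diff:

static inline bool btree_node_type_is_extents(enum btree_id id)
{
        switch (id) {
        case BTREE_ID_EXTENTS:
        case BTREE_ID_REFLINK:
                return true;
        default:
                return false;
        }
}

Keying off the btree id means a caller cannot forget to set the iterator flag and silently get non-extent semantics on an extents btree.
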
index 731b932558764c2c6500e39013dd7874c7372690..b9bc524f373bbbfcc1b73f68eaf12745e9fe9151 100644 (file)
@@ -1194,6 +1194,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
                ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
                        ?  c->opts.btree_node_size
                        : -c->opts.btree_node_size;
@@ -1729,6 +1730,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
                        ?  c->opts.btree_node_size
                        : -c->opts.btree_node_size;
index 4717a1a6f56862ed211c0ed04c7a4c5b02a47302..c1cc63af9feb4d646e04823b14e2c34a3494ef23 100644 (file)
@@ -97,7 +97,8 @@ static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
 static inline enum bch_data_type ptr_data_type(const struct bkey *k,
                                               const struct bch_extent_ptr *ptr)
 {
-       if (k->type == KEY_TYPE_btree_ptr)
+       if (k->type == KEY_TYPE_btree_ptr ||
+           k->type == KEY_TYPE_btree_ptr_v2)
                return BCH_DATA_BTREE;
 
        return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER;
index a5c947e8adf34a0a2e8f1ce9777675679a136d6e..6f1afa4a31199fb11ed7135c10d6c6803500a1d5 100644 (file)
@@ -325,7 +325,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
 
        BUG_ON(len_a + len_b > bio_sectors(bio));
        BUG_ON(crc_old.uncompressed_size != bio_sectors(bio));
-       BUG_ON(crc_old.compression_type);
+       BUG_ON(crc_is_compressed(crc_old));
        BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) !=
               bch2_csum_type_is_encryption(new_csum_type));
 
@@ -354,6 +354,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
                if (i->crc)
                        *i->crc = (struct bch_extent_crc_unpacked) {
                                .csum_type              = i->csum_type,
+                               .compression_type       = crc_old.compression_type,
                                .compressed_size        = i->len,
                                .uncompressed_size      = i->len,
                                .offset                 = 0,
index ca9e45906dc8813f707076670caabfdabcab4bd9..24dee8039d57bd37f652e85273d0089abd9a5c3d 100644 (file)
@@ -155,13 +155,16 @@ static inline struct nonce null_nonce(void)
 static inline struct nonce extent_nonce(struct bversion version,
                                        struct bch_extent_crc_unpacked crc)
 {
-       unsigned size = crc.compression_type ? crc.uncompressed_size : 0;
+       unsigned compression_type = crc_is_compressed(crc)
+               ? crc.compression_type
+               : 0;
+       unsigned size = compression_type ? crc.uncompressed_size : 0;
        struct nonce nonce = (struct nonce) {{
                [0] = cpu_to_le32(size << 22),
                [1] = cpu_to_le32(version.lo),
                [2] = cpu_to_le32(version.lo >> 32),
                [3] = cpu_to_le32(version.hi|
-                                 (crc.compression_type << 24))^BCH_NONCE_EXTENT,
+                                 (compression_type << 24))^BCH_NONCE_EXTENT,
        }};
 
        return nonce_add(nonce, crc.nonce << 9);
index bb557eda111b5c8a1820e5870adbc03a68f7e6bc..0959bb864dc55cd58474307f2a5fadff21fcdf4c 100644 (file)
@@ -434,7 +434,7 @@ out:
        bio_unmap_or_unbounce(c, dst_data);
        return compression_type;
 err:
-       compression_type = 0;
+       compression_type = BCH_COMPRESSION_TYPE_incompressible;
        goto out;
 }
 
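With this change, a compression attempt that fails to shrink the data reports BCH_COMPRESSION_TYPE_incompressible instead of falling back to 0 (none), so the caller can record that recompressing is pointless. Condensed from the bch2_write_extent() hunk further down (a sketch, not the verbatim call site): the return value flows into the extent's crc entry, and crc_is_compressed() treats the new type as uncompressed, so the data still goes out verbatim:

        crc.compression_type = bch2_bio_compress(c, dst, &dst_len,
                                                 src, &src_len,
                                                 op->compression_type);
        if (!crc_is_compressed(crc)) {
                /* stored as-is, but possibly marked incompressible so
                 * rebalance/copygc won't retry the compressor: */
                dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
        }
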
index a49d0745c720c56c57b8e7f1e1fc3ec8b31acc3b..bd813ddfd23b68c06c32c828656e5e3dbe92eb53 100644 (file)
@@ -1280,9 +1280,8 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
 int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
 {
        struct btree_trans trans;
-       struct btree_iter *btree_iter;
-       struct journal_iter journal_iter;
-       struct bkey_s_c btree_k, journal_k;
+       struct btree_and_journal_iter iter;
+       struct bkey_s_c k;
        int ret;
 
        ret = bch2_fs_ec_start(c);
@@ -1291,38 +1290,16 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       btree_iter      = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, 0);
-       journal_iter    = bch2_journal_iter_init(journal_keys, BTREE_ID_EC);
+       bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
+                                        BTREE_ID_EC, POS_MIN);
 
-       btree_k         = bch2_btree_iter_peek(btree_iter);
-       journal_k       = bch2_journal_iter_peek(&journal_iter);
 
-       while (1) {
-               bool btree;
-
-               if (btree_k.k && journal_k.k) {
-                       int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
-
-                       if (!cmp)
-                               btree_k = bch2_btree_iter_next(btree_iter);
-                       btree = cmp < 0;
-               } else if (btree_k.k) {
-                       btree = true;
-               } else if (journal_k.k) {
-                       btree = false;
-               } else {
-                       break;
-               }
-
-               bch2_mark_key(c, btree ? btree_k : journal_k,
-                             0, 0, NULL, 0,
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+               bch2_mark_key(c, k, 0, 0, NULL, 0,
                              BTREE_TRIGGER_ALLOC_READ|
                              BTREE_TRIGGER_NOATOMIC);
 
-               if (btree)
-                       btree_k = bch2_btree_iter_next(btree_iter);
-               else
-                       journal_k = bch2_journal_iter_next(&journal_iter);
+               bch2_btree_and_journal_iter_advance(&iter);
        }
 
        ret = bch2_trans_exit(&trans) ?: ret;
index 8d9fbfd19f66f3ca5069e9ef58a48f82fb227b7a..cf67abd4849007281415f65ca5c5eb20d3319379 100644 (file)
@@ -12,6 +12,7 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
 #define bch2_bkey_ops_stripe (struct bkey_ops) {       \
        .key_invalid    = bch2_stripe_invalid,          \
        .val_to_text    = bch2_stripe_to_text,          \
+       .swab           = bch2_ptr_swab,                \
 }
 
 static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
index c4b0b9e15a8fe0f5638f139d82ec77c9bf13cad2..10feb856e31498898f83b46029e55b56a3aa7a99 100644 (file)
@@ -337,7 +337,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
                        if (!bch2_checksum_mergeable(crc_l.csum_type))
                                return BCH_MERGE_NOMERGE;
 
-                       if (crc_l.compression_type)
+                       if (crc_is_compressed(crc_l))
                                return BCH_MERGE_NOMERGE;
 
                        if (crc_l.csum_type &&
@@ -448,7 +448,7 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
 static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
                                  struct bch_extent_crc_unpacked n)
 {
-       return !u.compression_type &&
+       return !crc_is_compressed(u) &&
                u.csum_type &&
                u.uncompressed_size > u.live_size &&
                bch2_csum_type_is_encryption(u.csum_type) ==
@@ -492,7 +492,7 @@ bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
        /* Find a checksum entry that covers only live data: */
        if (!n.csum_type) {
                bkey_for_each_crc(&k->k, ptrs, u, i)
-                       if (!u.compression_type &&
+                       if (!crc_is_compressed(u) &&
                            u.csum_type &&
                            u.live_size == u.uncompressed_size) {
                                n = u;
@@ -501,7 +501,7 @@ bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
                return false;
        }
 found:
-       BUG_ON(n.compression_type);
+       BUG_ON(crc_is_compressed(n));
        BUG_ON(n.offset);
        BUG_ON(n.live_size != k->k.size);
 
@@ -610,8 +610,7 @@ unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k)
                struct extent_ptr_decoded p;
 
                bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-                       ret += !p.ptr.cached &&
-                               p.crc.compression_type == BCH_COMPRESSION_TYPE_none;
+                       ret += !p.ptr.cached && !crc_is_compressed(p.crc);
        }
 
        return ret;
@@ -625,13 +624,24 @@ unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k)
        unsigned ret = 0;
 
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-               if (!p.ptr.cached &&
-                   p.crc.compression_type != BCH_COMPRESSION_TYPE_none)
+               if (!p.ptr.cached && crc_is_compressed(p.crc))
                        ret += p.crc.compressed_size;
 
        return ret;
 }
 
+bool bch2_bkey_is_incompressible(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct bch_extent_crc_unpacked crc;
+
+       bkey_for_each_crc(k.k, ptrs, crc, entry)
+               if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+                       return true;
+       return false;
+}
+
 bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
                                unsigned nr_replicas)
 {
@@ -739,6 +749,7 @@ void bch2_bkey_append_ptr(struct bkey_i *k,
 
        switch (k->k.type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
        case KEY_TYPE_extent:
                EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
 
@@ -1021,6 +1032,8 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 
        if (k.k->type == KEY_TYPE_btree_ptr)
                size_ondisk = c->opts.btree_node_size;
+       if (k.k->type == KEY_TYPE_btree_ptr_v2)
+               size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
 
        bkey_extent_entry_for_each(ptrs, entry) {
                if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
@@ -1069,17 +1082,19 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
        return NULL;
 }
 
-void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
+void bch2_ptr_swab(struct bkey_s k)
 {
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
        union bch_extent_entry *entry;
-       u64 *d = (u64 *) bkeyp_val(f, k);
-       unsigned i;
+       u64 *d;
 
-       for (i = 0; i < bkeyp_val_u64s(f, k); i++)
-               d[i] = swab64(d[i]);
+       for (d =  (u64 *) ptrs.start;
+            d != (u64 *) ptrs.end;
+            d++)
+               *d = swab64(*d);
 
-       for (entry = (union bch_extent_entry *) d;
-            entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+       for (entry = ptrs.start;
+            entry < ptrs.end;
             entry = extent_entry_next(entry)) {
                switch (extent_entry_type(entry)) {
                case BCH_EXTENT_ENTRY_ptr:
index 7c5a41e6d79db0a4f4c0e6a2b614d975fa51b019..70b7d70269dc14af02bd76c52100d42a4c9d6104 100644 (file)
@@ -175,6 +175,12 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 #undef common_fields
 }
 
+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
+{
+       return (crc.compression_type != BCH_COMPRESSION_TYPE_none &&
+               crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
+}
+
 /* bkey_ptrs: generically over any key type that has ptrs */
 
 struct bkey_ptrs_c {
@@ -219,6 +225,13 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
                        bkey_val_end(r),
                };
        }
+       case KEY_TYPE_btree_ptr_v2: {
+               struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&e.v->start[0]),
+                       to_entry(extent_entry_last(e))
+               };
+       }
        default:
                return (struct bkey_ptrs_c) { NULL, NULL };
        }
@@ -366,6 +379,13 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
        .swab           = bch2_ptr_swab,                        \
 }
 
+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) {         \
+       .key_invalid    = bch2_btree_ptr_invalid,               \
+       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
+       .val_to_text    = bch2_btree_ptr_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
+}
+
 /* KEY_TYPE_extent: */
 
 const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -410,6 +430,7 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 {
        switch (k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
        case KEY_TYPE_extent:
        case KEY_TYPE_reflink_v:
                return true;
@@ -483,6 +504,7 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
 unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
+bool bch2_bkey_is_incompressible(struct bkey_s_c);
 unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
 bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned);
 unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
@@ -525,7 +547,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
 
-void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
+void bch2_ptr_swab(struct bkey_s);
 
 /* Generic extent code: */
 
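crc_is_compressed() exists because, with the new incompressible type, "has a nonzero compression_type" and "is actually stored compressed" are now different questions. A quick illustration, assuming only the definitions above:

        struct bch_extent_crc_unpacked crc = {
                .compression_type = BCH_COMPRESSION_TYPE_incompressible,
        };

        BUG_ON(crc.compression_type == BCH_COMPRESSION_TYPE_none); /* marked */
        BUG_ON(crc_is_compressed(crc));         /* but not compressed */

This is why the conversions above (can_narrow_crc(), bch2_bkey_narrow_crcs(), the merge checks) switched from testing compression_type directly to calling the helper.
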
index 96f7bbe0a3eddf2483e9067805743f7e73519488..878419d409927c7d33902993d0f37a4b5357e362 100644 (file)
@@ -19,14 +19,15 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                      struct posix_acl *acl)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *dir_iter;
+       struct btree_iter *dir_iter = NULL;
        struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
        u64 now = bch2_current_time(trans->c);
        int ret;
 
        dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
-       if (IS_ERR(dir_iter))
-               return PTR_ERR(dir_iter);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
 
        bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
 
@@ -37,20 +38,20 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                                BLOCKDEV_INODE_MAX, 0,
                                &c->unused_inode_hint);
        if (ret)
-               return ret;
+               goto err;
 
        if (default_acl) {
                ret = bch2_set_acl_trans(trans, new_inode, &hash,
                                         default_acl, ACL_TYPE_DEFAULT);
                if (ret)
-                       return ret;
+                       goto err;
        }
 
        if (acl) {
                ret = bch2_set_acl_trans(trans, new_inode, &hash,
                                         acl, ACL_TYPE_ACCESS);
                if (ret)
-                       return ret;
+                       goto err;
        }
 
        if (name) {
@@ -62,48 +63,55 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
 
                ret = bch2_inode_write(trans, dir_iter, dir_u);
                if (ret)
-                       return ret;
+                       goto err;
 
                ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
                                         mode_to_type(new_inode->bi_mode),
                                         name, new_inode->bi_inum,
                                         BCH_HASH_SET_MUST_CREATE);
                if (ret)
-                       return ret;
+                       goto err;
        }
-
-       return 0;
+err:
+       bch2_trans_iter_put(trans, dir_iter);
+       return ret;
 }
 
 int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
                    u64 inum, struct bch_inode_unpacked *dir_u,
                    struct bch_inode_unpacked *inode_u, const struct qstr *name)
 {
-       struct btree_iter *dir_iter, *inode_iter;
+       struct btree_iter *dir_iter = NULL, *inode_iter = NULL;
        struct bch_hash_info dir_hash;
        u64 now = bch2_current_time(trans->c);
+       int ret;
 
        inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
-       if (IS_ERR(inode_iter))
-               return PTR_ERR(inode_iter);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto err;
 
        inode_u->bi_ctime = now;
        bch2_inode_nlink_inc(inode_u);
 
        dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0);
-       if (IS_ERR(dir_iter))
-               return PTR_ERR(dir_iter);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
 
        dir_u->bi_mtime = dir_u->bi_ctime = now;
 
        dir_hash = bch2_hash_info_init(trans->c, dir_u);
-       bch2_trans_iter_put(trans, dir_iter);
 
-       return bch2_dirent_create(trans, dir_inum, &dir_hash,
+       ret =   bch2_dirent_create(trans, dir_inum, &dir_hash,
                                  mode_to_type(inode_u->bi_mode),
                                  name, inum, BCH_HASH_SET_MUST_CREATE) ?:
                bch2_inode_write(trans, dir_iter, dir_u) ?:
                bch2_inode_write(trans, inode_iter, inode_u);
+err:
+       bch2_trans_iter_put(trans, dir_iter);
+       bch2_trans_iter_put(trans, inode_iter);
+       return ret;
 }
 
 int bch2_unlink_trans(struct btree_trans *trans,
@@ -111,39 +119,49 @@ int bch2_unlink_trans(struct btree_trans *trans,
                      struct bch_inode_unpacked *inode_u,
                      const struct qstr *name)
 {
-       struct btree_iter *dir_iter, *dirent_iter, *inode_iter;
+       struct btree_iter *dir_iter = NULL, *dirent_iter = NULL,
+                         *inode_iter = NULL;
        struct bch_hash_info dir_hash;
        u64 inum, now = bch2_current_time(trans->c);
        struct bkey_s_c k;
+       int ret;
 
        dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
-       if (IS_ERR(dir_iter))
-               return PTR_ERR(dir_iter);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
 
        dir_hash = bch2_hash_info_init(trans->c, dir_u);
 
        dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash,
                                                 name, BTREE_ITER_INTENT);
-       if (IS_ERR(dirent_iter))
-               return PTR_ERR(dirent_iter);
+       ret = PTR_ERR_OR_ZERO(dirent_iter);
+       if (ret)
+               goto err;
 
        k = bch2_btree_iter_peek_slot(dirent_iter);
        inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
 
        inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
-       if (IS_ERR(inode_iter))
-               return PTR_ERR(inode_iter);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto err;
 
        dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
        dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode);
        bch2_inode_nlink_dec(inode_u);
 
-       return  (S_ISDIR(inode_u->bi_mode)
+       ret =   (S_ISDIR(inode_u->bi_mode)
                 ? bch2_empty_dir_trans(trans, inum)
                 : 0) ?:
                bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?:
                bch2_inode_write(trans, dir_iter, dir_u) ?:
                bch2_inode_write(trans, inode_iter, inode_u);
+err:
+       bch2_trans_iter_put(trans, inode_iter);
+       bch2_trans_iter_put(trans, dirent_iter);
+       bch2_trans_iter_put(trans, dir_iter);
+       return ret;
 }
 
 bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
@@ -179,24 +197,26 @@ int bch2_rename_trans(struct btree_trans *trans,
                      const struct qstr *dst_name,
                      enum bch_rename_mode mode)
 {
-       struct btree_iter *src_dir_iter, *dst_dir_iter = NULL;
-       struct btree_iter *src_inode_iter, *dst_inode_iter = NULL;
+       struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL;
+       struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL;
        struct bch_hash_info src_hash, dst_hash;
        u64 src_inode, dst_inode, now = bch2_current_time(trans->c);
        int ret;
 
        src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir,
                                       BTREE_ITER_INTENT);
-       if (IS_ERR(src_dir_iter))
-               return PTR_ERR(src_dir_iter);
+       ret = PTR_ERR_OR_ZERO(src_dir_iter);
+       if (ret)
+               goto err;
 
        src_hash = bch2_hash_info_init(trans->c, src_dir_u);
 
        if (dst_dir != src_dir) {
                dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir,
                                               BTREE_ITER_INTENT);
-               if (IS_ERR(dst_dir_iter))
-                       return PTR_ERR(dst_dir_iter);
+               ret = PTR_ERR_OR_ZERO(dst_dir_iter);
+               if (ret)
+                       goto err;
 
                dst_hash = bch2_hash_info_init(trans->c, dst_dir_u);
        } else {
@@ -211,38 +231,48 @@ int bch2_rename_trans(struct btree_trans *trans,
                                 dst_name, &dst_inode,
                                 mode);
        if (ret)
-               return ret;
+               goto err;
 
        src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode,
                                         BTREE_ITER_INTENT);
-       if (IS_ERR(src_inode_iter))
-               return PTR_ERR(src_inode_iter);
+       ret = PTR_ERR_OR_ZERO(src_inode_iter);
+       if (ret)
+               goto err;
 
        if (dst_inode) {
                dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode,
                                                 BTREE_ITER_INTENT);
-               if (IS_ERR(dst_inode_iter))
-                       return PTR_ERR(dst_inode_iter);
+               ret = PTR_ERR_OR_ZERO(dst_inode_iter);
+               if (ret)
+                       goto err;
        }
 
        if (mode == BCH_RENAME_OVERWRITE) {
                if (S_ISDIR(src_inode_u->bi_mode) !=
-                   S_ISDIR(dst_inode_u->bi_mode))
-                       return -ENOTDIR;
+                   S_ISDIR(dst_inode_u->bi_mode)) {
+                       ret = -ENOTDIR;
+                       goto err;
+               }
 
                if (S_ISDIR(dst_inode_u->bi_mode) &&
-                   bch2_empty_dir_trans(trans, dst_inode))
-                       return -ENOTEMPTY;
+                   bch2_empty_dir_trans(trans, dst_inode)) {
+                       ret = -ENOTEMPTY;
+                       goto err;
+               }
        }
 
        if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
-           S_ISDIR(src_inode_u->bi_mode))
-               return -EXDEV;
+           S_ISDIR(src_inode_u->bi_mode)) {
+               ret = -EXDEV;
+               goto err;
+       }
 
        if (mode == BCH_RENAME_EXCHANGE &&
            bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
-           S_ISDIR(dst_inode_u->bi_mode))
-               return -EXDEV;
+           S_ISDIR(dst_inode_u->bi_mode)) {
+               ret = -EXDEV;
+               goto err;
+       }
 
        if (S_ISDIR(src_inode_u->bi_mode)) {
                src_dir_u->bi_nlink--;
@@ -270,7 +300,7 @@ int bch2_rename_trans(struct btree_trans *trans,
        if (dst_inode)
                dst_inode_u->bi_ctime   = now;
 
-       return  bch2_inode_write(trans, src_dir_iter, src_dir_u) ?:
+       ret =   bch2_inode_write(trans, src_dir_iter, src_dir_u) ?:
                (src_dir != dst_dir
                 ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u)
                 : 0 ) ?:
@@ -278,4 +308,10 @@ int bch2_rename_trans(struct btree_trans *trans,
                (dst_inode
                 ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u)
                 : 0 );
+err:
+       bch2_trans_iter_put(trans, dst_inode_iter);
+       bch2_trans_iter_put(trans, src_inode_iter);
+       bch2_trans_iter_put(trans, dst_dir_iter);
+       bch2_trans_iter_put(trans, src_dir_iter);
+       return ret;
 }
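
All four transaction helpers above move to one error-handling shape: every iterator starts out NULL, IS_ERR() checks become PTR_ERR_OR_ZERO(), and a single err: label puts every iterator, so no early return can leak one. This relies on bch2_trans_iter_put() tolerating NULL (implied by the hunks, which put iterators that may never have been assigned). The pattern in miniature, with hypothetical helpers get_iter_a/get_iter_b/do_work standing in for the real lookups:

int example_trans_op(struct btree_trans *trans)
{
        struct btree_iter *a = NULL, *b = NULL;
        int ret;

        a = get_iter_a(trans);                  /* hypothetical */
        ret = PTR_ERR_OR_ZERO(a);
        if (ret)
                goto err;

        b = get_iter_b(trans);                  /* hypothetical */
        ret = PTR_ERR_OR_ZERO(b);
        if (ret)
                goto err;

        ret = do_work(trans, a, b);             /* hypothetical */
err:
        /* put in reverse order; NULL is assumed to be a no-op: */
        bch2_trans_iter_put(trans, b);
        bch2_trans_iter_put(trans, a);
        return ret;
}
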
index e811b98d0f0333b8f0e35a941263346b539115ca..26171ff754a686d655affca1431ef1c389989792 100644 (file)
@@ -362,16 +362,16 @@ int bch2_inode_create(struct btree_trans *trans,
                      struct bch_inode_unpacked *inode_u,
                      u64 min, u64 max, u64 *hint)
 {
-       struct bch_fs *c = trans->c;
        struct bkey_inode_buf *inode_p;
-       struct btree_iter *iter;
+       struct btree_iter *iter = NULL;
+       struct bkey_s_c k;
        u64 start;
        int ret;
 
        if (!max)
                max = ULLONG_MAX;
 
-       if (c->opts.inodes_32bit)
+       if (trans->c->opts.inodes_32bit)
                max = min_t(u64, max, U32_MAX);
 
        start = READ_ONCE(*hint);
@@ -382,48 +382,37 @@ int bch2_inode_create(struct btree_trans *trans,
        inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
        if (IS_ERR(inode_p))
                return PTR_ERR(inode_p);
-
-       iter = bch2_trans_get_iter(trans,
-                       BTREE_ID_INODES, POS(start, 0),
-                       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       if (IS_ERR(iter))
-               return PTR_ERR(iter);
 again:
-       while (1) {
-               struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-
-               ret = bkey_err(k);
-               if (ret)
-                       return ret;
+       for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(start, 0),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (iter->pos.inode > max)
+                       break;
 
-               switch (k.k->type) {
-               case KEY_TYPE_inode:
-                       /* slot used */
-                       if (iter->pos.inode >= max)
-                               goto out;
+               if (k.k->type != KEY_TYPE_inode)
+                       goto found_slot;
+       }
 
-                       bch2_btree_iter_next_slot(iter);
-                       break;
+       bch2_trans_iter_put(trans, iter);
 
-               default:
-                       *hint                   = k.k->p.inode;
-                       inode_u->bi_inum        = k.k->p.inode;
-                       inode_u->bi_generation  = bkey_generation(k);
+       if (ret)
+               return ret;
 
-                       bch2_inode_pack(inode_p, inode_u);
-                       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
-                       return 0;
-               }
-       }
-out:
        if (start != min) {
                /* Retry from start */
                start = min;
-               bch2_btree_iter_set_pos(iter, POS(start, 0));
                goto again;
        }
 
        return -ENOSPC;
+found_slot:
+       *hint                   = k.k->p.inode;
+       inode_u->bi_inum        = k.k->p.inode;
+       inode_u->bi_generation  = bkey_generation(k);
+
+       bch2_inode_pack(inode_p, inode_u);
+       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
+       bch2_trans_iter_put(trans, iter);
+       return 0;
 }
 
 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
@@ -518,14 +507,13 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
-               return ret;
+               goto err;
 
        ret = k.k->type == KEY_TYPE_inode
                ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
                : -ENOENT;
-
+err:
        bch2_trans_iter_put(trans, iter);
-
        return ret;
 }
 
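The rewritten bch2_inode_create() is a hint-based wraparound search: scan forward from *hint for a slot whose key is not an inode, and if the top of the range is hit, retry once from the minimum. The same strategy as a self-contained toy (alloc_slot and slot_used are illustrative names, not part of this tree):

static int alloc_slot(bool (*slot_used)(u64), u64 min, u64 max, u64 *hint)
{
        u64 start = *hint, i;

        if (start < min || start > max)
                start = min;
again:
        for (i = start; i <= max; i++)          /* assumes max < U64_MAX */
                if (!slot_used(i)) {
                        *hint = i;              /* next search starts here */
                        return 0;
                }

        if (start != min) {
                /* slots below the hint may still be free: */
                start = min;
                goto again;
        }

        return -ENOSPC;
}
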
index 4c7dd0994a28211531b47fa791dae7333bd3b451..0d3366c25ba643fe83e508835912fd8e9231dced 100644 (file)
@@ -546,9 +546,14 @@ static void __bch2_write_index(struct bch_write_op *op)
         * particularly want to plumb io_opts all the way through the btree
         * update stack right now
         */
-       for_each_keylist_key(keys, k)
+       for_each_keylist_key(keys, k) {
                bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts);
 
+               if (bch2_bkey_is_incompressible(bkey_i_to_s_c(k)))
+                       bch2_check_set_feature(op->c, BCH_FEATURE_incompressible);
+
+       }
+
        if (!bch2_keylist_empty(keys)) {
                u64 sectors_start = keylist_sectors(keys);
                int ret = op->index_update_fn(op);
@@ -784,8 +789,9 @@ static enum prep_encoded_ret {
        /* Can we just write the entire extent as is? */
        if (op->crc.uncompressed_size == op->crc.live_size &&
            op->crc.compressed_size <= wp->sectors_free &&
-           op->crc.compression_type == op->compression_type) {
-               if (!op->crc.compression_type &&
+           (op->crc.compression_type == op->compression_type ||
+            op->incompressible)) {
+               if (!crc_is_compressed(op->crc) &&
                    op->csum_type != op->crc.csum_type &&
                    bch2_write_rechecksum(c, op, op->csum_type))
                        return PREP_ENCODED_CHECKSUM_ERR;
@@ -797,7 +803,7 @@ static enum prep_encoded_ret {
         * If the data is compressed and we couldn't write the entire extent as
         * is, we have to decompress it:
         */
-       if (op->crc.compression_type) {
+       if (crc_is_compressed(op->crc)) {
                struct bch_csum csum;
 
                if (bch2_write_decrypt(op))
@@ -864,6 +870,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                ret = -EIO;
                goto err;
        case PREP_ENCODED_CHECKSUM_ERR:
+               BUG();
                goto csum_err;
        case PREP_ENCODED_DO_WRITE:
                /* XXX look for bug here */
@@ -908,11 +915,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                       bch2_csum_type_is_encryption(op->crc.csum_type));
                BUG_ON(op->compression_type && !bounce);
 
-               crc.compression_type = op->compression_type
-                       ?  bch2_bio_compress(c, dst, &dst_len, src, &src_len,
-                                            op->compression_type)
+               crc.compression_type = op->incompressible
+                       ? BCH_COMPRESSION_TYPE_incompressible
+                       : op->compression_type
+                       ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
+                                           op->compression_type)
                        : 0;
-               if (!crc.compression_type) {
+               if (!crc_is_compressed(crc)) {
                        dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
                        dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9);
 
@@ -933,7 +942,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
 
                if (bch2_csum_type_is_encryption(op->csum_type)) {
                        if (bversion_zero(version)) {
-                               version.lo = atomic64_inc_return(&c->key_version) + 1;
+                               version.lo = atomic64_inc_return(&c->key_version);
                        } else {
                                crc.nonce = op->nonce;
                                op->nonce += src_len >> 9;
@@ -941,7 +950,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                }
 
                if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
-                   !crc.compression_type &&
+                   !crc_is_compressed(crc) &&
                    bch2_csum_type_is_encryption(op->crc.csum_type) ==
                    bch2_csum_type_is_encryption(op->csum_type)) {
                        /*
@@ -1338,6 +1347,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 
 static struct promote_op *__promote_alloc(struct bch_fs *c,
                                          enum btree_id btree_id,
+                                         struct bkey_s_c k,
                                          struct bpos pos,
                                          struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
@@ -1394,8 +1404,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
                        (struct data_opts) {
                                .target = opts.promote_target
                        },
-                       btree_id,
-                       bkey_s_c_null);
+                       btree_id, k);
        BUG_ON(ret);
 
        return op;
@@ -1437,7 +1446,7 @@ static struct promote_op *promote_alloc(struct bch_fs *c,
                                  k.k->type == KEY_TYPE_reflink_v
                                  ? BTREE_ID_REFLINK
                                  : BTREE_ID_EXTENTS,
-                                 pos, pick, opts, sectors, rbio);
+                                 k, pos, pick, opts, sectors, rbio);
        if (!promote)
                return NULL;
 
@@ -1701,7 +1710,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
        u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
        int ret;
 
-       if (rbio->pick.crc.compression_type)
+       if (crc_is_compressed(rbio->pick.crc))
                return;
 
        bkey_on_stack_init(&new);
@@ -1786,7 +1795,7 @@ static void __bch2_read_endio(struct work_struct *work)
        crc.offset     += rbio->offset_into_extent;
        crc.live_size   = bvec_iter_sectors(rbio->bvec_iter);
 
-       if (crc.compression_type != BCH_COMPRESSION_TYPE_none) {
+       if (crc_is_compressed(crc)) {
                bch2_encrypt_bio(c, crc.csum_type, nonce, src);
                if (bch2_bio_uncompress(c, src, dst, dst_iter, crc))
                        goto decompression_err;
@@ -1883,7 +1892,7 @@ static void bch2_read_endio(struct bio *bio)
        }
 
        if (rbio->narrow_crcs ||
-           rbio->pick.crc.compression_type ||
+           crc_is_compressed(rbio->pick.crc) ||
            bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
                context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
        else if (rbio->pick.crc.csum_type)
@@ -1994,7 +2003,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 
        EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
 
-       if (pick.crc.compression_type != BCH_COMPRESSION_TYPE_none ||
+       if (crc_is_compressed(pick.crc) ||
            (pick.crc.csum_type != BCH_CSUM_NONE &&
             (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
              (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
@@ -2009,7 +2018,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                                        &rbio, &bounce, &read_full);
 
        if (!read_full) {
-               EBUG_ON(pick.crc.compression_type);
+               EBUG_ON(crc_is_compressed(pick.crc));
                EBUG_ON(pick.crc.csum_type &&
                        (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
                         bvec_iter_sectors(iter) != pick.crc.live_size ||
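
One subtle fix above: version.lo used to be assigned atomic64_inc_return(&c->key_version) + 1. atomic64_inc_return() already returns the value after the increment, so the extra + 1 made every assigned version run one ahead of the counter and left version 1 permanently unused:

        atomic64_t v = ATOMIC64_INIT(0);

        /* returns the post-increment value: */
        u64 first = atomic64_inc_return(&v);    /* first == 1, v == 1 */
        /* old code handed out first == 2 while v == 1 */

The other changes in this file funnel every "is this data compressed on disk" test through crc_is_compressed(), and teach the write path to emit BCH_COMPRESSION_TYPE_incompressible (setting the feature bit via bch2_check_set_feature()) instead of re-running the compressor on data known not to shrink.
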
index 45c950942d784f89c1e2f2f1355f6446e46b4e7c..37f7fa6102fcaab30c1266dbae116a69e7e6d494 100644 (file)
@@ -78,6 +78,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
        op->nr_replicas         = 0;
        op->nr_replicas_required = c->opts.data_replicas_required;
        op->alloc_reserve       = RESERVE_NONE;
+       op->incompressible      = 0;
        op->open_buckets.nr     = 0;
        op->devs_have.nr        = 0;
        op->target              = 0;
index c37b7d7401e95714937438e9565802090afcd214..684e4c9a5d98958feb613046ae8c609a6987487b 100644 (file)
@@ -104,7 +104,8 @@ struct bch_write_op {
        unsigned                compression_type:4;
        unsigned                nr_replicas:4;
        unsigned                nr_replicas_required:4;
-       unsigned                alloc_reserve:4;
+       unsigned                alloc_reserve:3;
+       unsigned                incompressible:1;
 
        struct bch_devs_list    devs_have;
        u16                     target;
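
alloc_reserve gives up one of its four bits so the new incompressible flag fits in the same bitfield word, leaving struct bch_write_op's layout unchanged; this assumes every RESERVE_* value fits in 3 bits. Illustration only (hypothetical struct names):

        struct old_flags { unsigned compression_type:4, nr_replicas:4,
                           nr_replicas_required:4, alloc_reserve:4; };
        struct new_flags { unsigned compression_type:4, nr_replicas:4,
                           nr_replicas_required:4, alloc_reserve:3,
                           incompressible:1; };

        /* sizeof(struct old_flags) == sizeof(struct new_flags):
         * both pack 16 bits into one unsigned word. */
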
index 7112a25d0600e1d246517cbaf5263c8a827a9fde..db722a8ae4ea0c457385bf8eac929ddb02fb02f6 100644 (file)
@@ -171,8 +171,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                return 0;
        }
 
-       if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN)
-               bch2_bkey_swab(NULL, bkey_to_packed(k));
+       if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) {
+               bch2_bkey_swab_key(NULL, bkey_to_packed(k));
+               bch2_bkey_swab_val(bkey_i_to_s(k));
+       }
 
        if (!write &&
            version < bcachefs_metadata_version_bkey_renumber)
index 1ef62a189e3319a5425dbc10b59e1de4dd2af2dd..e26fa1608f39d49b671ebeb3255574c9bca928bd 100644 (file)
@@ -123,23 +123,21 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                for_each_btree_node(&trans, iter, id, POS_MIN,
                                    BTREE_ITER_PREFETCH, b) {
                        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-                       struct bkey_i_btree_ptr *new_key;
 retry:
                        if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
                                                  dev_idx))
                                continue;
 
                        bkey_copy(&tmp.k, &b->key);
-                       new_key = bkey_i_to_btree_ptr(&tmp.k);
 
-                       ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
+                       ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k),
                                            dev_idx, flags, true);
                        if (ret) {
                                bch_err(c, "Cannot drop device without losing data");
                                goto err;
                        }
 
-                       ret = bch2_btree_node_update_key(c, iter, b, new_key);
+                       ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
                        if (ret == -EINTR) {
                                b = bch2_btree_iter_peek_node(iter);
                                goto retry;
index 257e00ae6fa704fc07c890979fe8d6856c9dd553..ecc74ebe05799f5a619030ad14accfed41543c4c 100644 (file)
@@ -215,6 +215,9 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                            enum btree_id btree_id,
                            struct bkey_s_c k)
 {
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
        int ret;
 
        m->btree_id     = btree_id;
@@ -223,9 +226,14 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
        m->nr_ptrs_reserved = 0;
 
        bch2_write_op_init(&m->op, c, io_opts);
-       m->op.compression_type =
-               bch2_compression_opt_to_type[io_opts.background_compression ?:
-                                            io_opts.compression];
+
+       if (!bch2_bkey_is_incompressible(k))
+               m->op.compression_type =
+                       bch2_compression_opt_to_type[io_opts.background_compression ?:
+                                                    io_opts.compression];
+       else
+               m->op.incompressible = true;
+
        m->op.target    = data_opts.target,
        m->op.write_point = wp;
 
@@ -265,14 +273,11 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                break;
        }
        case DATA_REWRITE: {
-               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-               const union bch_extent_entry *entry;
-               struct extent_ptr_decoded p;
                unsigned compressed_sectors = 0;
 
                bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
                        if (!p.ptr.cached &&
-                           p.crc.compression_type != BCH_COMPRESSION_TYPE_none &&
+                           crc_is_compressed(p.crc) &&
                            bch2_dev_in_target(c, p.ptr.dev, data_opts.target))
                                compressed_sectors += p.crc.compressed_size;
 
index 84b3fb6eb101e591c5d2a07f4eea5fa8a801c2e0..ab1934325948e200a45642c65a76c050b13e9779 100644 (file)
 #include <linux/sched/cputime.h>
 #include <trace/events/bcachefs.h>
 
-static inline bool rebalance_ptr_pred(struct bch_fs *c,
-                                     struct extent_ptr_decoded p,
-                                     struct bch_io_opts *io_opts)
+/*
+ * Check if an extent should be moved:
+ * returns -1 if it should not be moved, the device of the pointer
+ * that should be moved if known, or INT_MAX if unknown
+ */
+static int __bch2_rebalance_pred(struct bch_fs *c,
+                                struct bkey_s_c k,
+                                struct bch_io_opts *io_opts)
 {
-       if (io_opts->background_target &&
-           !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
-           !p.ptr.cached)
-               return true;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
 
        if (io_opts->background_compression &&
-           p.crc.compression_type !=
-           bch2_compression_opt_to_type[io_opts->background_compression])
-               return true;
-
-       return false;
+           !bch2_bkey_is_incompressible(k))
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (!p.ptr.cached &&
+                           p.crc.compression_type !=
+                           bch2_compression_opt_to_type[io_opts->background_compression])
+                               return p.ptr.dev;
+
+       if (io_opts->background_target)
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (!p.ptr.cached &&
+                           !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
+                               return p.ptr.dev;
+
+       return -1;
 }
 
 void bch2_rebalance_add_key(struct bch_fs *c,
                            struct bkey_s_c k,
                            struct bch_io_opts *io_opts)
 {
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p;
+       atomic64_t *counter;
+       int dev;
 
-       if (!io_opts->background_target &&
-           !io_opts->background_compression)
+       dev = __bch2_rebalance_pred(c, k, io_opts);
+       if (dev < 0)
                return;
 
-       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-               if (rebalance_ptr_pred(c, p, io_opts)) {
-                       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+       counter = dev < INT_MAX
+               ? &bch_dev_bkey_exists(c, dev)->rebalance_work
+               : &c->rebalance.work_unknown_dev;
 
-                       if (atomic64_add_return(p.crc.compressed_size,
-                                               &ca->rebalance_work) ==
-                           p.crc.compressed_size)
-                               rebalance_wakeup(c);
-               }
-}
-
-void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
-{
-       if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
-           sectors)
+       if (atomic64_add_return(k.k->size, counter) == k.k->size)
                rebalance_wakeup(c);
 }
 
@@ -69,26 +71,20 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
                                    struct bch_io_opts *io_opts,
                                    struct data_opts *data_opts)
 {
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p;
-       unsigned nr_replicas = 0;
-
-       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               nr_replicas += !p.ptr.cached;
-
-               if (rebalance_ptr_pred(c, p, io_opts))
-                       goto found;
+       if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
+               data_opts->target               = io_opts->background_target;
+               data_opts->btree_insert_flags   = 0;
+               return DATA_ADD_REPLICAS;
+       } else {
+               return DATA_SKIP;
        }
+}
 
-       if (nr_replicas < io_opts->data_replicas)
-               goto found;
-
-       return DATA_SKIP;
-found:
-       data_opts->target               = io_opts->background_target;
-       data_opts->btree_insert_flags   = 0;
-       return DATA_ADD_REPLICAS;
+void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
+{
+       if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
+           sectors)
+               rebalance_wakeup(c);
 }
 
 struct rebalance_work {
@@ -183,6 +179,8 @@ static int bch2_rebalance_thread(void *arg)
        prev_cputime    = curr_cputime();
 
        while (!kthread_wait_freezable(r->enabled)) {
+               cond_resched();
+
                start                   = jiffies;
                cputime                 = curr_cputime();
 
index 8ecd4abc8eebdffdf5ca12154df75c874e7a4df6..c9d12f7c180e88705109ccd1ba246b29ab0d637e 100644 (file)
 
 /* iterate over keys read from the journal: */
 
-struct journal_iter bch2_journal_iter_init(struct journal_keys *keys,
-                                          enum btree_id id)
-{
-       return (struct journal_iter) {
-               .keys           = keys,
-               .k              = keys->d,
-               .btree_id       = id,
-       };
-}
-
 struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
 {
-       while (1) {
-               if (iter->k == iter->keys->d + iter->keys->nr)
-                       return bkey_s_c_null;
-
+       while (iter->k) {
                if (iter->k->btree_id == iter->btree_id)
                        return bkey_i_to_s_c(iter->k->k);
 
                iter->k++;
+               if (iter->k == iter->keys->d + iter->keys->nr)
+                       iter->k = NULL;
        }
 
        return bkey_s_c_null;
@@ -54,13 +43,110 @@ struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
 
 struct bkey_s_c bch2_journal_iter_next(struct journal_iter *iter)
 {
-       if (iter->k == iter->keys->d + iter->keys->nr)
+       if (!iter->k)
                return bkey_s_c_null;
 
        iter->k++;
+       if (iter->k == iter->keys->d + iter->keys->nr)
+               iter->k = NULL;
+
        return bch2_journal_iter_peek(iter);
 }
 
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
+{
+       switch (iter->last) {
+       case none:
+               break;
+       case btree:
+               bch2_btree_iter_next(iter->btree);
+               break;
+       case journal:
+               bch2_journal_iter_next(&iter->journal);
+               break;
+       }
+
+       iter->last = none;
+}
+
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
+{
+       struct bkey_s_c ret;
+
+       while (1) {
+               struct bkey_s_c btree_k         = bch2_btree_iter_peek(iter->btree);
+               struct bkey_s_c journal_k       = bch2_journal_iter_peek(&iter->journal);
+
+               if (btree_k.k && journal_k.k) {
+                       int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
+
+                       if (!cmp)
+                               bch2_btree_iter_next(iter->btree);
+
+                       iter->last = cmp < 0 ? btree : journal;
+               } else if (btree_k.k) {
+                       iter->last = btree;
+               } else if (journal_k.k) {
+                       iter->last = journal;
+               } else {
+                       iter->last = none;
+                       return bkey_s_c_null;
+               }
+
+               ret = iter->last == journal ? journal_k : btree_k;
+               if (!bkey_deleted(ret.k))
+                       break;
+
+               bch2_btree_and_journal_iter_advance(iter);
+       }
+
+       return ret;
+}
+
+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter)
+{
+       bch2_btree_and_journal_iter_advance(iter);
+
+       return bch2_btree_and_journal_iter_peek(iter);
+}
+
+struct journal_key *journal_key_search(struct journal_keys *journal_keys,
+                                      enum btree_id id, struct bpos pos)
+{
+       size_t l = 0, r = journal_keys->nr, m;
+
+       while (l < r) {
+               m = l + ((r - l) >> 1);
+               if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
+                    bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
+                       l = m + 1;
+               else
+                       r = m;
+       }
+
+       BUG_ON(l < journal_keys->nr &&
+              (cmp_int(id, journal_keys->d[l].btree_id) ?:
+               bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
+
+       BUG_ON(l &&
+              (cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
+               bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
+
+       return l < journal_keys->nr ? journal_keys->d + l : NULL;
+}
+
+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
+                                     struct btree_trans *trans,
+                                     struct journal_keys *journal_keys,
+                                     enum btree_id id, struct bpos pos)
+{
+       iter->journal.keys      = journal_keys;
+       iter->journal.k         = journal_key_search(journal_keys, id, pos);
+       iter->journal.btree_id  = id;
+
+       iter->btree = bch2_trans_get_iter(trans, id, pos, 0);
+}
+
 /* sort and dedup all keys in the journal: */
 
 static void journal_entries_free(struct list_head *list)
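
bch2_btree_and_journal_iter_peek() above is a two-way merge of sorted key streams with an override rule: when the btree and the journal both hold a key at the same position, the btree copy is skipped and the journal copy is returned (cmp == 0 advances the btree iterator and sets last to journal), and deleted keys are stepped over via the advance loop. The tie-break is the whole point: keys replayed from the journal are newer than what is on disk. A self-contained model of the same shape over plain integers (hypothetical, for intuition only):

#include <stdbool.h>
#include <stddef.h>

struct merge {
	const int *a, *a_end;	/* "btree" stream, sorted   */
	const int *b, *b_end;	/* "journal" stream, sorted */
};

/* Return the next element, with the b stream overriding a on ties. */
static const int *merge_peek(struct merge *m)
{
	bool have_a = m->a < m->a_end;
	bool have_b = m->b < m->b_end;

	if (have_a && have_b) {
		if (*m->a == *m->b) {
			m->a++;		/* tie: journal wins, drop the btree copy */
			return m->b;
		}
		return *m->a < *m->b ? m->a : m->b;
	}
	return have_a ? m->a : have_b ? m->b : NULL;
}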
@@ -924,6 +1010,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_ptr_v2;
                write_sb = true;
        }
 
index 479ea46f8dcb5954acc13d732646aff733b890c0..ccd84a8fe60d017976123e5d03093b1df6822318 100644 (file)
@@ -24,10 +24,28 @@ struct journal_iter {
        enum btree_id           btree_id;
 };
 
-struct journal_iter bch2_journal_iter_init(struct journal_keys *,
-                                          enum btree_id);
-struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *);
-struct bkey_s_c bch2_journal_iter_next(struct journal_iter *);
+struct btree_and_journal_iter {
+       enum btree_id           btree_id;
+
+       struct btree_iter       *btree;
+       struct journal_iter     journal;
+
+       enum last_key_returned {
+               none,
+               btree,
+               journal,
+       }                       last;
+};
+
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
+struct journal_key *journal_key_search(struct journal_keys *,
+                                      enum btree_id, struct bpos);
+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
+                                     struct btree_trans *,
+                                     struct journal_keys *,
+                                     enum btree_id, struct bpos);
 
 int bch2_fs_recovery(struct bch_fs *);
 int bch2_fs_initialize(struct bch_fs *);
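
Taken together, the declarations above define a peek/advance-style iterator; the expected consumption pattern is an init followed by a peek loop that advances explicitly. A sketch, assuming an initialized btree_trans and the journal_keys from recovery; the btree id, starting position, and handle_key() are placeholders:

struct btree_and_journal_iter iter;
struct bkey_s_c k;

bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
				 BTREE_ID_EXTENTS, POS_MIN);

while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
	handle_key(k);				/* placeholder for per-key work */
	bch2_btree_and_journal_iter_advance(&iter);
}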
index 3b8c74ca3725fc1d294c9179bd021e844e6eedd6..d78a3d5f72465d32da21df13b21dab6af04b766c 100644 (file)
@@ -128,10 +128,9 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 
        bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);
 err:
-       if (!IS_ERR(reflink_iter)) {
+       if (!IS_ERR(reflink_iter))
                c->reflink_hint = reflink_iter->pos.offset;
-               bch2_trans_iter_put(trans, reflink_iter);
-       }
+       bch2_trans_iter_put(trans, reflink_iter);
 
        return ret;
 }
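
The reflink cleanup above now calls bch2_trans_iter_put() unconditionally on a pointer that may be an error value, so the change only holds up if the put helper tolerates error pointers; presumably the commit taught bch2_trans_iter_put() to ignore IS_ERR_OR_NULL arguments (btree_iter.c is in the file list but not shown here). That assumed guard, written as a standalone wrapper purely for illustration:

/* Hypothetical: a put that is safe to call on failed lookups. */
static void trans_iter_put_safe(struct btree_trans *trans,
				struct btree_iter *iter)
{
	if (!IS_ERR_OR_NULL(iter))
		bch2_trans_iter_put(trans, iter);	/* only put real iterators */
}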
index ac23b855858cc75e45e1422c0b2933e1f0dd520f..5445c1cf0797c1a51eff5c444eee93ccd0fe010a 100644 (file)
@@ -22,6 +22,7 @@ void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
 #define bch2_bkey_ops_reflink_v (struct bkey_ops) {            \
        .key_invalid    = bch2_reflink_v_invalid,               \
        .val_to_text    = bch2_reflink_v_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
 }
 
 s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
index 366888b1b36d7ef5c52dbf7e4c296b9f6095ab6f..be4908575f72463eddc895ce4050e11f5cec6693 100644 (file)
@@ -112,6 +112,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                e->data_type = BCH_DATA_BTREE;
                extent_to_replicas(k, e);
                break;
index f2779159a6b8a3391d72d5cb0176c2a57c371a02..19a0d6b2754fa07df32fe32cc4e3dc14bcff8f08 100644 (file)
@@ -262,10 +262,8 @@ int bch2_hash_set(struct btree_trans *trans,
        if (!ret)
                ret = -ENOSPC;
 out:
-       if (!IS_ERR_OR_NULL(slot))
-               bch2_trans_iter_put(trans, slot);
-       if (!IS_ERR_OR_NULL(iter))
-               bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_put(trans, slot);
+       bch2_trans_iter_put(trans, iter);
 
        return ret;
 found:
@@ -319,13 +317,16 @@ int bch2_hash_delete(struct btree_trans *trans,
                     u64 inode, const void *key)
 {
        struct btree_iter *iter;
+       int ret;
 
        iter = bch2_hash_lookup(trans, desc, info, inode, key,
                                BTREE_ITER_INTENT);
        if (IS_ERR(iter))
                return PTR_ERR(iter);
 
-       return bch2_hash_delete_at(trans, desc, info, iter);
+       ret = bch2_hash_delete_at(trans, desc, info, iter);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
 }
 
 #endif /* _BCACHEFS_STR_HASH_H */
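
Both str_hash.h hunks are iterator-lifetime fixes: bch2_hash_set() can now put slot and iter unconditionally for the same reason as above, and bch2_hash_delete() previously returned without ever putting the iterator it got from bch2_hash_lookup(), holding a transaction iterator slot until bch2_trans_exit(). The general shape of the fix, with hypothetical helpers:

static int delete_one(struct btree_trans *trans, u64 inode, const void *key)
{
	struct btree_iter *iter = lookup_hashed(trans, inode, key); /* hypothetical */
	int ret;

	if (IS_ERR(iter))
		return PTR_ERR(iter);		/* lookup failed: nothing to put */

	ret = delete_at(trans, iter);		/* hypothetical */
	bch2_trans_iter_put(trans, iter);	/* put on success and on error alike */
	return ret;
}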
index 602def1ee95a0b917ac052f97ce22771058810c2..d78ffcc0e8a457cfef0790908938bda9bce765b0 100644 (file)
@@ -276,7 +276,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
                        struct extent_ptr_decoded p;
 
                        extent_for_each_ptr_decode(e, p, entry) {
-                               if (p.crc.compression_type == BCH_COMPRESSION_TYPE_none) {
+                               if (!crc_is_compressed(p.crc)) {
                                        nr_uncompressed_extents++;
                                        uncompressed_sectors += e.k->size;
                                } else {
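
The final hunk swaps a direct compression_type comparison for a crc_is_compressed() predicate. A plausible motivation is an "incompressible" marker type, i.e. data that was run through the compressor without shrinking, which should be counted with the uncompressed extents; under that assumption (not confirmed by this diff) the helper would read roughly:

/* Assumed definition: incompressible data does not count as compressed. */
static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
{
	return crc.compression_type != BCH_COMPRESSION_TYPE_none &&
	       crc.compression_type != BCH_COMPRESSION_TYPE_incompressible;
}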