git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to b12d1535f3 bcachefs: fix bounds checks in bch2_bio_map()
author    Kent Overstreet <kent.overstreet@gmail.com>
Sun, 4 Nov 2018 00:11:29 +0000 (20:11 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Sun, 4 Nov 2018 00:19:25 +0000 (20:19 -0400)
40 files changed:
.bcachefs_revision
cmd_migrate.c
libbcachefs.c
libbcachefs/alloc_background.c
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.h
libbcachefs/bset.c
libbcachefs/btree_gc.c
libbcachefs/btree_gc.h
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/clock.c
libbcachefs/debug.c
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/extents_types.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/io.c
libbcachefs/io.h
libbcachefs/io_types.h
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_io.h
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/movinggc.c
libbcachefs/rebalance.c
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/replicas_types.h [new file with mode: 0644]
libbcachefs/super_types.h
libbcachefs/sysfs.c
libbcachefs/util.c
libbcachefs/util.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 14085745156da766186b9bc29d4bdb7bc9c60873..4c8c8d11a89ac474d2adbd8490305469c46769cd 100644
@@ -1 +1 @@
-d7f6da1d60ec24266301231538ff6f09716537ed
+b12d1535f33661c5f11925d9a2debe28be661088
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 352f7403ba47f9e2bd537510af0a00642df13c8d..9523dbdd16d78f6fd0ec1feff7cb649599419cbe 100644
@@ -250,7 +250,6 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
 }
 
 static char buf[1 << 20] __aligned(PAGE_SIZE);
-static const size_t buf_pages = sizeof(buf) / PAGE_SIZE;
 
 static void write_data(struct bch_fs *c,
                       struct bch_inode_unpacked *dst_inode,
@@ -258,7 +257,7 @@ static void write_data(struct bch_fs *c,
 {
        struct {
                struct bch_write_op op;
-               struct bio_vec bv[buf_pages];
+               struct bio_vec bv[sizeof(buf) / PAGE_SIZE];
        } o;
        struct closure cl;
 
@@ -267,7 +266,7 @@ static void write_data(struct bch_fs *c,
 
        closure_init_stack(&cl);
 
-       bio_init(&o.op.wbio.bio, o.bv, buf_pages);
+       bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
        o.op.wbio.bio.bi_iter.bi_size = len;
        bch2_bio_map(&o.op.wbio.bio, buf);
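
Why buf_pages had to go: in C (unlike C++), a static const variable is not an
integer constant expression, so "struct bio_vec bv[buf_pages]" declares a
variably modified struct member, which is invalid. Sizing the array with
sizeof(buf) / PAGE_SIZE and then passing ARRAY_SIZE() keeps the bio_init()
call in sync with the array automatically. A minimal sketch of the pattern
(struct write_ctx is an illustrative name; ARRAY_SIZE and PAGE_SIZE as in the
kernel headers):

	#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

	static char buf[1 << 20];

	struct write_ctx {
		struct bio_vec bv[sizeof(buf) / PAGE_SIZE]; /* constant expression: OK */
		/* struct bio_vec bv[buf_pages]; */ /* error: variably modified type */
	};

	/* bio_init(&bio, ctx.bv, ARRAY_SIZE(ctx.bv)) can never drift out of sync */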
 
diff --git a/libbcachefs.c b/libbcachefs.c
index da3186110f55f2c703c68c3355134e5386a7e22e..3ce69d1b788d6f4e0e1788d1a444bf9abbd80f64 100644
@@ -346,7 +346,7 @@ static unsigned get_dev_has_data(struct bch_sb *sb, unsigned dev)
 
        if (replicas)
                for_each_replicas_entry(replicas, r)
-                       for (i = 0; i < r->nr; i++)
+                       for (i = 0; i < r->nr_devs; i++)
                                if (r->devs[i] == dev)
                                        data_has |= 1 << r->data_type;
 
@@ -502,7 +502,7 @@ static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
                printf_pad(32, "  %s:", bch2_data_types[e->data_type]);
 
                putchar('[');
-               for (i = 0; i < e->nr; i++) {
+               for (i = 0; i < e->nr_devs; i++) {
                        if (i)
                                putchar(' ');
                        printf("%u", e->devs[i]);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 7ba20c87caada672c25fdd91ce85c48ce9d23503..c3efb4357cad110833c9c2350faac615707c6a72 100644
@@ -582,7 +582,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
                        e.nr++;
                } else {
                        if (e.nr)
-                               heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+                               heap_add_or_replace(&ca->alloc_heap, e,
+                                       -bucket_alloc_cmp, NULL);
 
                        e = (struct alloc_heap_entry) {
                                .bucket = b,
@@ -595,14 +596,15 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
        }
 
        if (e.nr)
-               heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+               heap_add_or_replace(&ca->alloc_heap, e,
+                               -bucket_alloc_cmp, NULL);
 
        for (i = 0; i < ca->alloc_heap.used; i++)
                nr += ca->alloc_heap.data[i].nr;
 
        while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
                nr -= ca->alloc_heap.data[0].nr;
-               heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp);
+               heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL);
        }
 
        up_read(&ca->bucket_lock);
@@ -632,7 +634,7 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
                if (bch2_can_invalidate_bucket(ca, b, m)) {
                        struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
 
-                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
+                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
                        if (heap_full(&ca->alloc_heap))
                                break;
                }
@@ -659,7 +661,7 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
                if (bch2_can_invalidate_bucket(ca, b, m)) {
                        struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
 
-                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
+                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
                        if (heap_full(&ca->alloc_heap))
                                break;
                }
@@ -697,7 +699,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
                break;
        }
 
-       heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
+       heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL);
 
        for (i = 0; i < ca->alloc_heap.used; i++)
                nr += ca->alloc_heap.data[i].nr;
@@ -718,7 +720,7 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
                        return b;
                }
 
-               heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp);
+               heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
        }
 
        return -1;
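
Every heap helper call in this file (heap_add, heap_add_or_replace, heap_pop,
heap_resort, heap_sift_down) gains a trailing argument, NULL here. Judging
from the call sites, the generic heap macros in util.h (which is in the list
of changed files) now take an optional callback invoked whenever an element
changes position, so heap users that keep back-references into the heap can
update them; passing NULL preserves the old behaviour. A hedged sketch of
what the swap path presumably looks like — the callback parameter is inferred,
not copied from the tree:

	/* sketch only: run the user's callback, if any, after an element moves */
	#define heap_set_backpointer(h, i, _fn)				\
	do {								\
		void (*fn)(typeof(h), size_t) = (_fn);			\
		if (fn)							\
			fn((h), (i));					\
	} while (0)

	#define heap_swap(h, i, j, set_backpointer)			\
	do {								\
		swap((h)->data[i], (h)->data[j]);			\
		heap_set_backpointer(h, i, set_backpointer);		\
		heap_set_backpointer(h, j, set_backpointer);		\
	} while (0)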
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 92727cca2d756eb9f53b2a741980bb7d78919262..6d5c7d6b848413567d84ccc9692390d5f73bdb4f 100644
@@ -312,6 +312,7 @@ enum bch_time_stats {
 #include "keylist_types.h"
 #include "quota_types.h"
 #include "rebalance_types.h"
+#include "replicas_types.h"
 #include "super_types.h"
 
 /* Number of nodes btree coalesce will try to coalesce at once */
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index f1814f4caf2114cc5b3583179469b9f42df2b926..cdf392b39bb8692a32c504811851b5afe2547629 100644
@@ -456,15 +456,19 @@ enum bch_compression_type {
        BCH_COMPRESSION_NR              = 5,
 };
 
+#define BCH_EXTENT_ENTRY_TYPES()               \
+       x(ptr,                  0)              \
+       x(crc32,                1)              \
+       x(crc64,                2)              \
+       x(crc128,               3)
+#define BCH_EXTENT_ENTRY_MAX   4
+
 enum bch_extent_entry_type {
-       BCH_EXTENT_ENTRY_ptr            = 0,
-       BCH_EXTENT_ENTRY_crc32          = 1,
-       BCH_EXTENT_ENTRY_crc64          = 2,
-       BCH_EXTENT_ENTRY_crc128         = 3,
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
 };
 
-#define BCH_EXTENT_ENTRY_MAX           4
-
 /* Compressed/uncompressed size are stored biased by 1: */
 struct bch_extent_crc32 {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -589,10 +593,10 @@ union bch_extent_entry {
 #else
 #error edit for your odd byteorder.
 #endif
-       struct bch_extent_crc32         crc32;
-       struct bch_extent_crc64         crc64;
-       struct bch_extent_crc128        crc128;
-       struct bch_extent_ptr           ptr;
+
+#define x(f, n) struct bch_extent_##f  f;
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
 };
 
 enum {
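
BCH_EXTENT_ENTRY_TYPES() is an x-macro: each user defines x(name, nr)
locally, instantiates the list, then undefines x. The preprocessor output is
identical to the hand-written code it replaces; for the enum above and the
union below it expands to:

	enum bch_extent_entry_type {
		BCH_EXTENT_ENTRY_ptr	= 0,
		BCH_EXTENT_ENTRY_crc32	= 1,
		BCH_EXTENT_ENTRY_crc64	= 2,
		BCH_EXTENT_ENTRY_crc128	= 3,
	};

	/* union members generated by the same list: */
	struct bch_extent_ptr		ptr;
	struct bch_extent_crc32		crc32;
	struct bch_extent_crc64		crc64;
	struct bch_extent_crc128	crc128;

Adding a new entry type now takes one new x(...) line instead of editing the
enum and the union separately (BCH_EXTENT_ENTRY_MAX still needs a manual
bump, as the #define shows).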
@@ -1007,9 +1011,9 @@ enum bch_data_type {
 };
 
 struct bch_replicas_entry {
-       u8                      data_type;
-       u8                      nr;
-       u8                      devs[0];
+       __u8                    data_type;
+       __u8                    nr_devs;
+       __u8                    devs[0];
 };
 
 struct bch_sb_field_replicas {
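
Two things change in bch_replicas_entry: the fields become __u8 (this header
describes the on-disk format and is visible to userspace, where plain u8 does
not exist), and nr is renamed to the more descriptive nr_devs. Entries are
variable length — devs[0] is a flexible array — so they are packed back to
back in the superblock field and walked by size. A hedged sketch of the
iteration step, illustrative rather than the tree's exact macro:

	/* illustrative only: advance past one variable-length entry */
	static inline struct bch_replicas_entry *
	replicas_entry_next(struct bch_replicas_entry *e)
	{
		return (void *) e + sizeof(*e) + e->nr_devs;
	}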
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index c708f8c09cabad1719ab7570ae7bbe8ffeed7b5a..cf7a55630ac10a64024b5511911e8713116958bd 100644
@@ -18,17 +18,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
        return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
 }
 
-static inline bool btree_type_has_ptrs(enum bkey_type type)
-{
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-       case BKEY_TYPE_EXTENTS:
-               return true;
-       default:
-               return false;
-       }
-}
-
 struct bch_fs;
 struct btree;
 struct bkey;
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index c8e16dea8ee735684b8c20b06054097d133b8d30..c631e30aca53e9fa7362100698a3290a5b48ff73 100644
@@ -1689,7 +1689,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
        struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
        struct btree_node_iter_set *set;
        struct bset_tree *t;
-       unsigned end;
+       unsigned end = 0;
 
        bch2_btree_node_iter_verify(iter, b);
 
@@ -1791,7 +1791,7 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
        struct bkey_packed *l, *r, *p;
        struct bkey uk, up;
        char buf1[200], buf2[200];
-       unsigned j;
+       unsigned j, inorder;
 
        if (!size)
                return 0;
@@ -1799,53 +1799,57 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
        if (!bset_has_ro_aux_tree(t))
                goto out;
 
-       j = __inorder_to_eytzinger1(bkey_to_cacheline(b, t, k), t->size, t->extra);
-       if (j &&
-           j < t->size &&
-           k == tree_to_bkey(b, t, j))
-               switch (bkey_float(b, t, j)->exponent) {
-               case BFLOAT_FAILED_UNPACKED:
-                       uk = bkey_unpack_key(b, k);
-                       return scnprintf(buf, size,
-                                        "    failed unpacked at depth %u\n"
-                                        "\t%llu:%llu\n",
-                                        ilog2(j),
-                                        uk.p.inode, uk.p.offset);
-               case BFLOAT_FAILED_PREV:
-                       p = tree_to_prev_bkey(b, t, j);
-                       l = is_power_of_2(j)
-                               ? btree_bkey_first(b, t)
-                               : tree_to_prev_bkey(b, t, j >> ffs(j));
-                       r = is_power_of_2(j + 1)
-                               ? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
-                               : tree_to_bkey(b, t, j >> (ffz(j) + 1));
-
-                       up = bkey_unpack_key(b, p);
-                       uk = bkey_unpack_key(b, k);
-                       bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
-                       bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
-
-                       return scnprintf(buf, size,
-                                        "    failed prev at depth %u\n"
-                                        "\tkey starts at bit %u but first differing bit at %u\n"
-                                        "\t%llu:%llu\n"
-                                        "\t%llu:%llu\n"
-                                        "\t%s\n"
-                                        "\t%s\n",
-                                        ilog2(j),
-                                        bch2_bkey_greatest_differing_bit(b, l, r),
-                                        bch2_bkey_greatest_differing_bit(b, p, k),
-                                        uk.p.inode, uk.p.offset,
-                                        up.p.inode, up.p.offset,
-                                        buf1, buf2);
-               case BFLOAT_FAILED_OVERFLOW:
-                       uk = bkey_unpack_key(b, k);
-                       return scnprintf(buf, size,
-                                        "    failed overflow at depth %u\n"
-                                        "\t%llu:%llu\n",
-                                        ilog2(j),
-                                        uk.p.inode, uk.p.offset);
-               }
+       inorder = bkey_to_cacheline(b, t, k);
+       if (!inorder || inorder >= t->size)
+               goto out;
+
+       j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
+       if (k != tree_to_bkey(b, t, j))
+               goto out;
+
+       switch (bkey_float(b, t, j)->exponent) {
+       case BFLOAT_FAILED_UNPACKED:
+               uk = bkey_unpack_key(b, k);
+               return scnprintf(buf, size,
+                                "    failed unpacked at depth %u\n"
+                                "\t%llu:%llu\n",
+                                ilog2(j),
+                                uk.p.inode, uk.p.offset);
+       case BFLOAT_FAILED_PREV:
+               p = tree_to_prev_bkey(b, t, j);
+               l = is_power_of_2(j)
+                       ? btree_bkey_first(b, t)
+                       : tree_to_prev_bkey(b, t, j >> ffs(j));
+               r = is_power_of_2(j + 1)
+                       ? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
+                       : tree_to_bkey(b, t, j >> (ffz(j) + 1));
+
+               up = bkey_unpack_key(b, p);
+               uk = bkey_unpack_key(b, k);
+               bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
+               bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
+
+               return scnprintf(buf, size,
+                                "    failed prev at depth %u\n"
+                                "\tkey starts at bit %u but first differing bit at %u\n"
+                                "\t%llu:%llu\n"
+                                "\t%llu:%llu\n"
+                                "\t%s\n"
+                                "\t%s\n",
+                                ilog2(j),
+                                bch2_bkey_greatest_differing_bit(b, l, r),
+                                bch2_bkey_greatest_differing_bit(b, p, k),
+                                uk.p.inode, uk.p.offset,
+                                up.p.inode, up.p.offset,
+                                buf1, buf2);
+       case BFLOAT_FAILED_OVERFLOW:
+               uk = bkey_unpack_key(b, k);
+               return scnprintf(buf, size,
+                                "    failed overflow at depth %u\n"
+                                "\t%llu:%llu\n",
+                                ilog2(j),
+                                uk.p.inode, uk.p.offset);
+       }
 out:
        *buf = '\0';
        return 0;
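
The restructured bfloat printing fixes an ordering bug as well as the
nesting: the old code converted to an eytzinger index first and range-checked
the result, but the "!j || j >= t->size" constraint belongs to the in-order
cacheline index, so it has to be applied before __inorder_to_eytzinger1() is
called at all. The essence of the fix, reduced to four lines:

	inorder = bkey_to_cacheline(b, t, k);	/* raw in-order index */
	if (!inorder || inorder >= t->size)	/* validate before transforming */
		goto out;
	j = __inorder_to_eytzinger1(inorder, t->size, t->extra);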
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b0f9bd75588b41b5ed562ab7a777e23b0df7750d..b3c69da926dfeb48ba1cfa3fb3d9c4420dee1cf9 100644
@@ -17,6 +17,7 @@
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
+#include "journal_io.h"
 #include "keylist.h"
 #include "move.h"
 #include "replicas.h"
 #include <linux/sched/task.h>
 #include <trace/events/bcachefs.h>
 
+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       write_seqcount_begin(&c->gc_pos_lock);
+       c->gc_pos = new_pos;
+       write_seqcount_end(&c->gc_pos_lock);
+}
+
+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
+       __gc_pos_set(c, new_pos);
+}
+
+/* range_checks - for validating min/max pos of each btree node: */
+
 struct range_checks {
        struct range_level {
                struct bpos     min;
@@ -90,6 +106,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
        }
 }
 
+/* marking of btree keys/nodes: */
+
+static bool bkey_type_needs_gc(enum bkey_type type)
+{
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+       case BKEY_TYPE_EXTENTS:
+               return true;
+       default:
+               return false;
+       }
+}
+
 u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
 {
        const struct bch_extent_ptr *ptr;
@@ -112,39 +141,8 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
        return max_stale;
 }
 
-/*
- * For runtime mark and sweep:
- */
-static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
-                          struct bkey_s_c k, unsigned flags)
-{
-       struct gc_pos pos = { 0 };
-       u8 ret = 0;
-
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-               bch2_mark_key(c, k, c->opts.btree_node_size,
-                             BCH_DATA_BTREE, pos, NULL,
-                             0, flags|
-                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-                             BCH_BUCKET_MARK_GC_LOCK_HELD);
-               break;
-       case BKEY_TYPE_EXTENTS:
-               bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
-                             0, flags|
-                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-                             BCH_BUCKET_MARK_GC_LOCK_HELD);
-               ret = bch2_btree_key_recalc_oldest_gen(c, k);
-               break;
-       default:
-               BUG();
-       }
-
-       return ret;
-}
-
-int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
-                               struct bkey_s_c k)
+static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
+                                       struct bkey_s_c k)
 {
        enum bch_data_type data_type = type == BKEY_TYPE_BTREE
                ? BCH_DATA_BTREE : BCH_DATA_USER;
@@ -154,10 +152,10 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
               k.k->version.lo > journal_cur_seq(&c->journal));
 
        if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-           fsck_err_on(!bch2_bkey_replicas_marked(c, data_type, k), c,
+           fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
                        "superblock not marked as containing replicas (type %u)",
                        data_type)) {
-               ret = bch2_mark_bkey_replicas(c, data_type, k);
+               ret = bch2_mark_bkey_replicas(c, type, k);
                if (ret)
                        return ret;
        }
@@ -198,52 +196,87 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
        }
        }
 
-       atomic64_set(&c->key_version,
-                    max_t(u64, k.k->version.lo,
-                          atomic64_read(&c->key_version)));
-
-       bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
+       if (k.k->version.lo > atomic64_read(&c->key_version))
+               atomic64_set(&c->key_version, k.k->version.lo);
 fsck_err:
        return ret;
 }
 
-static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
+/*
+ * For runtime mark and sweep:
+ */
+static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
+                           struct bkey_s_c k, bool initial)
+{
+       struct gc_pos pos = { 0 };
+       unsigned flags =
+               BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
+               BCH_BUCKET_MARK_GC_LOCK_HELD|
+               (initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
+       int ret = 0;
+
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+       case BKEY_TYPE_EXTENTS:
+               if (initial) {
+                       ret = bch2_btree_mark_ptrs_initial(c, type, k);
+                       if (ret < 0)
+                               return ret;
+               }
+               break;
+       default:
+               break;
+       }
+
+       bch2_mark_key(c, type, k, true, k.k->size,
+                     pos, NULL, 0, flags);
+
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+       case BKEY_TYPE_EXTENTS:
+               ret = bch2_btree_key_recalc_oldest_gen(c, k);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
+                             bool initial)
 {
        enum bkey_type type = btree_node_type(b);
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
        u8 stale = 0;
+       int ret;
 
-       if (btree_node_has_ptrs(b))
-               for_each_btree_node_key_unpack(b, k, &iter,
-                                              &unpacked) {
-                       bch2_bkey_debugcheck(c, b, k);
-                       stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
-               }
+       if (!bkey_type_needs_gc(type))
+               return 0;
 
-       return stale;
-}
+       for_each_btree_node_key_unpack(b, k, &iter,
+                                      &unpacked) {
+               bch2_bkey_debugcheck(c, b, k);
 
-static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
-       write_seqcount_begin(&c->gc_pos_lock);
-       c->gc_pos = new_pos;
-       write_seqcount_end(&c->gc_pos_lock);
-}
+               ret = bch2_gc_mark_key(c, type, k, initial);
+               if (ret < 0)
+                       return ret;
 
-static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
-       BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
-       __gc_pos_set(c, new_pos);
+               stale = max_t(u8, stale, ret);
+       }
+
+       return stale;
 }
 
-static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
+                        bool initial)
 {
        struct btree_iter iter;
        struct btree *b;
        struct range_checks r;
-       unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
+       unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
        unsigned max_stale;
        int ret = 0;
 
@@ -254,8 +287,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
        /*
         * if expensive_debug_checks is on, run range_checks on all leaf nodes:
+        *
+        * and on startup, we have to read every btree node (XXX: only if it was
+        * an unclean shutdown)
         */
-       if (expensive_debug_checks(c))
+       if (initial || expensive_debug_checks(c))
                depth = 0;
 
        btree_node_range_checks_init(&r, depth);
@@ -266,22 +302,24 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
                bch2_verify_btree_nr_keys(b);
 
-               max_stale = btree_gc_mark_node(c, b);
+               max_stale = btree_gc_mark_node(c, b, initial);
 
                gc_pos_set(c, gc_pos_btree_node(b));
 
-               if (max_stale > 64)
-                       bch2_btree_node_rewrite(c, &iter,
-                                       b->data->keys.seq,
-                                       BTREE_INSERT_USE_RESERVE|
-                                       BTREE_INSERT_NOWAIT|
-                                       BTREE_INSERT_GC_LOCK_HELD);
-               else if (!btree_gc_rewrite_disabled(c) &&
-                        (btree_gc_always_rewrite(c) || max_stale > 16))
-                       bch2_btree_node_rewrite(c, &iter,
-                                       b->data->keys.seq,
-                                       BTREE_INSERT_NOWAIT|
-                                       BTREE_INSERT_GC_LOCK_HELD);
+               if (!initial) {
+                       if (max_stale > 64)
+                               bch2_btree_node_rewrite(c, &iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_USE_RESERVE|
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+                       else if (!btree_gc_rewrite_disabled(c) &&
+                                (btree_gc_always_rewrite(c) || max_stale > 16))
+                               bch2_btree_node_rewrite(c, &iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+               }
 
                bch2_btree_iter_cond_resched(&iter);
        }
@@ -293,13 +331,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
        b = c->btree_roots[btree_id].b;
        if (!btree_node_fake(b))
-               bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
+               bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
+                                bkey_i_to_s_c(&b->key), initial);
        gc_pos_set(c, gc_pos_btree_root(b->btree_id));
 
        mutex_unlock(&c->btree_root_lock);
        return 0;
 }
 
+static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
+                         bool initial)
+{
+       unsigned i;
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               enum bkey_type type = bkey_type(0, i);
+
+               int ret = bch2_gc_btree(c, i, initial);
+               if (ret)
+                       return ret;
+
+               if (journal && bkey_type_needs_gc(type)) {
+                       struct bkey_i *k, *n;
+                       struct jset_entry *j;
+                       struct journal_replay *r;
+                       int ret;
+
+                       list_for_each_entry(r, journal, list)
+                               for_each_jset_key(k, n, j, &r->j) {
+                                       if (type == bkey_type(j->level, j->btree_id)) {
+                                               ret = bch2_gc_mark_key(c, type,
+                                                       bkey_i_to_s_c(k), initial);
+                                               if (ret < 0)
+                                                       return ret;
+                                       }
+                               }
+               }
+       }
+
+       return 0;
+}
+
 static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
                                  u64 start, u64 end,
                                  enum bch_data_type type,
@@ -395,10 +467,10 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 
        for_each_pending_btree_node_free(c, as, d)
                if (d->index_update_done)
-                       bch2_mark_key(c, bkey_i_to_s_c(&d->key),
-                                     c->opts.btree_node_size,
-                                     BCH_DATA_BTREE, pos,
-                                     &stats, 0,
+                       bch2_mark_key(c, BKEY_TYPE_BTREE,
+                                     bkey_i_to_s_c(&d->key),
+                                     true, 0,
+                                     pos, &stats, 0,
                                      BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
                                      BCH_BUCKET_MARK_GC_LOCK_HELD);
        /*
@@ -522,6 +594,7 @@ void bch2_gc(struct bch_fs *c)
        struct bch_dev *ca;
        u64 start_time = local_clock();
        unsigned i;
+       int ret;
 
        /*
         * Walk _all_ references to buckets, and recompute them:
@@ -557,14 +630,11 @@ void bch2_gc(struct bch_fs *c)
 
        bch2_mark_superblocks(c);
 
-       /* Walk btree: */
-       for (i = 0; i < BTREE_ID_NR; i++) {
-               int ret = bch2_gc_btree(c, i);
-               if (ret) {
-                       bch_err(c, "btree gc failed: %d", ret);
-                       set_bit(BCH_FS_GC_FAILURE, &c->flags);
-                       goto out;
-               }
+       ret = bch2_gc_btrees(c, NULL, false);
+       if (ret) {
+               bch_err(c, "btree gc failed: %d", ret);
+               set_bit(BCH_FS_GC_FAILURE, &c->flags);
+               goto out;
        }
 
        bch2_mark_pending_btree_node_frees(c);
@@ -1006,58 +1076,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
 
 /* Initial GC computes bucket marks during startup */
 
-static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
-{
-       struct btree_iter iter;
-       struct btree *b;
-       struct range_checks r;
-       int ret = 0;
-
-       btree_node_range_checks_init(&r, 0);
-
-       gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
-
-       if (!c->btree_roots[id].b)
-               return 0;
-
-       b = c->btree_roots[id].b;
-       if (!btree_node_fake(b))
-               ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
-                                                 bkey_i_to_s_c(&b->key));
-       if (ret)
-               return ret;
-
-       /*
-        * We have to hit every btree node before starting journal replay, in
-        * order for the journal seq blacklist machinery to work:
-        */
-       for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-               btree_node_range_checks(c, b, &r);
-
-               if (btree_node_has_ptrs(b)) {
-                       struct btree_node_iter node_iter;
-                       struct bkey unpacked;
-                       struct bkey_s_c k;
-
-                       for_each_btree_node_key_unpack(b, k, &node_iter,
-                                                      &unpacked) {
-                               ret = bch2_btree_mark_key_initial(c,
-                                                       btree_node_type(b), k);
-                               if (ret)
-                                       goto err;
-                       }
-               }
-
-               bch2_btree_iter_cond_resched(&iter);
-       }
-err:
-       return bch2_btree_iter_unlock(&iter) ?: ret;
-}
-
 int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
 {
        unsigned iter = 0;
-       enum btree_id id;
        int ret = 0;
 
        down_write(&c->gc_lock);
@@ -1066,13 +1087,7 @@ again:
 
        bch2_mark_superblocks(c);
 
-       for (id = 0; id < BTREE_ID_NR; id++) {
-               ret = bch2_initial_gc_btree(c, id);
-               if (ret)
-                       goto err;
-       }
-
-       ret = bch2_journal_mark(c, journal);
+       ret = bch2_gc_btrees(c, journal, true);
        if (ret)
                goto err;
 
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index 214a3fe3aabed2c0ca6d311f5c438046b01b6fbf..f9225af21060476d5bee8c631ba6373bcb994061 100644
@@ -11,8 +11,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 int bch2_initial_gc(struct bch_fs *, struct list_head *);
 u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
-int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
-                               struct bkey_s_c);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
 
 /*
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index a4f184f3f8c88aa416b969efe95247b043513250..beab463b51e1221fa52fe33edf503ff4e08beebb 100644
@@ -35,7 +35,7 @@ void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
                                 __btree_node_key_to_offset(b, end)
                         });
 
-               __heap_add(iter, n, btree_node_iter_cmp_heap);
+               __heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
        }
 }
 
@@ -48,9 +48,9 @@ void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
        EBUG_ON(iter->data->k > iter->data->end);
 
        if (iter->data->k == iter->data->end)
-               heap_del(iter, 0, btree_node_iter_cmp_heap);
+               heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
        else
-               heap_sift_down(iter, 0, btree_node_iter_cmp_heap);
+               heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
 }
 
 static void verify_no_dups(struct btree *b,
@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
        struct bch_dev *ca      = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
        struct btree *b         = rb->bio.bi_private;
        struct bio *bio         = &rb->bio;
-       struct bch_devs_mask avoid;
+       struct bch_io_failures failed = { .nr = 0 };
        bool can_retry;
 
-       memset(&avoid, 0, sizeof(avoid));
-
        goto start;
        while (1) {
                bch_info(c, "retrying read");
@@ -1372,8 +1370,9 @@ start:
                        percpu_ref_put(&ca->io_ref);
                rb->have_ioref = false;
 
-               __set_bit(rb->pick.ptr.dev, avoid.d);
-               can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
+               bch2_mark_io_failure(&failed, &rb->pick);
+
+               can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
 
                if (!bio->bi_status &&
                    !bch2_btree_node_read_done(c, b, can_retry))
@@ -1408,7 +1407,7 @@ static void btree_node_read_endio(struct bio *bio)
 void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
                          bool sync)
 {
-       struct extent_pick_ptr pick;
+       struct extent_ptr_decoded pick;
        struct btree_read_bio *rb;
        struct bch_dev *ca;
        struct bio *bio;
@@ -1425,7 +1424,9 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 
        ca = bch_dev_bkey_exists(c, pick.ptr.dev);
 
-       bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
+       bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data,
+                                                  btree_bytes(c)),
+                              &c->btree_bio);
        rb = container_of(bio, struct btree_read_bio, bio);
        rb->c                   = c;
        rb->start_time          = local_clock();
@@ -1568,9 +1569,9 @@ retry:
 
        new_key = bkey_i_to_extent(&tmp.k);
        e = extent_i_to_s(new_key);
-       extent_for_each_ptr_backwards(e, ptr)
-               if (bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev))
-                       bch2_extent_drop_ptr(e, ptr);
+
+       bch2_extent_drop_ptrs(e, ptr,
+               bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
        if (!bch2_extent_nr_ptrs(e.c))
                goto err;
@@ -1880,7 +1881,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 
        trace_btree_write(b, bytes_to_write, sectors_to_write);
 
-       wbio = container_of(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->btree_bio),
+       wbio = container_of(bio_alloc_bioset(GFP_NOIO,
+                               buf_pages(data, sectors_to_write << 9),
+                               &c->btree_bio),
                            struct btree_write_bio, wbio.bio);
        wbio_init(&wbio->wbio.bio);
        wbio->data                      = data;
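
The two bio_alloc_bioset() calls above stop assuming the buffer is
page-aligned: btree_pages(c) counts the pages in a btree node's worth of
bytes, but a buffer of that size starting at an offset within a page spans
one page more. buf_pages(ptr, len) presumably rounds len up by the pointer's
offset in its first page, along these lines (the helper appears in util.h,
which is in the list of changed files; the exact definition is an
assumption):

	/* assumed definition: pages spanned by [p, p + len) */
	static inline unsigned buf_pages(void *p, size_t len)
	{
		return DIV_ROUND_UP(len +
				    ((unsigned long) p & (PAGE_SIZE - 1)),
				    PAGE_SIZE);
	}

This matches the commit message: the bounds checks in bch2_bio_map() were
wrong for buffers that don't start on a page boundary.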
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index ccd47326d16bbd739abee3a1dda9430072fcde6e..48833a9883fc772c1f58624475135fbe102e7667 100644
@@ -14,7 +14,7 @@ struct btree_read_bio {
        struct bch_fs           *c;
        u64                     start_time;
        unsigned                have_ioref:1;
-       struct extent_pick_ptr  pick;
+       struct extent_ptr_decoded       pick;
        struct work_struct      work;
        struct bio              bio;
 };
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index e20dd7a2be8d3e5a57d2649277c0814ad70c4633..44349159dfbe0d205e57ca27937b005ed469cee4 100644
@@ -414,11 +414,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
        return &bch2_bkey_ops[btree_node_type(b)];
 }
 
-static inline bool btree_node_has_ptrs(struct btree *b)
-{
-       return btree_type_has_ptrs(btree_node_type(b));
-}
-
 static inline bool btree_node_is_extents(struct btree *b)
 {
        return btree_node_type(b) == BKEY_TYPE_EXTENTS;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 6d3fab8e767ef0214abb811ce1c919839b4f5d02..0a9d6919ccd334f615f952a490ee255ee5804dad 100644
@@ -210,11 +210,12 @@ found:
        if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
                struct bch_fs_usage tmp = { 0 };
 
-               bch2_mark_key(c, bkey_i_to_s_c(&d->key),
-                            -c->opts.btree_node_size, BCH_DATA_BTREE, b
-                            ? gc_pos_btree_node(b)
-                            : gc_pos_btree_root(as->btree_id),
-                            &tmp, 0, 0);
+               bch2_mark_key(c, BKEY_TYPE_BTREE,
+                             bkey_i_to_s_c(&d->key),
+                             false, 0, b
+                             ? gc_pos_btree_node(b)
+                             : gc_pos_btree_root(as->btree_id),
+                             &tmp, 0, 0);
                /*
                 * Don't apply tmp - pending deletes aren't tracked in
                 * bch_alloc_stats:
@@ -289,10 +290,11 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
 
        BUG_ON(!pending->index_update_done);
 
-       bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
-                    -c->opts.btree_node_size, BCH_DATA_BTREE,
-                    gc_phase(GC_PHASE_PENDING_DELETE),
-                    &stats, 0, 0);
+       bch2_mark_key(c, BKEY_TYPE_BTREE,
+                     bkey_i_to_s_c(&pending->key),
+                     false, 0,
+                     gc_phase(GC_PHASE_PENDING_DELETE),
+                     &stats, 0, 0);
        /*
         * Don't apply stats - pending deletes aren't tracked in
         * bch_alloc_stats:
@@ -550,7 +552,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
                        goto err_free;
                }
 
-               ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
                                              bkey_i_to_s_c(&b->key));
                if (ret)
                        goto err_free;
@@ -1091,8 +1093,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 
        __bch2_btree_set_root_inmem(c, b);
 
-       bch2_mark_key(c, bkey_i_to_s_c(&b->key),
-                     c->opts.btree_node_size, BCH_DATA_BTREE,
+       bch2_mark_key(c, BKEY_TYPE_BTREE,
+                     bkey_i_to_s_c(&b->key),
+                     true, 0,
                      gc_pos_btree_root(b->btree_id),
                      &stats, 0, 0);
 
@@ -1179,9 +1182,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
        BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
 
        if (bkey_extent_is_data(&insert->k))
-               bch2_mark_key(c, bkey_i_to_s_c(insert),
-                            c->opts.btree_node_size, BCH_DATA_BTREE,
-                            gc_pos_btree_node(b), &stats, 0, 0);
+               bch2_mark_key(c, BKEY_TYPE_BTREE,
+                             bkey_i_to_s_c(insert),
+                             true, 0,
+                             gc_pos_btree_node(b), &stats, 0, 0);
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
               bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@@ -1966,8 +1970,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
                bch2_btree_node_lock_write(b, iter);
 
-               bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i),
-                             c->opts.btree_node_size, BCH_DATA_BTREE,
+               bch2_mark_key(c, BKEY_TYPE_BTREE,
+                             bkey_i_to_s_c(&new_key->k_i),
+                             true, 0,
                              gc_pos_btree_root(b->btree_id),
                              &stats, 0, 0);
                bch2_btree_node_free_index(as, NULL,
@@ -2062,7 +2067,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                        goto err;
        }
 
-       ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
                                      extent_i_to_s_c(new_key).s_c);
        if (ret)
                goto err_free_update;
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 052e8af8708a7f71672164bd5e99179171413328..271c02f1a5a70be8c05dd054246320679461b59f 100644
@@ -533,27 +533,12 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
                                    crc.uncompressed_size));
 }
 
-/*
- * Checking against gc's position has to be done here, inside the cmpxchg()
- * loop, to avoid racing with the start of gc clearing all the marks - GC does
- * that with the gc pos seqlock held.
- */
-static void bch2_mark_pointer(struct bch_fs *c,
-                             struct bkey_s_c_extent e,
-                             const struct bch_extent_ptr *ptr,
-                             struct bch_extent_crc_unpacked crc,
-                             s64 sectors, enum bch_data_type data_type,
-                             unsigned replicas,
-                             struct bch_fs_usage *fs_usage,
-                             u64 journal_seq, unsigned flags)
+static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
+                           struct extent_ptr_decoded p,
+                           s64 sectors)
 {
-       struct bucket_mark old, new;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       struct bucket *g = PTR_BUCKET(ca, ptr);
-       s64 uncompressed_sectors = sectors;
-       u64 v;
 
-       if (crc.compression_type) {
+       if (p.crc.compression_type) {
                unsigned old_sectors, new_sectors;
 
                if (sectors > 0) {
@@ -564,23 +549,29 @@ static void bch2_mark_pointer(struct bch_fs *c,
                        new_sectors = e.k->size + sectors;
                }
 
-               sectors = -__disk_sectors(crc, old_sectors)
-                         +__disk_sectors(crc, new_sectors);
+               sectors = -__disk_sectors(p.crc, old_sectors)
+                         +__disk_sectors(p.crc, new_sectors);
        }
 
-       /*
-        * fs level usage (which determines free space) is in uncompressed
-        * sectors, until copygc + compression is sorted out:
-        *
-        * note also that we always update @fs_usage, even when we otherwise
-        * wouldn't do anything because gc is running - this is because the
-        * caller still needs to account w.r.t. its disk reservation. It is
-        * caller's responsibility to not apply @fs_usage if gc is in progress.
-        */
-       fs_usage->replicas
-               [!ptr->cached && replicas ? replicas - 1 : 0].data
-               [!ptr->cached ? data_type : BCH_DATA_CACHED] +=
-                       uncompressed_sectors;
+       return sectors;
+}
+
+/*
+ * Checking against gc's position has to be done here, inside the cmpxchg()
+ * loop, to avoid racing with the start of gc clearing all the marks - GC does
+ * that with the gc pos seqlock held.
+ */
+static void bch2_mark_pointer(struct bch_fs *c,
+                             struct bkey_s_c_extent e,
+                             struct extent_ptr_decoded p,
+                             s64 sectors, enum bch_data_type data_type,
+                             struct bch_fs_usage *fs_usage,
+                             u64 journal_seq, unsigned flags)
+{
+       struct bucket_mark old, new;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+       struct bucket *g = PTR_BUCKET(ca, &p.ptr);
+       u64 v;
 
        if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
                if (journal_seq)
@@ -601,14 +592,14 @@ static void bch2_mark_pointer(struct bch_fs *c,
                 * the allocator invalidating a bucket after we've already
                 * checked the gen
                 */
-               if (gen_after(new.gen, ptr->gen)) {
+               if (gen_after(new.gen, p.ptr.gen)) {
                        BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
-                       EBUG_ON(!ptr->cached &&
+                       EBUG_ON(!p.ptr.cached &&
                                test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
                        return;
                }
 
-               if (!ptr->cached)
+               if (!p.ptr.cached)
                        checked_add(new.dirty_sectors, sectors);
                else
                        checked_add(new.cached_sectors, sectors);
@@ -639,16 +630,64 @@ static void bch2_mark_pointer(struct bch_fs *c,
               bucket_became_unavailable(c, old, new));
 }
 
-void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
-                  s64 sectors, enum bch_data_type data_type,
-                  struct gc_pos pos,
-                  struct bch_fs_usage *stats,
-                  u64 journal_seq, unsigned flags)
+static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
+                            s64 sectors, enum bch_data_type data_type,
+                            struct gc_pos pos,
+                            struct bch_fs_usage *stats,
+                            u64 journal_seq, unsigned flags)
 {
        unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
 
        BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
+       BUG_ON(!sectors);
+
+       switch (k.k->type) {
+       case BCH_EXTENT:
+       case BCH_EXTENT_CACHED: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
+
+               extent_for_each_ptr_decode(e, p, entry) {
+                       s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
+
+                       /*
+                        * fs level usage (which determines free space) is in
+                        * uncompressed sectors, until copygc + compression is
+                        * sorted out:
+                        *
+                        * note also that we always update @fs_usage, even when
+                        * we otherwise wouldn't do anything because gc is
+                        * running - this is because the caller still needs to
+                        * account w.r.t. its disk reservation. It is caller's
+                        * responsibility to not apply @fs_usage if gc is in
+                        * progress.
+                        */
+                       stats->replicas
+                               [!p.ptr.cached && replicas ? replicas - 1 : 0].data
+                               [!p.ptr.cached ? data_type : BCH_DATA_CACHED] +=
+                                       sectors;
+
+                       bch2_mark_pointer(c, e, p, disk_sectors, data_type,
+                                         stats, journal_seq, flags);
+               }
+               break;
+       }
+       case BCH_RESERVATION:
+               if (replicas)
+                       stats->replicas[replicas - 1].persistent_reserved +=
+                               sectors * replicas;
+               break;
+       }
+}
 
+void bch2_mark_key(struct bch_fs *c,
+                  enum bkey_type type, struct bkey_s_c k,
+                  bool inserting, s64 sectors,
+                  struct gc_pos pos,
+                  struct bch_fs_usage *stats,
+                  u64 journal_seq, unsigned flags)
+{
        /*
         * synchronization w.r.t. GC:
         *
@@ -685,24 +724,19 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
        if (!stats)
                stats = this_cpu_ptr(c->usage_percpu);
 
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED: {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const struct bch_extent_ptr *ptr;
-               struct bch_extent_crc_unpacked crc;
-
-               BUG_ON(!sectors);
-
-               extent_for_each_ptr_crc(e, ptr, crc)
-                       bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
-                                         replicas, stats, journal_seq, flags);
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+               bch2_mark_extent(c, k, inserting
+                                ?  c->opts.btree_node_size
+                                : -c->opts.btree_node_size,
+                                BCH_DATA_BTREE,
+                                pos, stats, journal_seq, flags);
                break;
-       }
-       case BCH_RESERVATION:
-               if (replicas)
-                       stats->replicas[replicas - 1].persistent_reserved +=
-                               sectors * replicas;
+       case BKEY_TYPE_EXTENTS:
+               bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+                                pos, stats, journal_seq, flags);
+               break;
+       default:
                break;
        }
        percpu_up_read_preempt_enable(&c->usage_lock);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index ff86d23e15e47aae7d6e7d8757814ab8ec53b311..d9fe938af4b4f61e1bf1a09d0b3491ef1b2a2efc 100644
@@ -203,8 +203,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 #define BCH_BUCKET_MARK_GC_WILL_VISIT          (1 << 2)
 #define BCH_BUCKET_MARK_GC_LOCK_HELD           (1 << 3)
 
-void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, enum bch_data_type,
-                  struct gc_pos, struct bch_fs_usage *, u64, unsigned);
+void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+                  bool, s64, struct gc_pos,
+                  struct bch_fs_usage *, u64, unsigned);
 
 void bch2_recalc_sectors_available(struct bch_fs *);
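
The new bch2_mark_key() prototype makes callers pass the bkey type plus an
"inserting" flag instead of a precomputed sector count and data type; the
function derives those itself (the configured btree node size for
BKEY_TYPE_BTREE, the extent size or the passed sectors for
BKEY_TYPE_EXTENTS). A before/after view of one call site, taken directly
from the btree_update_interior.c hunks above:

	/* before: caller supplied sectors and data type */
	bch2_mark_key(c, bkey_i_to_s_c(&b->key),
		      c->opts.btree_node_size, BCH_DATA_BTREE,
		      gc_pos_btree_root(b->btree_id), &stats, 0, 0);

	/* after: caller supplies the bkey type and whether this is an insert */
	bch2_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key),
		      true, 0,
		      gc_pos_btree_root(b->btree_id), &stats, 0, 0);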
 
diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c
index c67376f96f5ae635d63840654f4254db6226d5a6..90b10cef60b8ccd06abc9f903677ad6e5d96779c 100644
@@ -21,7 +21,7 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
                if (clock->timers.data[i] == timer)
                        goto out;
 
-       BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp));
+       BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
 out:
        spin_unlock(&clock->timer_lock);
 }
@@ -34,7 +34,7 @@ void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
 
        for (i = 0; i < clock->timers.used; i++)
                if (clock->timers.data[i] == timer) {
-                       heap_del(&clock->timers, i, io_timer_cmp);
+                       heap_del(&clock->timers, i, io_timer_cmp, NULL);
                        break;
                }
 
@@ -127,7 +127,7 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
 
        if (clock->timers.used &&
            time_after_eq(now, clock->timers.data[0]->expire))
-               heap_pop(&clock->timers, ret, io_timer_cmp);
+               heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
 
        spin_unlock(&clock->timer_lock);
 
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 71f649bc4c7f07bbd20b26ec4513cd259db05773..f69d76ec1e78480fbe011e487304eabf0550af56 100644
@@ -35,7 +35,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
        struct btree *v = c->verify_data;
        struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
        struct bset *sorted, *inmemory;
-       struct extent_pick_ptr pick;
+       struct extent_ptr_decoded pick;
        struct bch_dev *ca;
        struct bio *bio;
 
@@ -62,7 +62,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
        if (!bch2_dev_get_ioref(ca, READ))
                return;
 
-       bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
+       bio = bio_alloc_bioset(GFP_NOIO,
+                       buf_pages(n_sorted, btree_bytes(c)),
+                       &c->btree_bio);
        bio_set_dev(bio, ca->disk_sb.bdev);
        bio->bi_opf             = REQ_OP_READ|REQ_META;
        bio->bi_iter.bi_sector  = pick.ptr.offset;
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a4d7e52bcbd8da36cc27926084cf0fbfe0fde520..6eaa89c9259553e7156029fceb2fc846f35e95a0 100644
@@ -88,7 +88,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
 
        memset(&nr, 0, sizeof(nr));
 
-       heap_resort(iter, key_sort_cmp);
+       heap_resort(iter, key_sort_cmp, NULL);
 
        while (!bch2_btree_node_iter_large_end(iter)) {
                if (!should_drop_next_key(iter, b)) {
@@ -101,7 +101,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
                }
 
                sort_key_next(iter, b, iter->data);
-               heap_sift_down(iter, 0, key_sort_cmp);
+               heap_sift_down(iter, 0, key_sort_cmp, NULL);
        }
 
        dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -122,20 +122,11 @@ bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
        return NULL;
 }
 
-bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
 {
        struct bch_extent_ptr *ptr;
-       bool dropped = false;
 
-       extent_for_each_ptr_backwards(e, ptr)
-               if (ptr->dev == dev) {
-                       __bch2_extent_drop_ptr(e, ptr);
-                       dropped = true;
-               }
-
-       if (dropped)
-               bch2_extent_drop_redundant_crcs(e);
-       return dropped;
+       bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
 }
 
 const struct bch_extent_ptr *
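
bch2_extent_drop_device() now delegates to bch2_extent_drop_ptrs(), the same
condition-taking macro used in the btree_io.c hunk above. Its definition is
not shown in this diff; presumably it walks the pointers and calls the new
bch2_extent_drop_ptr() (which also prunes a crc entry left without pointers)
for each match, something like this sketch — the real macro lives in
extents.h, and extent_ptr_next is assumed from its use elsewhere:

	/* sketch only */
	#define bch2_extent_drop_ptrs(_e, _ptr, _cond)			\
	do {								\
		_ptr = &(_e).v->start->ptr;				\
									\
		while ((_ptr = extent_ptr_next(_e, _ptr)))		\
			if (_cond)					\
				_ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
			else						\
				(_ptr)++;				\
	} while (0)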
@@ -231,21 +222,21 @@ unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
 
 unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 {
-       struct bkey_s_c_extent e;
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
        unsigned ret = 0;
 
        switch (k.k->type) {
        case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               e = bkey_s_c_to_extent(k);
+       case BCH_EXTENT_CACHED: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
 
-               extent_for_each_ptr_crc(e, ptr, crc)
-                       if (!ptr->cached &&
-                           crc.compression_type != BCH_COMPRESSION_NONE &&
-                           crc.compressed_size < crc.live_size)
-                               ret = max_t(unsigned, ret, crc.compressed_size);
+               extent_for_each_ptr_decode(e, p, entry)
+                       if (!p.ptr.cached &&
+                           p.crc.compression_type != BCH_COMPRESSION_NONE &&
+                           p.crc.compressed_size < p.crc.live_size)
+                               ret = max_t(unsigned, ret, p.crc.compressed_size);
+       }
        }
 
        return ret;
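
Throughout this commit, extent_for_each_ptr_crc(e, ptr, crc) is replaced by
extent_for_each_ptr_decode(e, p, entry): instead of tracking a pointer and
its checksum state in two loop variables, the iterator yields a struct
extent_ptr_decoded bundling the pointer with its unpacked crc, so loop
bodies read p.ptr and p.crc. The shape of the struct, inferred from its uses
here (the real definition is in extents_types.h, which is in the list of
changed files):

	/* inferred shape, not copied from the tree */
	struct extent_ptr_decoded {
		struct bch_extent_crc_unpacked	crc;
		struct bch_extent_ptr		ptr;
	};

Usage, as in bch2_extent_is_compressed() above:

	extent_for_each_ptr_decode(e, p, entry)
		if (!p.ptr.cached &&
		    p.crc.compression_type != BCH_COMPRESSION_NONE)
			ret = max_t(unsigned, ret, p.crc.compressed_size);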
@@ -254,34 +245,50 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
                             struct bch_extent_ptr m, u64 offset)
 {
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
 
-       extent_for_each_ptr_crc(e, ptr, crc)
-               if (ptr->dev    == m.dev &&
-                   ptr->gen    == m.gen &&
-                   (s64) ptr->offset + crc.offset - bkey_start_offset(e.k) ==
+       extent_for_each_ptr_decode(e, p, entry)
+               if (p.ptr.dev   == m.dev &&
+                   p.ptr.gen   == m.gen &&
+                   (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
                    (s64) m.offset  - offset)
-                       return ptr;
+                       return true;
 
-       return NULL;
+       return false;
 }
 
-/* Doesn't cleanup redundant crcs */
-void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
+                                            struct bch_extent_ptr *ptr)
 {
+       union bch_extent_entry *dst;
+       union bch_extent_entry *src;
+
        EBUG_ON(ptr < &e.v->start->ptr ||
                ptr >= &extent_entry_last(e)->ptr);
        EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
-       memmove_u64s_down(ptr, ptr + 1,
-                         (u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
-       e.k->u64s -= sizeof(*ptr) / sizeof(u64);
-}
 
-void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
-{
-       __bch2_extent_drop_ptr(e, ptr);
-       bch2_extent_drop_redundant_crcs(e);
+       src = to_entry(ptr + 1);
+
+       if (src != extent_entry_last(e) &&
+           extent_entry_type(src) == BCH_EXTENT_ENTRY_ptr) {
+               dst = to_entry(ptr);
+       } else {
+               extent_for_each_entry(e, dst) {
+                       if (dst == to_entry(ptr))
+                               break;
+
+                       if (extent_entry_next(dst) == to_entry(ptr) &&
+                           extent_entry_is_crc(dst))
+                               break;
+               }
+       }
+
+       memmove_u64s_down(dst, src,
+                         (u64 *) extent_entry_last(e) - (u64 *) src);
+       e.k->u64s -= (u64 *) src - (u64 *) dst;
+
+       return dst;
 }
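
Note: bch2_extent_drop_ptr() now returns the entry that slid into the dropped
slot, so forward iteration can continue without restarting, and it folds in
the old redundant-crc cleanup for the common case: when the dropped pointer is
the only pointer governed by the checksum entry immediately before it, dst is
backed up so that crc entry is compacted out together with the pointer. The
compaction itself is an overlap-safe copy over the key's u64 payload; a
self-contained model of that step:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    /* Model of memmove_u64s_down(): close the gap [dst, src) in place. */
    static void drop_span(uint64_t *val, unsigned *u64s,
                          unsigned dst, unsigned src)
    {
            memmove(&val[dst], &val[src], (*u64s - src) * sizeof(val[0]));
            *u64s -= src - dst;
    }

    int main(void)
    {
            uint64_t val[] = { 10, 20, 30, 40, 50 };
            unsigned u64s = 5;

            drop_span(val, &u64s, 1, 3);        /* drop slots 1 and 2 */
            for (unsigned i = 0; i < u64s; i++)
                    printf("%llu ", (unsigned long long) val[i]);
            printf("\n");                       /* prints: 10 40 50 */
            return 0;
    }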
 
 static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
@@ -323,38 +330,38 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
                             struct bch_extent_crc_unpacked n)
 {
        struct bch_extent_crc_unpacked u;
-       struct bch_extent_ptr *ptr;
+       struct extent_ptr_decoded p;
        union bch_extent_entry *i;
+       bool ret = false;
 
        /* Find a checksum entry that covers only live data: */
-       if (!n.csum_type)
+       if (!n.csum_type) {
                extent_for_each_crc(extent_i_to_s(e), u, i)
                        if (!u.compression_type &&
                            u.csum_type &&
                            u.live_size == u.uncompressed_size) {
                                n = u;
-                               break;
+                               goto found;
                        }
-
-       if (!bch2_can_narrow_extent_crcs(extent_i_to_s_c(e), n))
                return false;
-
+       }
+found:
        BUG_ON(n.compression_type);
        BUG_ON(n.offset);
        BUG_ON(n.live_size != e->k.size);
 
-       bch2_extent_crc_append(e, n);
 restart_narrow_pointers:
-       extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u)
-               if (can_narrow_crc(u, n)) {
-                       ptr->offset += u.offset;
-                       extent_ptr_append(e, *ptr);
-                       __bch2_extent_drop_ptr(extent_i_to_s(e), ptr);
+       extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
+               if (can_narrow_crc(p.crc, n)) {
+                       bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
+                       p.ptr.offset += p.crc.offset;
+                       p.crc = n;
+                       bch2_extent_ptr_decoded_append(e, &p);
+                       ret = true;
                        goto restart_narrow_pointers;
                }
 
-       bch2_extent_drop_redundant_crcs(extent_i_to_s(e));
-       return true;
+       return ret;
 }
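
Note: the rewritten loop drops each narrowable pointer and re-appends it under
the narrower checksum via bch2_extent_ptr_decoded_append(); because that
reshuffles the entry array under the iterator, it restarts from the top after
every change, and the function now reports whether anything was actually
narrowed instead of returning true unconditionally. A standalone model of the
restart-on-mutation pattern:

    #include <stdio.h>

    static int v[] = { 3, 1, 4, 1, 5 };
    static int nr = 5;

    static void drop(int i)
    {
            for (; i + 1 < nr; i++)
                    v[i] = v[i + 1];
            nr--;
    }

    int main(void)
    {
    restart:
            for (int i = 0; i < nr; i++)
                    if (v[i] == 1) {
                            drop(i);
                            goto restart;   /* indices shifted; start over */
                    }

            for (int i = 0; i < nr; i++)
                    printf("%d ", v[i]);    /* prints: 3 4 5 */
            printf("\n");
            return 0;
    }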
 
 /* returns true if not equal */
@@ -371,87 +378,13 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
                bch2_crc_cmp(l.csum, r.csum));
 }
 
-void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
-{
-       union bch_extent_entry *entry = e.v->start;
-       union bch_extent_crc *crc, *prev = NULL;
-       struct bch_extent_crc_unpacked u, prev_u = { 0 };
-
-       while (entry != extent_entry_last(e)) {
-               union bch_extent_entry *next = extent_entry_next(entry);
-               size_t crc_u64s = extent_entry_u64s(entry);
-
-               if (!extent_entry_is_crc(entry))
-                       goto next;
-
-               crc = entry_to_crc(entry);
-               u = bch2_extent_crc_unpack(e.k, crc);
-
-               if (next == extent_entry_last(e)) {
-                       /* crc entry with no pointers after it: */
-                       goto drop;
-               }
-
-               if (extent_entry_is_crc(next)) {
-                       /* no pointers before next crc entry: */
-                       goto drop;
-               }
-
-               if (prev && !bch2_crc_unpacked_cmp(u, prev_u)) {
-                       /* identical to previous crc entry: */
-                       goto drop;
-               }
-
-               if (!prev &&
-                   !u.csum_type &&
-                   !u.compression_type) {
-                       /* null crc entry: */
-                       union bch_extent_entry *e2;
-
-                       extent_for_each_entry_from(e, e2, extent_entry_next(entry)) {
-                               if (!extent_entry_is_ptr(e2))
-                                       break;
-
-                               e2->ptr.offset += u.offset;
-                       }
-                       goto drop;
-               }
-
-               prev = crc;
-               prev_u = u;
-next:
-               entry = next;
-               continue;
-drop:
-               memmove_u64s_down(crc, next,
-                                 (u64 *) extent_entry_last(e) - (u64 *) next);
-               e.k->u64s -= crc_u64s;
-       }
-
-       EBUG_ON(bkey_val_u64s(e.k) && !bch2_extent_nr_ptrs(e.c));
-}
-
-static bool should_drop_ptr(const struct bch_fs *c,
-                           struct bkey_s_c_extent e,
-                           const struct bch_extent_ptr *ptr)
-{
-       return ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr);
-}
-
 static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
 {
-       struct bch_extent_ptr *ptr = &e.v->start->ptr;
-       bool dropped = false;
-
-       while ((ptr = extent_ptr_next(e, ptr)))
-               if (should_drop_ptr(c, e.c, ptr)) {
-                       __bch2_extent_drop_ptr(e, ptr);
-                       dropped = true;
-               } else
-                       ptr++;
+       struct bch_extent_ptr *ptr;
 
-       if (dropped)
-               bch2_extent_drop_redundant_crcs(e);
+       bch2_extent_drop_ptrs(e, ptr,
+               ptr->cached &&
+               ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
 }
 
 bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
@@ -475,6 +408,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
                     entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
                     entry = extent_entry_next(entry)) {
                        switch (extent_entry_type(entry)) {
+                       case BCH_EXTENT_ENTRY_ptr:
+                               break;
                        case BCH_EXTENT_ENTRY_crc32:
                                entry->crc32.csum = swab32(entry->crc32.csum);
                                break;
@@ -488,8 +423,6 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
                                entry->crc128.csum.lo = (__force __le64)
                                        swab64((__force u64) entry->crc128.csum.lo);
                                break;
-                       case BCH_EXTENT_ENTRY_ptr:
-                               break;
                        }
                }
                break;
@@ -586,12 +519,45 @@ out:
        return out - buf;
 }
 
-static inline bool dev_latency_better(struct bch_fs *c,
-                             const struct bch_extent_ptr *ptr1,
-                             const struct bch_extent_ptr *ptr2)
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+                                                  unsigned dev)
+{
+       struct bch_dev_io_failures *i;
+
+       for (i = f->devs; i < f->devs + f->nr; i++)
+               if (i->dev == dev)
+                       return i;
+
+       return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+                         struct extent_ptr_decoded *p)
+{
+       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+       if (!f) {
+               BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+               f = &failed->devs[failed->nr++];
+               f->dev          = p->ptr.dev;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else {
+               f->nr_failed++;
+       }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+                             const struct extent_ptr_decoded p1,
+                             const struct extent_ptr_decoded p2)
 {
-       struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
-       struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
+       struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+       struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
        u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
        u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
 
@@ -602,31 +568,29 @@ static inline bool dev_latency_better(struct bch_fs *c,
 
 static int extent_pick_read_device(struct bch_fs *c,
                                   struct bkey_s_c_extent e,
-                                  struct bch_devs_mask *avoid,
-                                  struct extent_pick_ptr *pick)
+                                  struct bch_io_failures *failed,
+                                  struct extent_ptr_decoded *pick)
 {
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_dev_io_failures *f;
        struct bch_dev *ca;
        int ret = 0;
 
-       extent_for_each_ptr_crc(e, ptr, crc) {
-               ca = bch_dev_bkey_exists(c, ptr->dev);
+       extent_for_each_ptr_decode(e, p, entry) {
+               ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
-               if (ptr->cached && ptr_stale(ca, ptr))
+               if (p.ptr.cached && ptr_stale(ca, &p.ptr))
                        continue;
 
-               if (avoid && test_bit(ptr->dev, avoid->d))
+               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+               if (f && f->nr_failed >= f->nr_retries)
                        continue;
 
-               if (ret && !dev_latency_better(c, ptr, &pick->ptr))
+               if (ret && !ptr_better(c, p, *pick))
                        continue;
 
-               *pick = (struct extent_pick_ptr) {
-                       .ptr    = *ptr,
-                       .crc    = crc,
-               };
-
+               *pick = p;
                ret = 1;
        }
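
Note: with the counters above, the skip test f->nr_failed >= f->nr_retries
means a device is avoided after its first failure by default (nr_retries
starts at 0), while the remaining candidates are still ranked by measured read
latency in ptr_better(). A sketch of the retry shape this enables; the helper
read_one_attempt() is hypothetical, not part of this patch:

    /* Sketch only: pick, try, record failure, pick again. */
    static int read_with_retries(struct bch_fs *c, struct bkey_s_c_extent e)
    {
            struct bch_io_failures failed = { .nr = 0 };
            struct extent_ptr_decoded pick;

            while (extent_pick_read_device(c, e, &failed, &pick) > 0) {
                    if (!read_one_attempt(c, &pick))    /* hypothetical */
                            return 0;
                    bch2_mark_io_failure(&failed, &pick);
            }
            return -EIO;
    }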
 
@@ -715,7 +679,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
                        goto err;
        }
 
-       if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
+       if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
                bch2_bkey_val_to_text(c, btree_node_type(b),
                                     buf, sizeof(buf), k);
                bch2_fs_bug(c,
@@ -752,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
 }
 
 int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
-                       struct bch_devs_mask *avoid,
-                       struct extent_pick_ptr *pick)
+                       struct bch_io_failures *failed,
+                       struct extent_ptr_decoded *pick)
 {
        return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
-                                      avoid, pick);
+                                      failed, pick);
 }
 
 /* Extents */
@@ -908,7 +872,7 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
 static inline void extent_sort_sift(struct btree_node_iter_large *iter,
                                    struct btree *b, size_t i)
 {
-       heap_sift_down(iter, i, extent_sort_cmp);
+       heap_sift_down(iter, i, extent_sort_cmp, NULL);
 }
 
 static inline void extent_sort_next(struct btree_node_iter_large *iter,
@@ -916,7 +880,7 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
                                    struct btree_node_iter_set *i)
 {
        sort_key_next(iter, b, i);
-       heap_sift_down(iter, i - iter->data, extent_sort_cmp);
+       heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
 }
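
Note: the heap helpers (heap_sift_down() here, and heap_resort(),
heap_add_or_replace(), heap_pop() later in this patch) grow a fourth argument,
NULL at every call site in this diff; presumably an optional hook invoked when
elements change position, e.g. to maintain back-pointers into the heap. An
illustrative model of that shape, not the bcachefs macros themselves:

    #include <stddef.h>
    #include <stdio.h>

    static void on_move(int *h, size_t i)
    {
            printf("slot %zu = %d\n", i, h[i]);
    }

    /* Min-heap sift-down; 'moved' may be NULL, like the call sites above. */
    static void sift_down(int *h, size_t nr, size_t i,
                          void (*moved)(int *, size_t))
    {
            for (;;) {
                    size_t l = 2 * i + 1, r = l + 1, m = i;

                    if (l < nr && h[l] < h[m]) m = l;
                    if (r < nr && h[r] < h[m]) m = r;
                    if (m == i)
                            break;

                    int tmp = h[i]; h[i] = h[m]; h[m] = tmp;
                    if (moved) { moved(h, i); moved(h, m); }
                    i = m;
            }
    }

    int main(void)
    {
            int h[] = { 9, 2, 7, 4 };

            sift_down(h, 4, 0, on_move);    /* pass NULL to skip the hook */
            return 0;
    }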
 
 static void extent_sort_append(struct bch_fs *c,
@@ -964,7 +928,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
        memset(&nr, 0, sizeof(nr));
 
-       heap_resort(iter, extent_sort_cmp);
+       heap_resort(iter, extent_sort_cmp, NULL);
 
        while (!bch2_btree_node_iter_large_end(iter)) {
                lk = __btree_node_offset_to_key(b, _l->k);
@@ -1076,8 +1040,9 @@ static void bch2_add_sectors(struct extent_insert_state *s,
        if (!sectors)
                return;
 
-       bch2_mark_key(c, k, sectors, BCH_DATA_USER, gc_pos_btree_node(b),
-                     &s->stats, s->trans->journal_res.seq, 0);
+       bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
+                     gc_pos_btree_node(b), &s->stats,
+                     s->trans->journal_res.seq, 0);
 }
 
 static void bch2_subtract_sectors(struct extent_insert_state *s,
@@ -1748,8 +1713,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
                return;
        }
 
-       if (!bkey_extent_is_cached(e.k) &&
-           !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
+       if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
                bch2_bkey_val_to_text(c, btree_node_type(b),
                                     buf, sizeof(buf), e.s_c);
                bch2_fs_bug(c,
@@ -1853,25 +1817,25 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc,
 void bch2_extent_crc_append(struct bkey_i_extent *e,
                            struct bch_extent_crc_unpacked new)
 {
-       struct bch_extent_crc_unpacked crc;
-       const union bch_extent_entry *i;
-
-       BUG_ON(new.compressed_size > new.uncompressed_size);
-       BUG_ON(new.live_size != e->k.size);
-       BUG_ON(!new.compressed_size || !new.uncompressed_size);
+       bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
+       __extent_entry_push(e);
+}
 
-       /*
-        * Look up the last crc entry, so we can check if we need to add
-        * another:
-        */
-       extent_for_each_crc(extent_i_to_s(e), crc, i)
-               ;
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
+                                   struct extent_ptr_decoded *p)
+{
+       struct bch_extent_crc_unpacked crc;
+       union bch_extent_entry *pos;
 
-       if (!bch2_crc_unpacked_cmp(crc, new))
-               return;
+       extent_for_each_crc(extent_i_to_s(e), crc, pos)
+               if (!bch2_crc_unpacked_cmp(crc, p->crc))
+                       goto found;
 
-       bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
-       __extent_entry_push(e);
+       bch2_extent_crc_append(e, p->crc);
+       pos = extent_entry_last(extent_i_to_s(e));
+found:
+       p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+       __extent_entry_insert(e, pos, to_entry(&p->ptr));
 }
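
Note: an extent value is a flat array of entries in which a checksum entry
governs every pointer entry after it, until the next checksum entry;
bch2_extent_ptr_decoded_append() preserves that invariant by inserting the
pointer directly after a crc entry equal to p->crc, appending a new crc first
if none matches. A standalone model of the invariant:

    #include <stdio.h>

    enum { CRC, PTR };
    struct entry { int type, val; };

    int main(void)
    {
            struct entry v[] = {
                    { CRC, 100 }, { PTR, 1 }, { PTR, 2 },
                    { CRC, 200 }, { PTR, 3 },
            };
            int cur_crc = 0;

            for (unsigned i = 0; i < sizeof(v) / sizeof(v[0]); i++)
                    if (v[i].type == CRC)
                            cur_crc = v[i].val;
                    else
                            printf("ptr %d governed by crc %d\n",
                                   v[i].val, cur_crc);
            return 0;
    }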
 
 /*
@@ -1957,8 +1921,8 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
 * other devices, it will still pick a pointer from a failed device.
  */
 int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
-                        struct bch_devs_mask *avoid,
-                        struct extent_pick_ptr *pick)
+                        struct bch_io_failures *failed,
+                        struct extent_ptr_decoded *pick)
 {
        int ret;
 
@@ -1969,7 +1933,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
        case BCH_EXTENT:
        case BCH_EXTENT_CACHED:
                ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
-                                             avoid, pick);
+                                             failed, pick);
 
                if (!ret && !bkey_extent_is_cached(k.k))
                        ret = -EIO;
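
Note: the return contract here: 0 means hole or reservation (or a cached
extent whose pointers are all stale), to be zero-filled; 1 means *pick is
valid; and a data extent with no usable pointer is a hard -EIO. A sketch of a
caller, with submit_pick() a hypothetical stand-in for the real submission
path in io.c:

    /* Sketch only; would live next to __bch2_read_extent() in io.c. */
    static int read_one_extent(struct bch_fs *c, struct bkey_s_c k,
                               struct bch_io_failures *failed, struct bio *bio)
    {
            struct extent_ptr_decoded pick;
            int ret = bch2_extent_pick_ptr(c, k, failed, &pick);

            if (ret < 0)
                    return ret;             /* no live pointer: -EIO */
            if (!ret) {
                    zero_fill_bio(bio);     /* hole or reservation */
                    return 0;
            }
            return submit_pick(c, &pick, bio);      /* hypothetical */
    }
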
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 66a02f1c5e5b78de8b427fa03bd0018b20284f7e..e04cb5a9839c5882b78838128620dcdcfb159fac 100644
@@ -52,13 +52,14 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
                                                     struct btree *,
                                                     struct btree_node_iter_large *);
 
+void bch2_mark_io_failure(struct bch_io_failures *,
+                         struct extent_ptr_decoded *);
 int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
-                       struct bch_devs_mask *avoid,
-                       struct extent_pick_ptr *);
-
+                       struct bch_io_failures *,
+                       struct extent_ptr_decoded *);
 int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
-                        struct bch_devs_mask *,
-                        struct extent_pick_ptr *);
+                        struct bch_io_failures *,
+                        struct extent_ptr_decoded *);
 
 void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
 
@@ -83,7 +84,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
-bool bch2_extent_drop_device(struct bkey_s_extent, unsigned);
+void bch2_extent_drop_device(struct bkey_s_extent, unsigned);
 const struct bch_extent_ptr *
 bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
 const struct bch_extent_ptr *
@@ -161,14 +162,11 @@ extent_entry_type(const union bch_extent_entry *e)
 static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
 {
        switch (extent_entry_type(entry)) {
-       case BCH_EXTENT_ENTRY_crc32:
-               return sizeof(struct bch_extent_crc32);
-       case BCH_EXTENT_ENTRY_crc64:
-               return sizeof(struct bch_extent_crc64);
-       case BCH_EXTENT_ENTRY_crc128:
-               return sizeof(struct bch_extent_crc128);
-       case BCH_EXTENT_ENTRY_ptr:
-               return sizeof(struct bch_extent_ptr);
+#define x(f, n)                                                \
+       case BCH_EXTENT_ENTRY_##f:                      \
+               return sizeof(struct bch_extent_##f);
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
        default:
                BUG();
        }
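
Note: extent_entry_bytes() now derives its switch from the
BCH_EXTENT_ENTRY_TYPES() x-macro, so the entry-type list is written once and
every use site supplies its own x(). The list itself is not shown in this
patch; given the stripe_ptr handling added to to_entry() below, it presumably
looks something like the hedged sketch here (the real definition lives in
bcachefs_format.h and may differ in members or numbering):

    #define BCH_EXTENT_ENTRY_TYPES()        \
            x(ptr,          0)              \
            x(crc32,        1)              \
            x(crc64,        2)              \
            x(crc128,       3)              \
            x(stripe_ptr,   4)

    /* Example expansion: generate the matching enum from the same list. */
    enum bch_extent_entry_type {
    #define x(f, n) BCH_EXTENT_ENTRY_##f = n,
            BCH_EXTENT_ENTRY_TYPES()
    #undef x
    };
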
@@ -181,12 +179,24 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
 
 static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
 {
-       return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+       switch (extent_entry_type(e)) {
+       case BCH_EXTENT_ENTRY_ptr:
+               return true;
+       default:
+               return false;
+       }
 }
 
 static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
 {
-       return !extent_entry_is_ptr(e);
+       switch (extent_entry_type(e)) {
+       case BCH_EXTENT_ENTRY_crc32:
+       case BCH_EXTENT_ENTRY_crc64:
+       case BCH_EXTENT_ENTRY_crc128:
+               return true;
+       default:
+               return false;
+       }
 }
 
 union bch_extent_crc {
@@ -200,11 +210,13 @@ union bch_extent_crc {
 #define to_entry(_entry)                                               \
 ({                                                                     \
        BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) &&        \
-                    !type_is(_entry, struct bch_extent_ptr *));        \
+                    !type_is(_entry, struct bch_extent_ptr *) &&       \
+                    !type_is(_entry, struct bch_extent_stripe_ptr *)); \
                                                                        \
        __builtin_choose_expr(                                          \
                (type_is_exact(_entry, const union bch_extent_crc *) || \
-                type_is_exact(_entry, const struct bch_extent_ptr *)), \
+                type_is_exact(_entry, const struct bch_extent_ptr *) ||\
+                type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
                (const union bch_extent_entry *) (_entry),              \
                (union bch_extent_entry *) (_entry));                   \
 })
@@ -234,44 +246,6 @@ union bch_extent_crc {
 
 /* checksum entries: */
 
-enum bch_extent_crc_type {
-       BCH_EXTENT_CRC_NONE,
-       BCH_EXTENT_CRC32,
-       BCH_EXTENT_CRC64,
-       BCH_EXTENT_CRC128,
-};
-
-static inline enum bch_extent_crc_type
-__extent_crc_type(const union bch_extent_crc *crc)
-{
-       if (!crc)
-               return BCH_EXTENT_CRC_NONE;
-
-       switch (extent_entry_type(to_entry(crc))) {
-       case BCH_EXTENT_ENTRY_crc32:
-               return BCH_EXTENT_CRC32;
-       case BCH_EXTENT_ENTRY_crc64:
-               return BCH_EXTENT_CRC64;
-       case BCH_EXTENT_ENTRY_crc128:
-               return BCH_EXTENT_CRC128;
-       default:
-               BUG();
-       }
-}
-
-#define extent_crc_type(_crc)                                          \
-({                                                                     \
-       BUILD_BUG_ON(!type_is(_crc, struct bch_extent_crc32 *) &&       \
-                    !type_is(_crc, struct bch_extent_crc64 *) &&       \
-                    !type_is(_crc, struct bch_extent_crc128 *) &&      \
-                    !type_is(_crc, union bch_extent_crc *));           \
-                                                                       \
-         type_is(_crc, struct bch_extent_crc32 *)  ? BCH_EXTENT_CRC32  \
-       : type_is(_crc, struct bch_extent_crc64 *)  ? BCH_EXTENT_CRC64  \
-       : type_is(_crc, struct bch_extent_crc128 *) ? BCH_EXTENT_CRC128 \
-       : __extent_crc_type((union bch_extent_crc *) _crc);             \
-})
-
 static inline struct bch_extent_crc_unpacked
 bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 {
@@ -283,14 +257,15 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
                .offset                 = _crc.offset,                  \
                .live_size              = k->size
 
-       switch (extent_crc_type(crc)) {
-       case BCH_EXTENT_CRC_NONE:
+       if (!crc)
                return (struct bch_extent_crc_unpacked) {
                        .compressed_size        = k->size,
                        .uncompressed_size      = k->size,
                        .live_size              = k->size,
                };
-       case BCH_EXTENT_CRC32: {
+
+       switch (extent_entry_type(to_entry(crc))) {
+       case BCH_EXTENT_ENTRY_crc32: {
                struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
                        common_fields(crc->crc32),
                };
@@ -302,7 +277,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 
                return ret;
        }
-       case BCH_EXTENT_CRC64: {
+       case BCH_EXTENT_ENTRY_crc64: {
                struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
                        common_fields(crc->crc64),
                        .nonce                  = crc->crc64.nonce,
@@ -313,7 +288,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 
                return ret;
        }
-       case BCH_EXTENT_CRC128: {
+       case BCH_EXTENT_ENTRY_crc128: {
                struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
                        common_fields(crc->crc128),
                        .nonce                  = crc->crc128.nonce,
@@ -346,23 +321,25 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 #define extent_for_each_entry(_e, _entry)                              \
        extent_for_each_entry_from(_e, _entry, (_e).v->start)
 
-/* Iterate over crcs only: */
+/* Iterate over pointers only: */
 
-#define __extent_crc_next(_e, _p)                                      \
+#define extent_ptr_next(_e, _ptr)                                      \
 ({                                                                     \
-       typeof(&(_e).v->start[0]) _entry = _p;                          \
+       typeof(&(_e).v->start[0]) _entry;                               \
                                                                        \
-       while ((_entry) < extent_entry_last(_e) &&                      \
-              !extent_entry_is_crc(_entry))                            \
-               (_entry) = extent_entry_next(_entry);                   \
+       extent_for_each_entry_from(_e, _entry, to_entry(_ptr))          \
+               if (extent_entry_is_ptr(_entry))                        \
+                       break;                                          \
                                                                        \
-       entry_to_crc(_entry < extent_entry_last(_e) ? _entry : NULL);   \
+       _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL;   \
 })
 
-#define __extent_for_each_crc(_e, _crc)                                        \
-       for ((_crc) = __extent_crc_next(_e, (_e).v->start);             \
-            (_crc);                                                    \
-            (_crc) = __extent_crc_next(_e, extent_entry_next(to_entry(_crc))))
+#define extent_for_each_ptr(_e, _ptr)                                  \
+       for ((_ptr) = &(_e).v->start->ptr;                              \
+            ((_ptr) = extent_ptr_next(_e, _ptr));                      \
+            (_ptr)++)
+
+/* Iterate over crcs only: */
 
 #define extent_crc_next(_e, _crc, _iter)                               \
 ({                                                                     \
@@ -383,69 +360,61 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 
 /* Iterate over pointers, with crcs: */
 
-#define extent_ptr_crc_next(_e, _ptr, _crc)                            \
+static inline struct extent_ptr_decoded
+__extent_ptr_decoded_init(const struct bkey *k)
+{
+       return (struct extent_ptr_decoded) {
+               .crc            = bch2_extent_crc_unpack(k, NULL),
+       };
+}
+
+#define EXTENT_ITERATE_EC              (1 << 0)
+
+#define __extent_ptr_next_decode(_e, _ptr, _entry)                     \
 ({                                                                     \
        __label__ out;                                                  \
-       typeof(&(_e).v->start[0]) _entry;                               \
                                                                        \
-       extent_for_each_entry_from(_e, _entry, to_entry(_ptr))          \
-               if (extent_entry_is_crc(_entry)) {                      \
-                       (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_entry));\
-               } else {                                                \
-                       _ptr = entry_to_ptr(_entry);                    \
+       extent_for_each_entry_from(_e, _entry, _entry)                  \
+               switch (extent_entry_type(_entry)) {                    \
+               case BCH_EXTENT_ENTRY_ptr:                              \
+                       (_ptr).ptr              = _entry->ptr;          \
                        goto out;                                       \
+               case BCH_EXTENT_ENTRY_crc32:                            \
+               case BCH_EXTENT_ENTRY_crc64:                            \
+               case BCH_EXTENT_ENTRY_crc128:                           \
+                       (_ptr).crc = bch2_extent_crc_unpack((_e).k,     \
+                                       entry_to_crc(_entry));          \
+                       break;                                          \
                }                                                       \
                                                                        \
-       _ptr = NULL;                                                    \
 out:                                                                   \
-       _ptr;                                                           \
-})
-
-#define extent_for_each_ptr_crc(_e, _ptr, _crc)                                \
-       for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL),             \
-            (_ptr) = &(_e).v->start->ptr;                              \
-            ((_ptr) = extent_ptr_crc_next(_e, _ptr, _crc));            \
-            (_ptr)++)
-
-/* Iterate over pointers only, and from a given position: */
-
-#define extent_ptr_next(_e, _ptr)                                      \
-({                                                                     \
-       struct bch_extent_crc_unpacked _crc;                            \
-                                                                       \
-       extent_ptr_crc_next(_e, _ptr, _crc);                            \
+       _entry < extent_entry_last(_e);                                 \
 })
 
-#define extent_for_each_ptr(_e, _ptr)                                  \
-       for ((_ptr) = &(_e).v->start->ptr;                              \
-            ((_ptr) = extent_ptr_next(_e, _ptr));                      \
-            (_ptr)++)
-
-#define extent_ptr_prev(_e, _ptr)                                      \
-({                                                                     \
-       typeof(&(_e).v->start->ptr) _p;                                 \
-       typeof(&(_e).v->start->ptr) _prev = NULL;                       \
-                                                                       \
-       extent_for_each_ptr(_e, _p) {                                   \
-               if (_p == (_ptr))                                       \
-                       break;                                          \
-               _prev = _p;                                             \
-       }                                                               \
-                                                                       \
-       _prev;                                                          \
-})
+#define extent_for_each_ptr_decode(_e, _ptr, _entry)                   \
+       for ((_ptr) = __extent_ptr_decoded_init((_e).k),                \
+            (_entry) = (_e).v->start;                                  \
+            __extent_ptr_next_decode(_e, _ptr, _entry);                \
+            (_entry) = extent_entry_next(_entry))
 
-/*
- * Use this when you'll be dropping pointers as you iterate. Quadratic,
- * unfortunately:
- */
-#define extent_for_each_ptr_backwards(_e, _ptr)                                \
-       for ((_ptr) = extent_ptr_prev(_e, NULL);                        \
-            (_ptr);                                                    \
-            (_ptr) = extent_ptr_prev(_e, _ptr))
 
 void bch2_extent_crc_append(struct bkey_i_extent *,
                            struct bch_extent_crc_unpacked);
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
+                                   struct extent_ptr_decoded *);
+
+static inline void __extent_entry_insert(struct bkey_i_extent *e,
+                                        union bch_extent_entry *dst,
+                                        union bch_extent_entry *new)
+{
+       union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e));
+
+       memmove_u64s_up((u64 *) dst + extent_entry_u64s(new),
+                       dst, (u64 *) end - (u64 *) dst);
+       e->k.u64s += extent_entry_u64s(new);
+       memcpy(dst, new, extent_entry_bytes(new));
+}
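
Note: __extent_entry_insert() is the mirror image of the drop path: shift the
tail up by the new entry's size (an overlapping copy, hence a memmove-style
helper), account the growth in k.u64s, then copy the entry into the hole. A
self-contained model:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static void insert_u64s(uint64_t *val, unsigned *u64s, unsigned dst,
                            const uint64_t *new, unsigned new_u64s)
    {
            memmove(&val[dst + new_u64s], &val[dst],
                    (*u64s - dst) * sizeof(val[0]));
            *u64s += new_u64s;
            memcpy(&val[dst], new, new_u64s * sizeof(val[0]));
    }

    int main(void)
    {
            uint64_t val[8] = { 1, 2, 5 };
            unsigned u64s = 3;
            uint64_t new[] = { 3, 4 };

            insert_u64s(val, &u64s, 2, new, 2);
            for (unsigned i = 0; i < u64s; i++)
                    printf("%llu ", (unsigned long long) val[i]);
            printf("\n");   /* prints: 1 2 3 4 5 */
            return 0;
    }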
 
 static inline void __extent_entry_push(struct bkey_i_extent *e)
 {
@@ -536,10 +505,23 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
 bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
                                 struct bch_extent_crc_unpacked);
 bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
-void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
 
-void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
-void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
+union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent,
+                                            struct bch_extent_ptr *);
+
+#define bch2_extent_drop_ptrs(_e, _ptr, _cond)                         \
+do {                                                                   \
+       _ptr = &(_e).v->start->ptr;                                     \
+                                                                       \
+       while ((_ptr = extent_ptr_next(_e, _ptr))) {                    \
+               if (_cond) {                                            \
+                       _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               (_ptr)++;                                               \
+       }                                                               \
+} while (0)
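
Note: this macro replaces the old quadratic extent_for_each_ptr_backwards()
dropping pattern: because bch2_extent_drop_ptr() returns the entry now
occupying the dropped slot, the scan can keep moving forward and only advances
_ptr when nothing was dropped. _cond is re-evaluated with _ptr in scope on
each step, so callers can pass an arbitrary expression (journal_io.c below
even passes a GNU statement expression). A minimal usage sketch in the style
of bch2_extent_drop_stale() above:

    /* Drop every cached pointer from an extent. */
    static void drop_cached_ptrs(struct bkey_s_extent e)
    {
            struct bch_extent_ptr *ptr;

            bch2_extent_drop_ptrs(e, ptr, ptr->cached);
    }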
 
 bool bch2_cut_front(struct bpos, struct bkey_i *);
 bool bch2_cut_back(struct bpos, struct bkey *);
diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h
index 76139f931fe04e50a51ce314147696424d1d0220..02c625672ad2011d3830cff1d1833ad1f6b9cab9 100644
@@ -18,9 +18,18 @@ struct bch_extent_crc_unpacked {
        struct bch_csum         csum;
 };
 
-struct extent_pick_ptr {
-       struct bch_extent_ptr           ptr;
+struct extent_ptr_decoded {
        struct bch_extent_crc_unpacked  crc;
+       struct bch_extent_ptr           ptr;
+};
+
+struct bch_io_failures {
+       u8                      nr;
+       struct bch_dev_io_failures {
+               u8              dev;
+               u8              nr_failed;
+               u8              nr_retries;
+       }                       devs[BCH_REPLICAS_MAX];
 };
 
 #endif /* _BCACHEFS_EXTENTS_TYPES_H */
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 250dd55fe0fd58fe4dfe538e305f13d273469fe3..986bb7d28b0fc6c426a580815b205949a8a0af69 100644
@@ -963,12 +963,12 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
 
                        if (bkey_extent_is_data(k.k)) {
                                struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-                               struct bch_extent_crc_unpacked crc;
                                const union bch_extent_entry *i;
+                               struct extent_ptr_decoded p;
 
-                               extent_for_each_crc(e, crc, i)
-                                       want_full_extent |= ((crc.csum_type != 0) |
-                                                            (crc.compression_type != 0));
+                               extent_for_each_ptr_decode(e, p, i)
+                                       want_full_extent |= ((p.crc.csum_type != 0) |
+                                                            (p.crc.compression_type != 0));
                        }
 
                        readpage_bio_extend(readpages_iter,
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index ae875870b78ddf0b638f33a62d671a49d36e516b..1cf729107b1ec49e971172f57cfea37277faa4de 100644
@@ -973,27 +973,27 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
 {
        if (bkey_extent_is_data(&k->k)) {
                struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
-               const struct bch_extent_ptr *ptr;
-               struct bch_extent_crc_unpacked crc;
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
                int ret;
 
-               extent_for_each_ptr_crc(e, ptr, crc) {
+               extent_for_each_ptr_decode(e, p, entry) {
                        int flags2 = 0;
-                       u64 offset = ptr->offset;
+                       u64 offset = p.ptr.offset;
 
-                       if (crc.compression_type)
+                       if (p.crc.compression_type)
                                flags2 |= FIEMAP_EXTENT_ENCODED;
                        else
-                               offset += crc.offset;
+                               offset += p.crc.offset;
 
                        if ((offset & (PAGE_SECTORS - 1)) ||
                            (e.k->size & (PAGE_SECTORS - 1)))
                                flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
 
                        ret = fiemap_fill_next_extent(info,
-                                                     bkey_start_offset(e.k) << 9,
-                                                     offset << 9,
-                                                     e.k->size << 9, flags|flags2);
+                                               bkey_start_offset(e.k) << 9,
+                                               offset << 9,
+                                               e.k->size << 9, flags|flags2);
                        if (ret)
                                return ret;
                }
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 021a80df098fc29cc44f24c218f77469b8c888d4..eceb48656e6091924868b5a07c54de83a442a6e7 100644
@@ -310,9 +310,9 @@ static void __bch2_write_index(struct bch_write_op *op)
                bkey_copy(dst, src);
 
                e = bkey_i_to_s_extent(dst);
-               extent_for_each_ptr_backwards(e, ptr)
-                       if (test_bit(ptr->dev, op->failed.d))
-                               bch2_extent_drop_ptr(e, ptr);
+
+               bch2_extent_drop_ptrs(e, ptr,
+                       test_bit(ptr->dev, op->failed.d));
 
                if (!bch2_extent_nr_ptrs(e.c)) {
                        ret = -EIO;
@@ -320,7 +320,8 @@ static void __bch2_write_index(struct bch_write_op *op)
                }
 
                if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
-                       ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, e.s_c);
+                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
+                                                     e.s_c);
                        if (ret)
                                goto err;
                }
@@ -1008,7 +1009,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 noinline
 static struct promote_op *__promote_alloc(struct bch_fs *c,
                                          struct bpos pos,
-                                         struct extent_pick_ptr *pick,
+                                         struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
                                          unsigned rbio_sectors,
                                          struct bch_read_bio **rbio)
@@ -1089,7 +1090,7 @@ err:
 static inline struct promote_op *promote_alloc(struct bch_fs *c,
                                               struct bvec_iter iter,
                                               struct bkey_s_c k,
-                                              struct extent_pick_ptr *pick,
+                                              struct extent_ptr_decoded *pick,
                                               struct bch_io_opts opts,
                                               unsigned flags,
                                               struct bch_read_bio **rbio,
@@ -1183,7 +1184,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
 
 static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
                                     struct bvec_iter bvec_iter, u64 inode,
-                                    struct bch_devs_mask *avoid, unsigned flags)
+                                    struct bch_io_failures *failed,
+                                    unsigned flags)
 {
        struct btree_iter iter;
        BKEY_PADDED(k) tmp;
@@ -1217,7 +1219,7 @@ retry:
                goto out;
        }
 
-       ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+       ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
        if (ret == READ_RETRY)
                goto retry;
        if (ret)
@@ -1231,7 +1233,7 @@ out:
 
 static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
                            struct bvec_iter bvec_iter, u64 inode,
-                           struct bch_devs_mask *avoid, unsigned flags)
+                           struct bch_io_failures *failed, unsigned flags)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -1254,7 +1256,7 @@ retry:
                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
                swap(bvec_iter.bi_size, bytes);
 
-               ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+               ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
                switch (ret) {
                case READ_RETRY:
                        goto retry;
@@ -1290,14 +1292,12 @@ static void bch2_rbio_retry(struct work_struct *work)
        struct bvec_iter iter   = rbio->bvec_iter;
        unsigned flags          = rbio->flags;
        u64 inode               = rbio->pos.inode;
-       struct bch_devs_mask avoid;
+       struct bch_io_failures failed = { .nr = 0 };
 
        trace_read_retry(&rbio->bio);
 
-       memset(&avoid, 0, sizeof(avoid));
-
        if (rbio->retry == READ_RETRY_AVOID)
-               __set_bit(rbio->pick.ptr.dev, avoid.d);
+               bch2_mark_io_failure(&failed, &rbio->pick);
 
        rbio->bio.bi_status = 0;
 
@@ -1307,9 +1307,9 @@ static void bch2_rbio_retry(struct work_struct *work)
        flags &= ~BCH_READ_MAY_PROMOTE;
 
        if (flags & BCH_READ_NODECODE)
-               bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
+               bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
        else
-               bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
+               bch2_read_retry(c, rbio, iter, inode, &failed, flags);
 }
 
 static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
@@ -1396,7 +1396,7 @@ out:
 }
 
 static bool should_narrow_crcs(struct bkey_s_c k,
-                              struct extent_pick_ptr *pick,
+                              struct extent_ptr_decoded *pick,
                               unsigned flags)
 {
        return !(flags & BCH_READ_IN_RETRY) &&
@@ -1549,9 +1549,9 @@ static void bch2_read_endio(struct bio *bio)
 
 int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                       struct bvec_iter iter, struct bkey_s_c k,
-                      struct bch_devs_mask *avoid, unsigned flags)
+                      struct bch_io_failures *failed, unsigned flags)
 {
-       struct extent_pick_ptr pick;
+       struct extent_ptr_decoded pick;
        struct bch_read_bio *rbio = NULL;
        struct bch_dev *ca;
        struct promote_op *promote = NULL;
@@ -1559,7 +1559,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
        struct bpos pos = bkey_start_pos(k.k);
        int pick_ret;
 
-       pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
+       pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
 
        /* hole or reservation - just zero fill: */
        if (!pick_ret)
@@ -1723,7 +1723,7 @@ noclone:
                rbio = bch2_rbio_free(rbio);
 
                if (ret == READ_RETRY_AVOID) {
-                       __set_bit(pick.ptr.dev, avoid->d);
+                       bch2_mark_io_failure(failed, &pick);
                        ret = READ_RETRY;
                }
 
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index 1724232fd2747c17201e92e87d64520ab04910d2..5bd5f846122e1af5ad74cb827ca9340efd1009bf 100644
@@ -94,10 +94,10 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio)
 
 struct bch_devs_mask;
 struct cache_promote_op;
-struct extent_pick_ptr;
+struct extent_ptr_decoded;
 
 int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
-                      struct bkey_s_c, struct bch_devs_mask *, unsigned);
+                      struct bkey_s_c, struct bch_io_failures *, unsigned);
 void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
 
 enum bch_read_flags {
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h
index fe5779b361c5efbb2b4e0b1aa2fa66eab1c7ba5e..8ec846cc996f1d09aa2386f159e84f4d2832edd7 100644
@@ -54,7 +54,7 @@ struct bch_read_bio {
 
        struct bch_devs_list    devs_have;
 
-       struct extent_pick_ptr  pick;
+       struct extent_ptr_decoded pick;
        /* start pos of data we read (may not be pos of data we want) */
        struct bpos             pos;
        struct bversion         version;
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 6759810b19ef5dedc9ddb5fa64d153cc6bcafa77..5870392e383eda54508e0c2ad6a03443140c4190 100644
@@ -352,10 +352,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
        return true;
 }
 
-int bch2_journal_mark(struct bch_fs *, struct list_head *);
-void bch2_journal_entries_free(struct list_head *);
-int bch2_journal_replay(struct bch_fs *, struct list_head *);
-
 static inline void bch2_journal_set_replay_done(struct journal *j)
 {
        BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 2f88e2422c5e8e8fafd4e3573ee39679950989ed..0cb1bc3c0df71f92410e39f757ef24d69b370be7 100644
@@ -429,7 +429,6 @@ static int journal_read_bucket(struct bch_dev *ca,
 {
        struct bch_fs *c = ca->fs;
        struct journal_device *ja = &ca->journal;
-       struct bio *bio = ja->bio;
        struct jset *j = NULL;
        unsigned sectors, sectors_read = 0;
        u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
@@ -441,10 +440,14 @@ static int journal_read_bucket(struct bch_dev *ca,
 
        while (offset < end) {
                if (!sectors_read) {
-reread:                        sectors_read = min_t(unsigned,
+                       struct bio *bio;
+reread:
+                       sectors_read = min_t(unsigned,
                                end - offset, buf->size >> 9);
 
-                       bio_reset(bio);
+                       bio = bio_kmalloc(GFP_KERNEL,
+                                         buf_pages(buf->data,
+                                                   sectors_read << 9));
                        bio_set_dev(bio, ca->disk_sb.bdev);
                        bio->bi_iter.bi_sector  = offset;
                        bio->bi_iter.bi_size    = sectors_read << 9;
@@ -452,6 +455,7 @@ reread:                     sectors_read = min_t(unsigned,
                        bch2_bio_map(bio, buf->data);
 
                        ret = submit_bio_wait(bio);
+                       bio_put(bio);
 
                        if (bch2_dev_io_err_on(ret, ca,
                                               "journal read from sector %llu",
@@ -849,28 +853,6 @@ fsck_err:
 
 /* journal replay: */
 
-int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
-{
-       struct bkey_i *k, *n;
-       struct jset_entry *j;
-       struct journal_replay *r;
-       int ret;
-
-       list_for_each_entry(r, list, list)
-               for_each_jset_key(k, n, j, &r->j) {
-                       enum bkey_type type = bkey_type(j->level, j->btree_id);
-                       struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
-
-                       if (btree_type_has_ptrs(type)) {
-                               ret = bch2_btree_mark_key_initial(c, type, k_s_c);
-                               if (ret)
-                                       return ret;
-                       }
-               }
-
-       return 0;
-}
-
 int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 {
        struct journal *j = &c->journal;
@@ -1064,14 +1046,19 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
         * entry - that's why we drop pointers to devices <= current free space,
         * i.e. whichever device was limiting the current journal entry size.
         */
-       extent_for_each_ptr_backwards(e, ptr) {
-                  ca = bch_dev_bkey_exists(c, ptr->dev);
+       bch2_extent_drop_ptrs(e, ptr, ({
+               ca = bch_dev_bkey_exists(c, ptr->dev);
 
-               if (ca->mi.state != BCH_MEMBER_STATE_RW ||
-                   ca->journal.sectors_free <= sectors)
-                       __bch2_extent_drop_ptr(e, ptr);
-               else
-                       ca->journal.sectors_free -= sectors;
+               ca->mi.state != BCH_MEMBER_STATE_RW ||
+               ca->journal.sectors_free <= sectors;
+       }));
+
+       extent_for_each_ptr(e, ptr) {
+               ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW ||
+                      ca->journal.sectors_free <= sectors);
+               ca->journal.sectors_free -= sectors;
        }
 
        replicas = bch2_extent_nr_ptrs(e.c);
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index e303df9241dec5a79bb22f509d72f31d6103c8d2..d0a652cf93564d9c7725946d2e34e80094517091 100644
@@ -36,6 +36,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
 
 int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
+void bch2_journal_entries_free(struct list_head *);
+int bch2_journal_replay(struct bch_fs *, struct list_head *);
 
 int bch2_journal_entry_sectors(struct journal *);
 void bch2_journal_write(struct closure *);
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index f5cbf44d7b8c21b7a1a5b1536a364375dffde9ea..c0dfe1c641e22d89eb98d6451eedaf8bcede01c6 100644
@@ -50,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
               !(ret = btree_iter_err(k))) {
                if (!bkey_extent_is_data(k.k) ||
                    !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
-                       ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
+                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
                        if (ret)
                                break;
                        bch2_btree_iter_next(&iter);
@@ -71,7 +71,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                 */
                bch2_extent_normalize(c, e.s);
 
-               ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
                                              bkey_i_to_s_c(&tmp.key));
                if (ret)
                        break;
@@ -134,7 +134,7 @@ retry:
                                 */
                                bch2_btree_iter_downgrade(&iter);
 
-                               ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
                                                              bkey_i_to_s_c(&b->key));
                                if (ret)
                                        goto err;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e75e6e71ecc7ae2f854c896ce5a7a9e731e71019..c9495ab78cf051e3d421e502e36b806368ed8d13 100644
@@ -67,8 +67,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                struct bkey_i_extent *insert, *new =
                        bkey_i_to_extent(bch2_keylist_front(keys));
                BKEY_PADDED(k) _new, _insert;
-               struct bch_extent_ptr *ptr;
-               struct bch_extent_crc_unpacked crc;
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
                bool did_work = false;
                int nr;
 
@@ -98,15 +98,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                bch2_cut_back(new->k.p, &insert->k);
                bch2_cut_back(insert->k.p, &new->k);
 
-               if (m->data_cmd == DATA_REWRITE) {
-                       ptr = (struct bch_extent_ptr *)
-                               bch2_extent_has_device(extent_i_to_s_c(insert),
-                                                      m->data_opts.rewrite_dev);
-                       bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
-               }
+               if (m->data_cmd == DATA_REWRITE)
+                       bch2_extent_drop_device(extent_i_to_s(insert),
+                                               m->data_opts.rewrite_dev);
 
-               extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
-                       if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
+               extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
+                       if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
                                /*
                                 * raced with another move op? extent already
                                 * has a pointer to the device we just wrote
@@ -115,8 +112,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                                continue;
                        }
 
-                       bch2_extent_crc_append(insert, crc);
-                       extent_ptr_append(insert, *ptr);
+                       bch2_extent_ptr_decoded_append(insert, &p);
                        did_work = true;
                }
 
@@ -153,7 +149,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                        goto next;
                }
 
-               ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
                                              extent_i_to_s_c(insert).s_c);
                if (ret)
                        break;
@@ -379,8 +375,8 @@ static int bch2_move_extent(struct bch_fs *c,
                            struct data_opts data_opts)
 {
        struct moving_io *io;
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
        unsigned sectors = e.k->size, pages;
        int ret = -ENOMEM;
 
@@ -393,8 +389,8 @@ static int bch2_move_extent(struct bch_fs *c,
                SECTORS_IN_FLIGHT_PER_DEVICE);
 
        /* write path might have to decompress data: */
-       extent_for_each_ptr_crc(e, ptr, crc)
-               sectors = max_t(unsigned, sectors, crc.uncompressed_size);
+       extent_for_each_ptr_decode(e, p, entry)
+               sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
 
        pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
        io = kzalloc(sizeof(struct moving_io) +
@@ -605,7 +601,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
                           BTREE_ITER_PREFETCH, k) {
-               ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
+               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
                if (ret)
                        break;
        }
@@ -629,7 +625,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
 
        for (id = 0; id < BTREE_ID_NR; id++) {
                for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-                       ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
                                                      bkey_i_to_s_c(&b->key));
 
                        bch2_btree_iter_cond_resched(&iter);
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 468865625f3122277cc2c5a422bb28b2e6e678c2..70318f2ca39b7650bfd9d41df780d342ae55f11b 100644
@@ -160,7 +160,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
                        .sectors        = bucket_sectors_used(m),
                        .offset         = bucket_to_sector(ca, b),
                };
-               heap_add_or_replace(h, e, -sectors_used_cmp);
+               heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
        }
        up_read(&ca->bucket_lock);
        up_read(&c->gc_lock);
@@ -169,7 +169,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
                sectors_to_move += i->sectors;
 
        while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
-               BUG_ON(!heap_pop(h, e, -sectors_used_cmp));
+               BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL));
                sectors_to_move -= e.sectors;
        }
 
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 3fbe7b10be35a0f4b8ba7310b6b5b25b91323ba4..85ea4c6bf90fd93b9cef8b1843aab867c34f845e 100644
 #include <trace/events/bcachefs.h>
 
 static inline bool rebalance_ptr_pred(struct bch_fs *c,
-                                     const struct bch_extent_ptr *ptr,
-                                     struct bch_extent_crc_unpacked crc,
+                                     struct extent_ptr_decoded p,
                                      struct bch_io_opts *io_opts)
 {
        if (io_opts->background_target &&
-           !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
-           !ptr->cached)
+           !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
+           !p.ptr.cached)
                return true;
 
        if (io_opts->background_compression &&
-           crc.compression_type !=
+           p.crc.compression_type !=
            bch2_compression_opt_to_type[io_opts->background_compression])
                return true;
 
@@ -38,8 +37,8 @@ void bch2_rebalance_add_key(struct bch_fs *c,
                            struct bkey_s_c k,
                            struct bch_io_opts *io_opts)
 {
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
        struct bkey_s_c_extent e;
 
        if (!bkey_extent_is_data(k.k))
@@ -51,13 +50,13 @@ void bch2_rebalance_add_key(struct bch_fs *c,
 
        e = bkey_s_c_to_extent(k);
 
-       extent_for_each_ptr_crc(e, ptr, crc)
-               if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
-                       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+       extent_for_each_ptr_decode(e, p, entry)
+               if (rebalance_ptr_pred(c, p, io_opts)) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
-                       if (atomic64_add_return(crc.compressed_size,
+                       if (atomic64_add_return(p.crc.compressed_size,
                                                &ca->rebalance_work) ==
-                           crc.compressed_size)
+                           p.crc.compressed_size)
                                rebalance_wakeup(c);
                }
 }
@@ -75,16 +74,16 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
                                    struct bch_io_opts *io_opts,
                                    struct data_opts *data_opts)
 {
-       const struct bch_extent_ptr *ptr;
-       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
 
        /* Make sure we have room to add a new pointer: */
        if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
            BKEY_EXTENT_VAL_U64s_MAX)
                return DATA_SKIP;
 
-       extent_for_each_ptr_crc(e, ptr, crc)
-               if (rebalance_ptr_pred(c, ptr, crc, io_opts))
+       extent_for_each_ptr_decode(e, p, entry)
+               if (rebalance_ptr_pred(c, p, io_opts))
                        goto found;
 
        return DATA_SKIP;
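
The rebalance predicate is unchanged in substance: a pointer wants moving if
it is dirty and sits outside the configured background target, or if its
compression type differs from the configured background compression. A
minimal standalone model (the struct fields and the dev_in_target() stand-in
are assumptions, not the tree's types):

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-ins for the bcachefs types */
struct ptr  { unsigned dev; bool cached; unsigned compression_type; };
struct opts { unsigned background_target; unsigned background_compression; };

static bool dev_in_target(unsigned dev, unsigned target)
{
	return dev == target;		/* stand-in for bch2_dev_in_target() */
}

static bool needs_rebalance(struct ptr p, struct opts o)
{
	if (o.background_target &&
	    !dev_in_target(p.dev, o.background_target) &&
	    !p.cached)
		return true;

	return o.background_compression &&
	       p.compression_type != o.background_compression;
}

int main(void)
{
	struct ptr p  = { .dev = 1, .cached = false, .compression_type = 0 };
	struct opts o = { .background_target = 2, .background_compression = 0 };

	printf("%d\n", needs_rebalance(p, o));	/* 1: dirty, outside target */
	return 0;
}
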
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 1e94d35fde96b76f5273745d8f11521f257ef6ba..b0cef995e9e6923907d66d559f0d8729ba20c6bf 100644 (file)
@@ -3,17 +3,32 @@
 #include "replicas.h"
 #include "super-io.h"
 
+struct bch_replicas_entry_padded {
+       struct bch_replicas_entry       e;
+       u8                              pad[BCH_SB_MEMBERS_MAX];
+};
+
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
                                            struct bch_replicas_cpu *);
 
 /* Replicas tracking - in memory: */
 
+static inline int u8_cmp(u8 l, u8 r)
+{
+       return (l > r) - (l < r);
+}
+
+static void replicas_entry_sort(struct bch_replicas_entry *e)
+{
+       bubble_sort(e->devs, e->nr_devs, u8_cmp);
+}
+
 #define for_each_cpu_replicas_entry(_r, _i)                            \
        for (_i = (_r)->entries;                                        \
             (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
             _i = (void *) (_i) + (_r)->entry_size)
 
-static inline struct bch_replicas_cpu_entry *
+static inline struct bch_replicas_entry *
 cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
 {
        return (void *) r->entries + r->entry_size * i;
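
replicas_entry_sort() is what lets the rest of this file compare entries
with plain memcmp(): once every entry's device list is sorted, equal replica
sets are bytewise equal, and eytzinger0_sort()/eytzinger0_find() can treat
entries as opaque fixed-size keys. A standalone model of that invariant
(illustrative only):

#include <stdio.h>
#include <string.h>

/* sort a device list so memcmp() gives a total order on entries */
static void sort_devs(unsigned char *devs, unsigned n)
{
	unsigned i, j;

	for (i = 0; i + 1 < n; i++)		/* bubble sort, as in the patch */
		for (j = 0; j + 1 < n - i; j++)
			if (devs[j] > devs[j + 1]) {
				unsigned char t = devs[j];
				devs[j] = devs[j + 1];
				devs[j + 1] = t;
			}
}

int main(void)
{
	unsigned char a[] = { 3, 0, 2 }, b[] = { 0, 2, 3 };

	sort_devs(a, 3);
	printf("%d\n", !memcmp(a, b, 3));	/* 1: same replica set */
	return 0;
}
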
@@ -24,84 +39,79 @@ static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
        eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
 }
 
-static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
-                                    unsigned dev)
+static int replicas_entry_to_text(struct bch_replicas_entry *e,
+                                 char *buf, size_t size)
 {
-       return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
-}
+       char *out = buf, *end = out + size;
+       unsigned i;
 
-static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
-                                   unsigned dev)
-{
-       e->devs[dev >> 3] |= 1 << (dev & 7);
-}
+       out += scnprintf(out, end - out, "%u: [", e->data_type);
 
-static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
-{
-       return (r->entry_size -
-               offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
+       for (i = 0; i < e->nr_devs; i++)
+               out += scnprintf(out, end - out,
+                                i ? " %u" : "%u", e->devs[i]);
+       out += scnprintf(out, end - out, "]");
+
+       return out - buf;
 }
 
 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
                              char *buf, size_t size)
 {
        char *out = buf, *end = out + size;
-       struct bch_replicas_cpu_entry *e;
+       struct bch_replicas_entry *e;
        bool first = true;
-       unsigned i;
 
        for_each_cpu_replicas_entry(r, e) {
-               bool first_e = true;
-
                if (!first)
                        out += scnprintf(out, end - out, " ");
                first = false;
 
-               out += scnprintf(out, end - out, "%u: [", e->data_type);
-
-               for (i = 0; i < replicas_dev_slots(r); i++)
-                       if (replicas_test_dev(e, i)) {
-                               if (!first_e)
-                                       out += scnprintf(out, end - out, " ");
-                               first_e = false;
-                               out += scnprintf(out, end - out, "%u", i);
-                       }
-               out += scnprintf(out, end - out, "]");
+               out += replicas_entry_to_text(e, out, end - out);
        }
 
        return out - buf;
 }
 
-static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
-                                       enum bch_data_type data_type,
-                                       struct bch_replicas_cpu_entry *r,
-                                       unsigned *max_dev)
+static void extent_to_replicas(struct bkey_s_c k,
+                              struct bch_replicas_entry *r)
 {
-       const struct bch_extent_ptr *ptr;
-       unsigned nr = 0;
-
-       BUG_ON(!data_type ||
-              data_type == BCH_DATA_SB ||
-              data_type >= BCH_DATA_NR);
-
-       memset(r, 0, sizeof(*r));
-       r->data_type = data_type;
+       if (bkey_extent_is_data(k.k)) {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
+
+               extent_for_each_ptr_decode(e, p, entry)
+                       if (!p.ptr.cached)
+                               r->devs[r->nr_devs++] = p.ptr.dev;
+       }
+}
 
-       *max_dev = 0;
+static void bkey_to_replicas(enum bkey_type type,
+                            struct bkey_s_c k,
+                            struct bch_replicas_entry *e)
+{
+       e->nr_devs = 0;
+
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+               e->data_type = BCH_DATA_BTREE;
+               extent_to_replicas(k, e);
+               break;
+       case BKEY_TYPE_EXTENTS:
+               e->data_type = BCH_DATA_USER;
+               extent_to_replicas(k, e);
+               break;
+       default:
+               break;
+       }
 
-       extent_for_each_ptr(e, ptr)
-               if (!ptr->cached) {
-                       *max_dev = max_t(unsigned, *max_dev, ptr->dev);
-                       replicas_set_dev(r, ptr->dev);
-                       nr++;
-               }
-       return nr;
+       replicas_entry_sort(e);
 }
 
 static inline void devlist_to_replicas(struct bch_devs_list devs,
                                       enum bch_data_type data_type,
-                                      struct bch_replicas_cpu_entry *r,
-                                      unsigned *max_dev)
+                                      struct bch_replicas_entry *e)
 {
        unsigned i;
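
replicas_entry_to_text() above uses the kernel's scnprintf() accumulation
idiom: keep out/end pointers and add the return value, which is the number
of bytes actually written, so the walk can never run past the buffer. A
userspace model of the idiom (my_scnprintf() is a hypothetical stand-in
built on vsnprintf()):

#include <stdarg.h>
#include <stdio.h>

/* model of scnprintf(): like snprintf(), but returns what was written */
static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(buf, size, fmt, args);
	va_end(args);

	if (n < 0)
		return 0;
	return (size_t) n < size ? n : (size ? (int) (size - 1) : 0);
}

int main(void)
{
	char buf[16], *out = buf, *end = buf + sizeof(buf);
	unsigned devs[] = { 0, 2, 3 }, i;

	out += my_scnprintf(out, end - out, "1: [");
	for (i = 0; i < 3; i++)
		out += my_scnprintf(out, end - out, i ? " %u" : "%u", devs[i]);
	out += my_scnprintf(out, end - out, "]");

	printf("%s (%d bytes)\n", buf, (int) (out - buf));	/* 1: [0 2 3] */
	return 0;
}
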
 
@@ -109,28 +119,24 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
               data_type == BCH_DATA_SB ||
               data_type >= BCH_DATA_NR);
 
-       memset(r, 0, sizeof(*r));
-       r->data_type = data_type;
+       e->data_type    = data_type;
+       e->nr_devs      = 0;
 
-       *max_dev = 0;
+       for (i = 0; i < devs.nr; i++)
+               e->devs[e->nr_devs++] = devs.devs[i];
 
-       for (i = 0; i < devs.nr; i++) {
-               *max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
-               replicas_set_dev(r, devs.devs[i]);
-       }
+       replicas_entry_sort(e);
 }
 
 static struct bch_replicas_cpu *
 cpu_replicas_add_entry(struct bch_replicas_cpu *old,
-                      struct bch_replicas_cpu_entry new_entry,
-                      unsigned max_dev)
+                      struct bch_replicas_entry *new_entry)
 {
        struct bch_replicas_cpu *new;
        unsigned i, nr, entry_size;
 
-       entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
-               DIV_ROUND_UP(max_dev + 1, 8);
-       entry_size = max(entry_size, old->entry_size);
+       entry_size = max_t(unsigned, old->entry_size,
+                          replicas_entry_bytes(new_entry));
        nr = old->nr + 1;
 
        new = kzalloc(sizeof(struct bch_replicas_cpu) +
@@ -144,30 +150,28 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
        for (i = 0; i < old->nr; i++)
                memcpy(cpu_replicas_entry(new, i),
                       cpu_replicas_entry(old, i),
-                      min(new->entry_size, old->entry_size));
+                      old->entry_size);
 
        memcpy(cpu_replicas_entry(new, old->nr),
-              &new_entry,
-              new->entry_size);
+              new_entry,
+              replicas_entry_bytes(new_entry));
 
        bch2_cpu_replicas_sort(new);
        return new;
 }
 
 static bool replicas_has_entry(struct bch_replicas_cpu *r,
-                               struct bch_replicas_cpu_entry search,
-                               unsigned max_dev)
+                              struct bch_replicas_entry *search)
 {
-       return max_dev < replicas_dev_slots(r) &&
+       return replicas_entry_bytes(search) <= r->entry_size &&
                eytzinger0_find(r->entries, r->nr,
                                r->entry_size,
-                               memcmp, &search) < r->nr;
+                               memcmp, search) < r->nr;
 }
 
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
-                               struct bch_replicas_cpu_entry new_entry,
-                               unsigned max_dev)
+                               struct bch_replicas_entry *new_entry)
 {
        struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
        int ret = -ENOMEM;
@@ -176,16 +180,16 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
        old_gc = rcu_dereference_protected(c->replicas_gc,
                                           lockdep_is_held(&c->sb_lock));
-       if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
-               new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
+       if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
+               new_gc = cpu_replicas_add_entry(old_gc, new_entry);
                if (!new_gc)
                        goto err;
        }
 
        old_r = rcu_dereference_protected(c->replicas,
                                          lockdep_is_held(&c->sb_lock));
-       if (!replicas_has_entry(old_r, new_entry, max_dev)) {
-               new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
+       if (!replicas_has_entry(old_r, new_entry)) {
+               new_r = cpu_replicas_add_entry(old_r, new_entry);
                if (!new_r)
                        goto err;
 
@@ -220,47 +224,63 @@ err:
        return ret;
 }
 
+static int __bch2_mark_replicas(struct bch_fs *c,
+                               struct bch_replicas_entry *devs)
+{
+       struct bch_replicas_cpu *r, *gc_r;
+       bool marked;
+
+       rcu_read_lock();
+       r = rcu_dereference(c->replicas);
+       gc_r = rcu_dereference(c->replicas_gc);
+       marked = replicas_has_entry(r, devs) &&
+               (!likely(gc_r) || replicas_has_entry(gc_r, devs));
+       rcu_read_unlock();
+
+       return likely(marked) ? 0
+               : bch2_mark_replicas_slowpath(c, devs);
+}
+
 int bch2_mark_replicas(struct bch_fs *c,
                       enum bch_data_type data_type,
                       struct bch_devs_list devs)
 {
-       struct bch_replicas_cpu_entry search;
-       struct bch_replicas_cpu *r, *gc_r;
-       unsigned max_dev;
-       bool marked;
+       struct bch_replicas_entry_padded search;
 
        if (!devs.nr)
                return 0;
 
-       BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
+       memset(&search, 0, sizeof(search));
 
-       devlist_to_replicas(devs, data_type, &search, &max_dev);
+       BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
 
-       rcu_read_lock();
-       r = rcu_dereference(c->replicas);
-       gc_r = rcu_dereference(c->replicas_gc);
-       marked = replicas_has_entry(r, search, max_dev) &&
-               (!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
-       rcu_read_unlock();
+       devlist_to_replicas(devs, data_type, &search.e);
 
-       return likely(marked) ? 0
-               : bch2_mark_replicas_slowpath(c, search, max_dev);
+       return __bch2_mark_replicas(c, &search.e);
 }
 
 int bch2_mark_bkey_replicas(struct bch_fs *c,
-                           enum bch_data_type data_type,
+                           enum bkey_type type,
                            struct bkey_s_c k)
 {
-       struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-       unsigned i;
+       struct bch_replicas_entry_padded search;
        int ret;
 
-       for (i = 0; i < cached.nr; i++)
-               if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
-                                             bch2_dev_list_single(cached.devs[i]))))
-                       return ret;
+       if (type == BKEY_TYPE_EXTENTS) {
+               struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+               unsigned i;
+
+               for (i = 0; i < cached.nr; i++)
+                       if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+                                               bch2_dev_list_single(cached.devs[i]))))
+                               return ret;
+       }
+
+       bkey_to_replicas(type, k, &search.e);
 
-       return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
+       return search.e.nr_devs
+               ? __bch2_mark_replicas(c, &search.e)
+               : 0;
 }
 
 int bch2_replicas_gc_end(struct bch_fs *c, int ret)
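
__bch2_mark_replicas() factors out the common fast path: look the entry up
in the current (and, during gc, the gc) table under rcu_read_lock(), and
only on a miss fall back to the slowpath, which copies the table, adds the
entry, and republishes it. A single-threaded model of that
check-then-copy-and-publish shape (the RCU and sb_lock protocol is
deliberately elided):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* a toy replicas table: immutable once published, capacity checks elided */
struct table { unsigned nr; unsigned entries[16]; };

static bool has_entry(const struct table *t, unsigned e)
{
	unsigned i;

	for (i = 0; i < t->nr; i++)
		if (t->entries[i] == e)
			return true;
	return false;
}

static int mark(struct table **tp, unsigned e)
{
	struct table *old = *tp, *new;

	if (has_entry(old, e))		/* fast path: already marked */
		return 0;

	new = malloc(sizeof(*new));	/* slow path: copy, add, publish */
	if (!new)
		return -1;
	*new = *old;
	new->entries[new->nr++] = e;
	*tp = new;
	free(old);
	return 0;
}

int main(void)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return 1;
	mark(&t, 7);
	mark(&t, 7);			/* second call takes the fast path */
	printf("%u entries\n", t->nr);	/* 1 entries */
	free(t);
	return 0;
}
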
@@ -303,7 +323,7 @@ err:
 int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 {
        struct bch_replicas_cpu *dst, *src;
-       struct bch_replicas_cpu_entry *e;
+       struct bch_replicas_entry *e;
 
        lockdep_assert_held(&c->replicas_gc_lock);
 
@@ -338,40 +358,19 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 
 /* Replicas tracking - superblock: */
 
-static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
-                                       unsigned *nr,
-                                       unsigned *bytes,
-                                       unsigned *max_dev)
-{
-       struct bch_replicas_entry *i;
-       unsigned j;
-
-       *nr     = 0;
-       *bytes  = sizeof(*r);
-       *max_dev = 0;
-
-       if (!r)
-               return;
-
-       for_each_replicas_entry(r, i) {
-               for (j = 0; j < i->nr; j++)
-                       *max_dev = max_t(unsigned, *max_dev, i->devs[j]);
-               (*nr)++;
-       }
-
-       *bytes = (void *) i - (void *) r;
-}
-
 static struct bch_replicas_cpu *
 __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
 {
+       struct bch_replicas_entry *e, *dst;
        struct bch_replicas_cpu *cpu_r;
-       unsigned i, nr, bytes, max_dev, entry_size;
-
-       bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
+       unsigned nr = 0, entry_size = 0;
 
-       entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
-               DIV_ROUND_UP(max_dev + 1, 8);
+       if (sb_r)
+               for_each_replicas_entry(sb_r, e) {
+                       entry_size = max_t(unsigned, entry_size,
+                                          replicas_entry_bytes(e));
+                       nr++;
+               }
 
        cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
                        nr * entry_size, GFP_NOIO);
@@ -381,20 +380,14 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
        cpu_r->nr               = nr;
        cpu_r->entry_size       = entry_size;
 
-       if (nr) {
-               struct bch_replicas_cpu_entry *dst =
-                       cpu_replicas_entry(cpu_r, 0);
-               struct bch_replicas_entry *src = sb_r->entries;
-
-               while (dst < cpu_replicas_entry(cpu_r, nr)) {
-                       dst->data_type = src->data_type;
-                       for (i = 0; i < src->nr; i++)
-                               replicas_set_dev(dst, src->devs[i]);
+       nr = 0;
 
-                       src     = replicas_entry_next(src);
-                       dst     = (void *) dst + entry_size;
+       if (sb_r)
+               for_each_replicas_entry(sb_r, e) {
+                       dst = cpu_replicas_entry(cpu_r, nr++);
+                       memcpy(dst, e, replicas_entry_bytes(e));
+                       replicas_entry_sort(dst);
                }
-       }
 
        bch2_cpu_replicas_sort(cpu_r);
        return cpu_r;
@@ -422,20 +415,16 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
                                            struct bch_replicas_cpu *r)
 {
        struct bch_sb_field_replicas *sb_r;
-       struct bch_replicas_entry *sb_e;
-       struct bch_replicas_cpu_entry *e;
-       size_t i, bytes;
+       struct bch_replicas_entry *dst, *src;
+       size_t bytes;
 
        bytes = sizeof(struct bch_sb_field_replicas);
 
-       for_each_cpu_replicas_entry(r, e) {
-               bytes += sizeof(struct bch_replicas_entry);
-               for (i = 0; i < r->entry_size - 1; i++)
-                       bytes += hweight8(e->devs[i]);
-       }
+       for_each_cpu_replicas_entry(r, src)
+               bytes += replicas_entry_bytes(src);
 
        sb_r = bch2_sb_resize_replicas(&c->disk_sb,
-                       DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
+                       DIV_ROUND_UP(bytes, sizeof(u64)));
        if (!sb_r)
                return -ENOSPC;
 
@@ -443,22 +432,42 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
               vstruct_end(&sb_r->field) -
               (void *) &sb_r->entries);
 
-       sb_e = sb_r->entries;
-       for_each_cpu_replicas_entry(r, e) {
-               sb_e->data_type = e->data_type;
+       dst = sb_r->entries;
+       for_each_cpu_replicas_entry(r, src) {
+               memcpy(dst, src, replicas_entry_bytes(src));
 
-               for (i = 0; i < replicas_dev_slots(r); i++)
-                       if (replicas_test_dev(e, i))
-                               sb_e->devs[sb_e->nr++] = i;
+               dst = replicas_entry_next(dst);
 
-               sb_e = replicas_entry_next(sb_e);
-
-               BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
+               BUG_ON((void *) dst > vstruct_end(&sb_r->field));
        }
 
        return 0;
 }
 
+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r)
+{
+       unsigned i;
+
+       sort_cmp_size(cpu_r->entries,
+                     cpu_r->nr,
+                     cpu_r->entry_size,
+                     memcmp, NULL);
+
+       for (i = 0; i + 1 < cpu_r->nr; i++) {
+               struct bch_replicas_entry *l =
+                       cpu_replicas_entry(cpu_r, i);
+               struct bch_replicas_entry *r =
+                       cpu_replicas_entry(cpu_r, i + 1);
+
+               BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
+
+               if (!memcmp(l, r, cpu_r->entry_size))
+                       return "duplicate replicas entry";
+       }
+
+       return NULL;
+}
+
 static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
 {
        struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
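
check_dup_replicas_entries() hoists the duplicate check out of
bch2_sb_validate_replicas(): sort the fixed-stride entries bytewise, and any
duplicates must then be adjacent. A standalone model (qsort() stands in for
the tree's sort_cmp_size()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ENTRY_SIZE 4

static int cmp(const void *l, const void *r)
{
	return memcmp(l, r, ENTRY_SIZE);
}

/* sort, then compare neighbours */
static const char *check_dups(unsigned char *entries, unsigned nr)
{
	unsigned i;

	qsort(entries, nr, ENTRY_SIZE, cmp);

	for (i = 0; i + 1 < nr; i++)
		if (!memcmp(entries + i * ENTRY_SIZE,
			    entries + (i + 1) * ENTRY_SIZE, ENTRY_SIZE))
			return "duplicate replicas entry";
	return NULL;
}

int main(void)
{
	unsigned char e[] = { 1,0,2,3,  0,1,2,3,  1,0,2,3 };
	const char *err = check_dups(e, 3);

	printf("%s\n", err ? err : "ok");	/* duplicate replicas entry */
	return 0;
}
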
@@ -474,15 +483,15 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
                        goto err;
 
                err = "invalid replicas entry: no devices";
-               if (!e->nr)
+               if (!e->nr_devs)
                        goto err;
 
                err = "invalid replicas entry: too many devices";
-               if (e->nr >= BCH_REPLICAS_MAX)
+               if (e->nr_devs >= BCH_REPLICAS_MAX)
                        goto err;
 
                err = "invalid replicas entry: invalid device";
-               for (i = 0; i < e->nr; i++)
+               for (i = 0; i < e->nr_devs; i++)
                        if (!bch2_dev_exists(sb, mi, e->devs[i]))
                                goto err;
        }
@@ -492,25 +501,7 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
        if (!cpu_r)
                goto err;
 
-       sort_cmp_size(cpu_r->entries,
-                     cpu_r->nr,
-                     cpu_r->entry_size,
-                     memcmp, NULL);
-
-       for (i = 0; i + 1 < cpu_r->nr; i++) {
-               struct bch_replicas_cpu_entry *l =
-                       cpu_replicas_entry(cpu_r, i);
-               struct bch_replicas_cpu_entry *r =
-                       cpu_replicas_entry(cpu_r, i + 1);
-
-               BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
-
-               err = "duplicate replicas entry";
-               if (!memcmp(l, r, cpu_r->entry_size))
-                       goto err;
-       }
-
-       err = NULL;
+       err = check_dup_replicas_entries(cpu_r);
 err:
        kfree(cpu_r);
        return err;
@@ -525,7 +516,6 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
        char *out = buf, *end = out + size;
        struct bch_replicas_entry *e;
        bool first = true;
-       unsigned i;
 
        if (!r) {
                out += scnprintf(out, end - out, "(no replicas section found)");
@@ -537,12 +527,7 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
                        out += scnprintf(out, end - out, " ");
                first = false;
 
-               out += scnprintf(out, end - out, "%u: [", e->data_type);
-
-               for (i = 0; i < e->nr; i++)
-                       out += scnprintf(out, end - out,
-                                        i ? " %u" : "%u", e->devs[i]);
-               out += scnprintf(out, end - out, "]");
+               out += replicas_entry_to_text(e, out, end - out);
        }
 
        return out - buf;
@@ -554,45 +539,59 @@ bool bch2_replicas_marked(struct bch_fs *c,
                          enum bch_data_type data_type,
                          struct bch_devs_list devs)
 {
-       struct bch_replicas_cpu_entry search;
-       unsigned max_dev;
+       struct bch_replicas_entry_padded search;
        bool ret;
 
        if (!devs.nr)
                return true;
 
-       devlist_to_replicas(devs, data_type, &search, &max_dev);
+       memset(&search, 0, sizeof(search));
+
+       devlist_to_replicas(devs, data_type, &search.e);
 
        rcu_read_lock();
-       ret = replicas_has_entry(rcu_dereference(c->replicas),
-                                search, max_dev);
+       ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
        rcu_read_unlock();
 
        return ret;
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
-                              enum bch_data_type data_type,
+                              enum bkey_type type,
                               struct bkey_s_c k)
 {
-       struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-       unsigned i;
+       struct bch_replicas_entry_padded search;
+       bool ret;
 
-       for (i = 0; i < cached.nr; i++)
-               if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
-                                         bch2_dev_list_single(cached.devs[i])))
-                       return false;
+       if (type == BKEY_TYPE_EXTENTS) {
+               struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+               unsigned i;
 
-       return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
+               for (i = 0; i < cached.nr; i++)
+                       if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+                                       bch2_dev_list_single(cached.devs[i])))
+                               return false;
+       }
+
+       bkey_to_replicas(type, k, &search.e);
+
+       if (!search.e.nr_devs)
+               return true;
+
+       rcu_read_lock();
+       ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
+       rcu_read_unlock();
+
+       return ret;
 }
 
 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
                                              struct bch_devs_mask online_devs)
 {
        struct bch_sb_field_members *mi;
-       struct bch_replicas_cpu_entry *e;
+       struct bch_replicas_entry *e;
        struct bch_replicas_cpu *r;
-       unsigned i, dev, dev_slots, nr_online, nr_offline;
+       unsigned i, nr_online, nr_offline;
        struct replicas_status ret;
 
        memset(&ret, 0, sizeof(ret));
@@ -602,9 +601,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 
        mi = bch2_sb_get_members(c->disk_sb.sb);
        rcu_read_lock();
-
        r = rcu_dereference(c->replicas);
-       dev_slots = replicas_dev_slots(r);
 
        for_each_cpu_replicas_entry(r, e) {
                if (e->data_type >= ARRAY_SIZE(ret.replicas))
@@ -612,13 +609,11 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 
                nr_online = nr_offline = 0;
 
-               for (dev = 0; dev < dev_slots; dev++) {
-                       if (!replicas_test_dev(e, dev))
-                               continue;
-
-                       BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));
+               for (i = 0; i < e->nr_devs; i++) {
+                       BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
+                                               e->devs[i]));
 
-                       if (test_bit(dev, online_devs.d))
+                       if (test_bit(e->devs[i], online_devs.d))
                                nr_online++;
                        else
                                nr_offline++;
@@ -677,20 +672,18 @@ unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
 
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 {
-       struct bch_replicas_cpu_entry *e;
+       struct bch_replicas_entry *e;
        struct bch_replicas_cpu *r;
-       unsigned ret = 0;
+       unsigned i, ret = 0;
 
        rcu_read_lock();
        r = rcu_dereference(c->replicas);
 
-       if (ca->dev_idx >= replicas_dev_slots(r))
-               goto out;
-
        for_each_cpu_replicas_entry(r, e)
-               if (replicas_test_dev(e, ca->dev_idx))
-                       ret |= 1 << e->data_type;
-out:
+               for (i = 0; i < e->nr_devs; i++)
+                       if (e->devs[i] == ca->dev_idx)
+                               ret |= 1 << e->data_type;
+
        rcu_read_unlock();
 
        return ret;
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 49f114b01c1eb3e2a2d512d9633f48dcb7faa661..640fe5b269753a89a553d4497a2f24be03e57207 100644 (file)
@@ -1,13 +1,15 @@
 #ifndef _BCACHEFS_REPLICAS_H
 #define _BCACHEFS_REPLICAS_H
 
+#include "replicas_types.h"
+
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
                          struct bch_devs_list);
-bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
+bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
                               struct bkey_s_c);
 int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
                       struct bch_devs_list);
-int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
+int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
                            struct bkey_s_c);
 
 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
@@ -33,11 +35,11 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
 
 /* iterate over superblock replicas - used by userspace tools: */
 
-static inline struct bch_replicas_entry *
-replicas_entry_next(struct bch_replicas_entry *i)
-{
-       return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
-}
+#define replicas_entry_bytes(_i)                                       \
+       (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+#define replicas_entry_next(_i)                                                \
+       ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
 
 #define for_each_replicas_entry(_r, _i)                                        \
        for (_i = (_r)->entries;                                        \
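
replicas_entry_bytes() is the size of one variable-length entry: the header
up to the flexible devs[] array plus nr_devs bytes; replicas_entry_next()
steps over it, which is how both the superblock field and the new in-memory
table are walked. A standalone model of the layout (the struct and macro
names here are illustrative):

#include <stddef.h>
#include <stdio.h>

struct entry {
	unsigned char data_type;
	unsigned char nr_devs;
	unsigned char devs[];		/* nr_devs bytes follow the header */
};

#define entry_bytes(e) (offsetof(struct entry, devs) + (e)->nr_devs)
#define entry_next(e)  ((struct entry *) ((unsigned char *) (e) + entry_bytes(e)))

int main(void)
{
	/* two packed entries: type 1 on devs {0,2}; type 2 on dev {1} */
	unsigned char buf[] = { 1, 2, 0, 2,   2, 1, 1 };
	struct entry *e = (struct entry *) buf;
	struct entry *end = (struct entry *) (buf + sizeof(buf));

	for (; e < end; e = entry_next(e))
		printf("type %u: %u devs\n", e->data_type, e->nr_devs);
	return 0;
}
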
diff --git a/libbcachefs/replicas_types.h b/libbcachefs/replicas_types.h
new file mode 100644 (file)
index 0000000..3061840
--- /dev/null
+++ b/libbcachefs/replicas_types.h
@@ -0,0 +1,11 @@
+#ifndef _BCACHEFS_REPLICAS_TYPES_H
+#define _BCACHEFS_REPLICAS_TYPES_H
+
+struct bch_replicas_cpu {
+       struct rcu_head         rcu;
+       unsigned                nr;
+       unsigned                entry_size;
+       struct bch_replicas_entry entries[];
+};
+
+#endif /* _BCACHEFS_REPLICAS_TYPES_H */
diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h
index ab83ade959e42403e59ffcb1b2f30e4efaeb6f31..ebb238aa547231c1f6883e944fb5935ed28bc13c 100644 (file)
@@ -34,18 +34,6 @@ struct bch_member_cpu {
        u8                      valid;
 };
 
-struct bch_replicas_cpu_entry {
-       u8                      data_type;
-       u8                      devs[BCH_SB_MEMBERS_MAX / 8];
-};
-
-struct bch_replicas_cpu {
-       struct rcu_head         rcu;
-       unsigned                nr;
-       unsigned                entry_size;
-       struct bch_replicas_cpu_entry entries[];
-};
-
 struct bch_disk_group_cpu {
        bool                            deleted;
        u16                             parent;
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 3038b455209f973cf1958d4efa63e7ba84b96623..481269201389e0e5f9c1dfee12de15d66b71a3bf 100644 (file)
@@ -282,19 +282,19 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
                if (k.k->type == BCH_EXTENT) {
                        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-                       const struct bch_extent_ptr *ptr;
-                       struct bch_extent_crc_unpacked crc;
+                       const union bch_extent_entry *entry;
+                       struct extent_ptr_decoded p;
 
-                       extent_for_each_ptr_crc(e, ptr, crc) {
-                               if (crc.compression_type == BCH_COMPRESSION_NONE) {
+                       extent_for_each_ptr_decode(e, p, entry) {
+                               if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
                                        nr_uncompressed_extents++;
                                        uncompressed_sectors += e.k->size;
                                } else {
                                        nr_compressed_extents++;
                                        compressed_sectors_compressed +=
-                                               crc.compressed_size;
+                                               p.crc.compressed_size;
                                        compressed_sectors_uncompressed +=
-                                               crc.uncompressed_size;
+                                               p.crc.uncompressed_size;
                                }
 
                                /* only looking at the first ptr */
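
The stats themselves are computed as before: per compressed extent,
accumulate crc.compressed_size and crc.uncompressed_size and report both. A
toy model of the accounting (numbers invented):

#include <stdio.h>

int main(void)
{
	/* (compressed, uncompressed) per compressed extent, in sectors */
	unsigned extents[][2] = { { 8, 32 }, { 16, 32 }, { 4, 16 } };
	unsigned long long c = 0, u = 0;
	unsigned i;

	for (i = 0; i < 3; i++) {
		c += extents[i][0];
		u += extents[i][1];
	}

	printf("compressed %llu of %llu sectors (%.0f%% saved)\n",
	       c, u, 100.0 * (u - c) / u);	/* 28 of 80 (65% saved) */
	return 0;
}
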
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 5cfaed5b305c12514bd5f01cb6910756d85bd55c..4df96ef0bcd0a30a1bb03df3088272f4c30a3cd4 100644 (file)
@@ -526,15 +526,17 @@ void bch2_bio_map(struct bio *bio, void *base)
 
        BUG_ON(!bio->bi_iter.bi_size);
        BUG_ON(bio->bi_vcnt);
+       BUG_ON(!bio->bi_max_vecs);
 
        bv->bv_offset = base ? offset_in_page(base) : 0;
        goto start;
 
        for (; size; bio->bi_vcnt++, bv++) {
+               BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
+
                bv->bv_offset   = 0;
 start:         bv->bv_len      = min_t(size_t, PAGE_SIZE - bv->bv_offset,
                                        size);
-               BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
                if (base) {
                        bv->bv_page = is_vmalloc_addr(base)
                                ? vmalloc_to_page(base)
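
This is the fix named in the commit subject: bch2_bio_map() now rejects a
bio with no bi_max_vecs up front and checks the bound before writing each
bio_vec rather than after advancing past it. A standalone model of the
corrected loop (the original's goto-into-loop shape is flattened here, and
the names are illustrative):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

struct vec { unsigned offset, len; };

/* check capacity *before* writing each segment, as the fix does */
static unsigned map_buf(struct vec *vecs, unsigned max_vecs,
			unsigned long base, unsigned size)
{
	unsigned nr = 0;

	assert(size);
	assert(max_vecs);

	while (size) {
		unsigned offset = nr ? 0 : base & (PAGE_SIZE - 1);
		unsigned len = PAGE_SIZE - offset < size
			? PAGE_SIZE - offset : size;

		assert(nr < max_vecs);	/* the relocated bounds check */
		vecs[nr].offset = offset;
		vecs[nr].len    = len;
		nr++;
		size -= len;
	}
	return nr;
}

int main(void)
{
	struct vec v[3];

	/* 8192 bytes starting 512 bytes into a page -> 3 segments */
	printf("%u segments\n", map_buf(v, 3, 512, 8192));
	return 0;
}
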
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 178bf98308e882c6362508241a564b1859b3b15c..433ba9c14dcbdc7f45bc4b68535fd4db81ea5188 100644 (file)
@@ -83,6 +83,14 @@ struct closure;
        (__builtin_types_compatible_p(typeof(_val), _type) ||           \
         __builtin_types_compatible_p(typeof(_val), const _type))
 
+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
+static inline size_t buf_pages(void *p, size_t len)
+{
+       return DIV_ROUND_UP(len +
+                           ((unsigned long) p & (PAGE_SIZE - 1)),
+                           PAGE_SIZE);
+}
+
 static inline void vpfree(void *p, size_t size)
 {
        if (is_vmalloc_addr(p))
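
buf_pages() exists because, as the new comment says, a userspace allocation
may start mid-page, so the number of pages spanned depends on the base
pointer's offset within its page as well as on the length. A quick
standalone check of the formula:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* model of the new buf_pages(): pages spanned by [addr, addr + len) */
static unsigned long buf_pages(unsigned long addr, unsigned long len)
{
	return (len + (addr & (PAGE_SIZE - 1)) + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	/* a page-sized buffer starting 512 bytes into a page spans 2 pages */
	printf("%lu\n", buf_pages(512, 4096));	/* 2 */
	printf("%lu\n", buf_pages(0, 4096));	/* 1 */
	return 0;
}
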
@@ -137,7 +145,19 @@ do {                                                                       \
        (heap)->data = NULL;                                            \
 } while (0)
 
-#define heap_swap(h, i, j)     swap((h)->data[i], (h)->data[j])
+#define heap_set_backpointer(h, i, _fn)                                        \
+do {                                                                   \
+       void (*fn)(typeof(h), size_t) = _fn;                            \
+       if (fn)                                                         \
+               fn(h, i);                                               \
+} while (0)
+
+#define heap_swap(h, i, j, set_backpointer)                            \
+do {                                                                   \
+       swap((h)->data[i], (h)->data[j]);                               \
+       heap_set_backpointer(h, i, set_backpointer);                    \
+       heap_set_backpointer(h, j, set_backpointer);                    \
+} while (0)
 
 #define heap_peek(h)                                                   \
 ({                                                                     \
@@ -147,7 +167,7 @@ do {                                                                        \
 
 #define heap_full(h)   ((h)->used == (h)->size)
 
-#define heap_sift_down(h, i, cmp)                                      \
+#define heap_sift_down(h, i, cmp, set_backpointer)                     \
 do {                                                                   \
        size_t _c, _j = i;                                              \
                                                                        \
@@ -159,72 +179,75 @@ do {                                                                      \
                                                                        \
                if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0)          \
                        break;                                          \
-               heap_swap(h, _c, _j);                                   \
+               heap_swap(h, _c, _j, set_backpointer);                  \
        }                                                               \
 } while (0)
 
-#define heap_sift_up(h, i, cmp)                                                \
+#define heap_sift_up(h, i, cmp, set_backpointer)                       \
 do {                                                                   \
        while (i) {                                                     \
                size_t p = (i - 1) / 2;                                 \
                if (cmp(h, (h)->data[i], (h)->data[p]) >= 0)            \
                        break;                                          \
-               heap_swap(h, i, p);                                     \
+               heap_swap(h, i, p, set_backpointer);                    \
                i = p;                                                  \
        }                                                               \
 } while (0)
 
-#define __heap_add(h, d, cmp)                                          \
-do {                                                                   \
+#define __heap_add(h, d, cmp, set_backpointer)                         \
+({                                                                     \
        size_t _i = (h)->used++;                                        \
        (h)->data[_i] = d;                                              \
+       heap_set_backpointer(h, _i, set_backpointer);                   \
                                                                        \
-       heap_sift_up(h, _i, cmp);                                       \
-} while (0)
+       heap_sift_up(h, _i, cmp, set_backpointer);                      \
+       _i;                                                             \
+})
 
-#define heap_add(h, d, cmp)                                            \
+#define heap_add(h, d, cmp, set_backpointer)                           \
 ({                                                                     \
        bool _r = !heap_full(h);                                        \
        if (_r)                                                         \
-               __heap_add(h, d, cmp);                                  \
+               __heap_add(h, d, cmp, set_backpointer);                 \
        _r;                                                             \
 })
 
-#define heap_add_or_replace(h, new, cmp)                               \
+#define heap_add_or_replace(h, new, cmp, set_backpointer)              \
 do {                                                                   \
-       if (!heap_add(h, new, cmp) &&                                   \
+       if (!heap_add(h, new, cmp, set_backpointer) &&                  \
            cmp(h, new, heap_peek(h)) >= 0) {                           \
                (h)->data[0] = new;                                     \
-               heap_sift_down(h, 0, cmp);                              \
+               heap_set_backpointer(h, 0, set_backpointer);            \
+               heap_sift_down(h, 0, cmp, set_backpointer);             \
        }                                                               \
 } while (0)
 
-#define heap_del(h, i, cmp)                                            \
+#define heap_del(h, i, cmp, set_backpointer)                           \
 do {                                                                   \
        size_t _i = (i);                                                \
                                                                        \
        BUG_ON(_i >= (h)->used);                                        \
        (h)->used--;                                                    \
-       heap_swap(h, _i, (h)->used);                                    \
-       heap_sift_up(h, _i, cmp);                                       \
-       heap_sift_down(h, _i, cmp);                                     \
+       heap_swap(h, _i, (h)->used, set_backpointer);                   \
+       heap_sift_up(h, _i, cmp, set_backpointer);                      \
+       heap_sift_down(h, _i, cmp, set_backpointer);                    \
 } while (0)
 
-#define heap_pop(h, d, cmp)                                            \
+#define heap_pop(h, d, cmp, set_backpointer)                           \
 ({                                                                     \
        bool _r = (h)->used;                                            \
        if (_r) {                                                       \
                (d) = (h)->data[0];                                     \
-               heap_del(h, 0, cmp);                                    \
+               heap_del(h, 0, cmp, set_backpointer);                   \
        }                                                               \
        _r;                                                             \
 })
 
-#define heap_resort(heap, cmp)                                         \
+#define heap_resort(heap, cmp, set_backpointer)                                \
 do {                                                                   \
        ssize_t _i;                                                     \
        for (_i = (ssize_t) (heap)->used / 2 -  1; _i >= 0; --_i)       \
-               heap_sift_down(heap, _i, cmp);                          \
+               heap_sift_down(heap, _i, cmp, set_backpointer);         \
 } while (0)
 
 #define ANYSINT_MAX(t)                                                 \
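
The heap macros now thread a set_backpointer callback through every move
(heap_swap(), __heap_add(), heap_add_or_replace(), heap_del()), so elements
that must later find themselves, e.g. for heap_del() on an arbitrary
element, stay current; callers that don't care pass NULL, as in the
movinggc hunk earlier. A standalone model of the backpointer invariant
(capacity checks elided; not the tree's macros):

#include <stdio.h>

/* a min-heap whose elements record their own index on every move */
struct elem { int key; size_t pos; };

static struct elem heap[8];
static size_t used;

static void set_backpointer(size_t i)
{
	heap[i].pos = i;
}

static void heap_swap(size_t i, size_t j)
{
	struct elem t = heap[i];

	heap[i] = heap[j];
	heap[j] = t;
	set_backpointer(i);
	set_backpointer(j);
}

static void sift_up(size_t i)
{
	while (i && heap[i].key < heap[(i - 1) / 2].key) {
		heap_swap(i, (i - 1) / 2);
		i = (i - 1) / 2;
	}
}

static void heap_add(int key)
{
	heap[used].key = key;
	set_backpointer(used);
	sift_up(used++);
}

int main(void)
{
	size_t i;

	heap_add(3);
	heap_add(1);
	heap_add(2);

	/* every element's pos matches its slot, so deleting by e->pos is O(log n) */
	for (i = 0; i < used; i++)
		printf("slot %zu: key %d, pos %zu\n", i, heap[i].key, heap[i].pos);
	return 0;
}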