Update bcachefs sources to 2e70771b8d
author     Kent Overstreet <kent.overstreet@gmail.com>
           Mon, 24 Apr 2017 05:56:57 +0000 (21:56 -0800)
committer  Kent Overstreet <kent.overstreet@gmail.com>
           Mon, 24 Apr 2017 06:00:36 +0000 (22:00 -0800)
47 files changed:
.bcachefs_revision
cmd_debug.c
cmd_migrate.c
include/linux/backing-dev.h
include/linux/bio.h
include/trace/events/bcachefs.h
libbcachefs/alloc.c
libbcachefs/alloc.h
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bset.c
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/clock.c
libbcachefs/clock_types.h
libbcachefs/debug.c
libbcachefs/dirent.c
libbcachefs/extents.c
libbcachefs/eytzinger.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/movinggc.c
libbcachefs/six.c
libbcachefs/str_hash.h
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/tier.c
libbcachefs/util.c
libbcachefs/util.h
libbcachefs/xattr.c
linux/bio.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 5f16f468e2b3d4d8a7a905edafb76688e6e197b9..b04d44bcaa1d07257910317a8b4fbea845d10d5d 100644
@@ -1 +1 @@
-846600a41b7853588796a5403b07347d36c5a65c
+2e70771b8dc0d0f2d0356a5a7d16cab9430cd49e
diff --git a/cmd_debug.c b/cmd_debug.c
index d825753d011bf225e208d0c806db12686346af37..195e5885754472ebfe369a56d52f3b883ae42064 100644
@@ -160,7 +160,8 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
        struct bkey_s_c k;
        char buf[512];
 
-       for_each_btree_key(&iter, c, btree_id, start, k) {
+       for_each_btree_key(&iter, c, btree_id, start,
+                          BTREE_ITER_PREFETCH, k) {
                if (bkey_cmp(k.k->p, end) > 0)
                        break;
 
diff --git a/cmd_migrate.c b/cmd_migrate.c
index a18aae10b5b7e107c7da51553fe373e814f83875..72cc004d0d42aba63dae1a8a9df74f28c1bed01f 100644
@@ -259,9 +259,7 @@ static void write_data(struct bch_fs *c,
 
        closure_init_stack(&cl);
 
-       bio_init(&bio.bio);
-       bio.bio.bi_max_vecs     = 1;
-       bio.bio.bi_io_vec       = &bv;
+       bio_init(&bio.bio, &bv, 1);
        bio.bio.bi_iter.bi_size = len;
        bch2_bio_map(&bio.bio, buf);
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index a68fca4b08f6ba2501ceec9b71be4be47c1a8e7d..01b2c15301091309cbf6e69047c6d67517a76ac5 100644
@@ -9,6 +9,7 @@ enum wb_congested_state {
 };
 
 struct backing_dev_info {
+       struct list_head bdi_list;
        unsigned        ra_pages;
        unsigned        capabilities;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 49d26b53fa103d54bc87acf892c7648599f9fcae..10cad5ccf74643bebe63fb39ebe3aab310167ade 100644
@@ -451,11 +451,15 @@ static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
        return bio_clone_bioset(bio, gfp_mask, NULL);
 }
 
-static inline void bio_init(struct bio *bio)
+static inline void bio_init(struct bio *bio, struct bio_vec *table,
+             unsigned short max_vecs)
 {
        memset(bio, 0, sizeof(*bio));
        atomic_set(&bio->__bi_remaining, 1);
        atomic_set(&bio->__bi_cnt, 1);
+
+       bio->bi_io_vec = table;
+       bio->bi_max_vecs = max_vecs;
 }
 
 #endif /* __LINUX_BIO_H */
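
The shim's bio_init() now follows the upstream kernel's newer signature: the bio_vec table and its capacity are handed over up front, so callers stop assigning bi_io_vec and bi_max_vecs by hand (compare write_data() above and prio_io() below, which also sets the op directly in bi_opf). A minimal sketch of a caller under the new API, assuming a linear buffer buf of len bytes; the choice of op and flags is illustrative:

    struct bio_vec bv;
    struct bio bio;

    bio_init(&bio, &bv, 1);              /* table and capacity in one call */
    bio.bi_opf          = REQ_OP_WRITE | REQ_SYNC;
    bio.bi_iter.bi_size = len;
    bch2_bio_map(&bio, buf);             /* fill the single bvec from buf */
    submit_bio_wait(&bio);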
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 7dea9d63e654dbc25ee3b96cca04a97e3f59b4ac..06cb5ff33b32a44d6ae736c9ccda94260ebf88f7 100644
@@ -90,8 +90,7 @@ DECLARE_EVENT_CLASS(bio,
                __entry->dev            = bio->bi_bdev->bd_dev;
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-                             bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
        ),
 
        TP_printk("%d,%d  %s %llu + %u",
@@ -156,8 +155,7 @@ TRACE_EVENT(write_throttle,
                __entry->inode          = inode;
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-                             bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
                __entry->delay          = delay;
        ),
 
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index d5d2679f3e2219c55ada2b5a7e55704d9f816df7..a4e412ea9bf6ac3964db2a471f0432557a86cd01 100644
@@ -233,11 +233,8 @@ static void pd_controllers_update(struct work_struct *work)
 
 static int prio_io(struct bch_dev *ca, uint64_t bucket, int op)
 {
-       bio_init(ca->bio_prio);
-       bio_set_op_attrs(ca->bio_prio, op, REQ_SYNC|REQ_META);
-
-       ca->bio_prio->bi_max_vecs       = bucket_pages(ca);
-       ca->bio_prio->bi_io_vec         = ca->bio_prio->bi_inline_vecs;
+       bio_init(ca->bio_prio, ca->bio_prio->bi_inline_vecs, bucket_pages(ca));
+       ca->bio_prio->bi_opf            = op|REQ_SYNC|REQ_META;
        ca->bio_prio->bi_iter.bi_sector = bucket * ca->mi.bucket_size;
        ca->bio_prio->bi_bdev           = ca->disk_sb.bdev;
        ca->bio_prio->bi_iter.bi_size   = bucket_bytes(ca);
@@ -636,9 +633,10 @@ static inline bool can_inc_bucket_gen(struct bch_dev *ca, struct bucket *g)
        return bucket_gc_gen(ca, g) < BUCKET_GC_GEN_MAX;
 }
 
-static bool bch2_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
+static bool bch2_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g,
+                                      struct bucket_mark mark)
 {
-       if (!is_available_bucket(READ_ONCE(g->mark)))
+       if (!is_available_bucket(mark))
                return false;
 
        if (bucket_gc_gen(ca, g) >= BUCKET_GC_GEN_MAX - 1)
@@ -679,24 +677,38 @@ static void bch2_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g)
  *   btree GC to rewrite nodes with stale pointers.
  */
 
-#define bucket_sort_key(g)                                             \
-({                                                                     \
-       unsigned long prio = g->read_prio - ca->min_prio[READ];         \
-       prio = (prio * 7) / (ca->fs->prio_clock[READ].hand -            \
-                            ca->min_prio[READ]);                       \
-                                                                       \
-       (((prio + 1) * bucket_sectors_used(g)) << 8) | bucket_gc_gen(ca, g);\
-})
+static unsigned long bucket_sort_key(bucket_heap *h,
+                                    struct bucket_heap_entry e)
+{
+       struct bch_dev *ca = container_of(h, struct bch_dev, alloc_heap);
+       struct bucket *g = ca->buckets + e.bucket;
+       unsigned long prio = g->read_prio - ca->min_prio[READ];
+       prio = (prio * 7) / (ca->fs->prio_clock[READ].hand -
+                            ca->min_prio[READ]);
+
+       return (prio + 1) * bucket_sectors_used(e.mark);
+}
+
+static inline int bucket_alloc_cmp(bucket_heap *h,
+                                  struct bucket_heap_entry l,
+                                  struct bucket_heap_entry r)
+{
+       return bucket_sort_key(h, l) - bucket_sort_key(h, r);
+}
+
+static inline long bucket_idx_cmp(bucket_heap *h,
+                                 struct bucket_heap_entry l,
+                                 struct bucket_heap_entry r)
+{
+       return l.bucket - r.bucket;
+}
 
 static void invalidate_buckets_lru(struct bch_dev *ca)
 {
        struct bucket_heap_entry e;
        struct bucket *g;
-       unsigned i;
-
-       mutex_lock(&ca->heap_lock);
 
-       ca->heap.used = 0;
+       ca->alloc_heap.used = 0;
 
        mutex_lock(&ca->fs->bucket_lock);
        bch2_recalc_min_prio(ca, READ);
@@ -708,37 +720,32 @@ static void invalidate_buckets_lru(struct bch_dev *ca)
         * all buckets have been visited.
         */
        for_each_bucket(g, ca) {
-               if (!bch2_can_invalidate_bucket(ca, g))
+               struct bucket_mark m = READ_ONCE(g->mark);
+               struct bucket_heap_entry e = { g - ca->buckets, m };
+
+               if (!bch2_can_invalidate_bucket(ca, g, m))
                        continue;
 
-               bucket_heap_push(ca, g, bucket_sort_key(g));
+               heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
        }
 
        /* Sort buckets by physical location on disk for better locality */
-       for (i = 0; i < ca->heap.used; i++) {
-               struct bucket_heap_entry *e = &ca->heap.data[i];
-
-               e->val = e->g - ca->buckets;
-       }
-
-       heap_resort(&ca->heap, bucket_max_cmp);
+       heap_resort(&ca->alloc_heap, bucket_idx_cmp);
 
        /*
         * If we run out of buckets to invalidate, bch2_allocator_thread() will
         * kick stuff and retry us
         */
        while (!fifo_full(&ca->free_inc) &&
-              heap_pop(&ca->heap, e, bucket_max_cmp)) {
-               BUG_ON(!bch2_can_invalidate_bucket(ca, e.g));
-               bch2_invalidate_one_bucket(ca, e.g);
-       }
+              heap_pop(&ca->alloc_heap, e, bucket_idx_cmp))
+               bch2_invalidate_one_bucket(ca, &ca->buckets[e.bucket]);
 
        mutex_unlock(&ca->fs->bucket_lock);
-       mutex_unlock(&ca->heap_lock);
 }
 
 static void invalidate_buckets_fifo(struct bch_dev *ca)
 {
+       struct bucket_mark m;
        struct bucket *g;
        size_t checked = 0;
 
@@ -748,8 +755,9 @@ static void invalidate_buckets_fifo(struct bch_dev *ca)
                        ca->fifo_last_bucket = ca->mi.first_bucket;
 
                g = ca->buckets + ca->fifo_last_bucket++;
+               m = READ_ONCE(g->mark);
 
-               if (bch2_can_invalidate_bucket(ca, g))
+               if (bch2_can_invalidate_bucket(ca, g, m))
                        bch2_invalidate_one_bucket(ca, g);
 
                if (++checked >= ca->mi.nbuckets)
@@ -759,6 +767,7 @@ static void invalidate_buckets_fifo(struct bch_dev *ca)
 
 static void invalidate_buckets_random(struct bch_dev *ca)
 {
+       struct bucket_mark m;
        struct bucket *g;
        size_t checked = 0;
 
@@ -768,8 +777,9 @@ static void invalidate_buckets_random(struct bch_dev *ca)
                        ca->mi.first_bucket;
 
                g = ca->buckets + n;
+               m = READ_ONCE(g->mark);
 
-               if (bch2_can_invalidate_bucket(ca, g))
+               if (bch2_can_invalidate_bucket(ca, g, m))
                        bch2_invalidate_one_bucket(ca, g);
 
                if (++checked >= ca->mi.nbuckets / 2)
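
Note the shape of the new comparators: unlike the old bucket_sort_key macro, which captured ca from the enclosing function, they receive the heap itself and recover the device with container_of(). A self-contained toy illustrating that pattern (this is not the bcachefs heap code; all the types here are made up):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct toy_heap { size_t used; long data[8]; };
    struct toy_dev  { long min_prio; struct toy_heap heap; };

    /* the comparator sees the heap, so per-device state is reachable: */
    static long toy_cmp(struct toy_heap *h, long l, long r)
    {
            struct toy_dev *ca = container_of(h, struct toy_dev, heap);

            return (l - ca->min_prio) - (r - ca->min_prio);
    }

    int main(void)
    {
            struct toy_dev ca = { .min_prio = 4 };

            printf("%ld\n", toy_cmp(&ca.heap, 5, 7));   /* prints -2 */
            return 0;
    }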
diff --git a/libbcachefs/alloc.h b/libbcachefs/alloc.h
index c6b57fa1a7f63defde7936c46c3d02669dc0e33d..195108c20b4c18b9d9fe712b519e5fefbaed1412 100644
@@ -1,6 +1,7 @@
 #ifndef _BCACHE_ALLOC_H
 #define _BCACHE_ALLOC_H
 
+#include "bcachefs.h"
 #include "alloc_types.h"
 
 struct bkey;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index b1f2528a329bb5b0234e01fbbe8e1362b0923c75..6259b50e4e7cc44fc11623d8fef28d8a0c3dd763 100644
@@ -1,5 +1,5 @@
-#ifndef _BCACHE_H
-#define _BCACHE_H
+#ifndef _BCACHEFS_H
+#define _BCACHEFS_H
 
 /*
  * SOME HIGH LEVEL CODE DOCUMENTATION:
@@ -418,8 +418,8 @@ struct bch_dev {
        atomic_long_t           saturated_count;
        size_t                  inc_gen_needs_gc;
 
-       struct mutex            heap_lock;
-       DECLARE_HEAP(struct bucket_heap_entry, heap);
+       bucket_heap             alloc_heap;
+       bucket_heap             copygc_heap;
 
        /* Moving GC: */
        struct task_struct      *moving_gc_read;
@@ -803,4 +803,4 @@ static inline unsigned block_bytes(const struct bch_fs *c)
        return c->sb.block_size << 9;
 }
 
-#endif /* _BCACHE_H */
+#endif /* _BCACHEFS_H */
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index a99d96cd6297a92f251a7ce26318a96babc3b212..ef854fb1c36839e58cd09162e42c9dd51ec44ae3 100644
@@ -1,15 +1,10 @@
-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
+#ifndef _BCACHEFS_FORMAT_H
+#define _BCACHEFS_FORMAT_H
 
 /*
  * Bcache on disk data structures
  */
 
-#ifdef __cplusplus
-typedef bool _Bool;
-extern "C" {
-#endif
-
 #include <asm/types.h>
 #include <asm/byteorder.h>
 #include <linux/uuid.h>
@@ -230,8 +225,6 @@ struct bkey_i {
        };
 };
 
-#ifndef __cplusplus
-
 #define KEY(_inode, _offset, _size)                                    \
 ((struct bkey) {                                                       \
        .u64s           = BKEY_U64s,                                    \
@@ -240,24 +233,6 @@ struct bkey_i {
        .size           = _size,                                        \
 })
 
-#else
-
-static inline struct bkey KEY(__u64 inode, __u64 offset, __u64 size)
-{
-       struct bkey ret;
-
-       memset(&ret, 0, sizeof(ret));
-       ret.u64s        = BKEY_U64s;
-       ret.format      = KEY_FORMAT_CURRENT;
-       ret.p.inode     = inode;
-       ret.p.offset    = offset;
-       ret.size        = size;
-
-       return ret;
-}
-
-#endif
-
 static inline void bkey_init(struct bkey *k)
 {
        *k = KEY(0, 0, 0);
@@ -1344,9 +1319,4 @@ struct btree_node_entry {
        };
 } __attribute__((packed, aligned(8)));
 
-#ifdef __cplusplus
-}
-#endif
-#endif /* _LINUX_BCACHE_H */
-
-/* vim: set foldnestmax=2: */
+#endif /* _BCACHEFS_FORMAT_H */
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 280dcf3e14795eb162022e56d461e2aa5c16e3f7..53627380d81aa893f54df733f03c0eda7355252d 100644
@@ -473,7 +473,7 @@ void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
  * in one cacheline in t->set (BSET_CACHELINE bytes).
  *
  * This means we don't have to store the full index of the key that a node in
- * the binary tree points to; eytzinger_to_inorder() gives us the cacheline, and
+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and
  * then bkey_float->m gives us the offset within that cacheline, in units of 8
  * bytes.
  *
@@ -534,7 +534,7 @@ static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
                                               unsigned j)
 {
        return cacheline_to_bkey(b, t,
-                       __eytzinger_to_inorder(j, t->size, t->extra),
+                       __eytzinger1_to_inorder(j, t->size, t->extra),
                        bkey_float(b, t, j)->key_offset);
 }
 
@@ -882,7 +882,7 @@ retry:
        t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
 
        /* First we figure out where the first key in each cacheline is */
-       eytzinger_for_each(j, t->size) {
+       eytzinger1_for_each(j, t->size) {
                while (bkey_to_cacheline(b, t, k) < cacheline)
                        prev = k, k = bkey_next(k);
 
@@ -905,7 +905,7 @@ retry:
        t->max_key = bkey_unpack_pos(b, k);
 
        /* Then we build the tree */
-       eytzinger_for_each(j, t->size)
+       eytzinger1_for_each(j, t->size)
                make_bfloat(b, t, j, &min_key, &max_key);
 }
 
@@ -996,7 +996,7 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
 
                do {
                        p = j ? tree_to_bkey(b, t,
-                                       __inorder_to_eytzinger(j--,
+                                       __inorder_to_eytzinger1(j--,
                                                        t->size, t->extra))
                              : btree_bkey_first(b, t);
                } while (p >= k);
@@ -1087,30 +1087,30 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
 
        if (inorder &&
            inorder < t->size) {
-               j = __inorder_to_eytzinger(inorder, t->size, t->extra);
+               j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
 
                if (k == tree_to_bkey(b, t, j)) {
                        /* Fix the node this key corresponds to */
                        make_bfloat(b, t, j, &min_key, &max_key);
 
                        /* Children for which this key is the right boundary */
-                       for (j = eytzinger_left_child(j);
+                       for (j = eytzinger1_left_child(j);
                             j < t->size;
-                            j = eytzinger_right_child(j))
+                            j = eytzinger1_right_child(j))
                                make_bfloat(b, t, j, &min_key, &max_key);
                }
        }
 
        if (inorder + 1 < t->size) {
-               j = __inorder_to_eytzinger(inorder + 1, t->size, t->extra);
+               j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra);
 
                if (k == tree_to_prev_bkey(b, t, j)) {
                        make_bfloat(b, t, j, &min_key, &max_key);
 
                        /* Children for which this key is the left boundary */
-                       for (j = eytzinger_right_child(j);
+                       for (j = eytzinger1_right_child(j);
                             j < t->size;
-                            j = eytzinger_left_child(j))
+                            j = eytzinger1_left_child(j))
                                make_bfloat(b, t, j, &min_key, &max_key);
                }
        }
@@ -1331,7 +1331,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
                        p = bkey_float_get(base, n << 4);
                        prefetch(p);
                } else if (n << 3 < t->size) {
-                       inorder = __eytzinger_to_inorder(n, t->size, t->extra);
+                       inorder = __eytzinger1_to_inorder(n, t->size, t->extra);
                        p = bset_cacheline(b, t, inorder);
 #ifdef CONFIG_X86_64
                        asm(".intel_syntax noprefix;"
@@ -1362,7 +1362,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
                                                &search, packed_search, n);
        } while (n < t->size);
 
-       inorder = __eytzinger_to_inorder(n >> 1, t->size, t->extra);
+       inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra);
 
        /*
         * n would have been the node we recursed to - the low bit tells us if
@@ -1372,7 +1372,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
                return cacheline_to_bkey(b, t, inorder, f->key_offset);
        } else {
                if (--inorder) {
-                       n = eytzinger_prev(n >> 1, t->size);
+                       n = eytzinger1_prev(n >> 1, t->size);
                        f = bkey_float_get(base, n);
                        return cacheline_to_bkey(b, t, inorder, f->key_offset);
                } else
@@ -1790,7 +1790,7 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
        if (!bset_has_ro_aux_tree(t))
                goto out;
 
-       j = __inorder_to_eytzinger(bkey_to_cacheline(b, t, k), t->size, t->extra);
+       j = __inorder_to_eytzinger1(bkey_to_cacheline(b, t, k), t->size, t->extra);
        if (j &&
            j < t->size &&
            k == tree_to_bkey(b, t, j))
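
The `1` suffix marks these as the 1-based Eytzinger helpers: with the root at index 1, the children of node j sit at 2j and 2j+1, so descending a level is a shift, which is what the prefetch arithmetic in bset_search_tree() above (n << 4, n << 3) relies on. The index math, spelled out so it can be checked on its own (paraphrased from the helpers in eytzinger.h):

    #include <assert.h>

    /* 1-based Eytzinger layout: root at 1, children of j at 2j and 2j+1 */
    static inline unsigned eytzinger1_left_child(unsigned j)  { return 2 * j; }
    static inline unsigned eytzinger1_right_child(unsigned j) { return 2 * j + 1; }
    static inline unsigned eytzinger1_parent(unsigned j)      { return j >> 1; }

    int main(void)
    {
            assert(eytzinger1_left_child(1)  == 2);
            assert(eytzinger1_right_child(1) == 3);
            assert(eytzinger1_parent(eytzinger1_right_child(5)) == 5);
            return 0;
    }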
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index c37c8959d02ba511c277174481a363d5f6e045da..bdbe21accfc0e7886831c1754331312b4cede1a9 100644
@@ -163,10 +163,14 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
                goto out_unlock;
 
        if (btree_node_dirty(b) ||
-           btree_node_write_in_flight(b)) {
+           btree_node_write_in_flight(b) ||
+           btree_node_read_in_flight(b)) {
                if (!flush)
                        goto out_unlock;
 
+               wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                              TASK_UNINTERRUPTIBLE);
+
                /*
                 * Using the underscore version because we don't want to compact
                 * bsets after the write, since this node is about to be evicted
@@ -582,7 +586,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_iter *iter,
        if (btree_node_read_locked(iter, level + 1))
                btree_node_unlock(iter, level + 1);
 
-       bch2_btree_node_read(c, b);
+       bch2_btree_node_read(c, b, true);
        six_unlock_write(&b->lock);
 
        if (lock_type == SIX_LOCK_read)
@@ -673,6 +677,9 @@ retry:
                }
        }
 
+       wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                      TASK_UNINTERRUPTIBLE);
+
        prefetch(b->aux_data);
 
        for_each_bset(b, t) {
@@ -700,6 +707,44 @@ retry:
        return b;
 }
 
+void bch2_btree_node_prefetch(struct btree_iter *iter,
+                             const struct bkey_i *k, unsigned level)
+{
+       struct bch_fs *c = iter->c;
+       struct btree *b;
+
+       BUG_ON(level >= BTREE_MAX_DEPTH);
+
+       rcu_read_lock();
+       b = mca_find(c, k);
+       rcu_read_unlock();
+
+       if (b)
+               return;
+
+       b = bch2_btree_node_mem_alloc(c);
+       if (IS_ERR(b))
+               return;
+
+       bkey_copy(&b->key, k);
+       if (bch2_btree_node_hash_insert(c, b, level, iter->btree_id)) {
+               /* raced with another fill: */
+
+               /* mark as unhashed... */
+               bkey_i_to_extent(&b->key)->v._data[0] = 0;
+
+               mutex_lock(&c->btree_cache_lock);
+               list_add(&b->list, &c->btree_cache_freeable);
+               mutex_unlock(&c->btree_cache_lock);
+               goto out;
+       }
+
+       bch2_btree_node_read(c, b, false);
+out:
+       six_unlock_write(&b->lock);
+       six_unlock_intent(&b->lock);
+}
+
 int bch2_print_btree_node(struct bch_fs *c, struct btree *b,
                          char *buf, size_t len)
 {
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 23f637ab64cd4fe000da2d2f94dc594c6831f4f8..ca8e3195203e12cef752b63a692db5e0a8bc15c1 100644
@@ -22,6 +22,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 struct btree *bch2_btree_node_get(struct btree_iter *, const struct bkey_i *,
                                  unsigned, enum six_lock_type);
 
+void bch2_btree_node_prefetch(struct btree_iter *, const struct bkey_i *,
+                             unsigned);
+
 void bch2_fs_btree_exit(struct bch_fs *);
 int bch2_fs_btree_init(struct bch_fs *);
 
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 88ae396782d6a1a8d1d6b77a974668081774e81f..99d28f649f4aaac7df86b48a35c21d9d05b789c2 100644
@@ -225,7 +225,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
        btree_node_range_checks_init(&r, depth);
 
-       for_each_btree_node(&iter, c, btree_id, POS_MIN, depth, b) {
+       __for_each_btree_node(&iter, c, btree_id, POS_MIN,
+                             0, depth, BTREE_ITER_PREFETCH, b) {
                btree_node_range_checks(c, b, &r);
 
                bch2_verify_btree_nr_keys(b);
@@ -779,7 +780,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
         */
        memset(merge, 0, sizeof(merge));
 
-       __for_each_btree_node(&iter, c, btree_id, POS_MIN, 0, b, U8_MAX) {
+       __for_each_btree_node(&iter, c, btree_id, POS_MIN,
+                             U8_MAX, 0, BTREE_ITER_PREFETCH, b) {
                memmove(merge + 1, merge,
                        sizeof(merge) - sizeof(merge[0]));
                memmove(lock_seq + 1, lock_seq,
@@ -952,7 +954,7 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
         * We have to hit every btree node before starting journal replay, in
         * order for the journal seq blacklist machinery to work:
         */
-       for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
+       for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
                btree_node_range_checks(c, b, &r);
 
                if (btree_node_has_ptrs(b)) {
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 82dd196d6cddd12ed78f3fa298904d478e3312de..541fffb65a53d8cd7dddb6d0e855458282c982cb 100644
@@ -1196,6 +1196,8 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
 
        btree_node_reset_sib_u64s(b);
 out:
+       clear_btree_node_read_in_flight(b);
+       wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
        mempool_free(iter, &c->fill_iter);
        return;
 err:
@@ -1206,13 +1208,48 @@ fsck_err:
        goto out;
 }
 
-void bch2_btree_node_read(struct bch_fs *c, struct btree *b)
+static void btree_node_read_work(struct work_struct *work)
+{
+       struct btree_read_bio *rb =
+               container_of(work, struct btree_read_bio, work);
+
+       bch2_btree_node_read_done(rb->c, rb->bio.bi_private,
+                                 rb->pick.ca, &rb->pick.ptr);
+
+       percpu_ref_put(&rb->pick.ca->io_ref);
+       bio_put(&rb->bio);
+}
+
+static void btree_node_read_endio(struct bio *bio)
+{
+       struct btree *b = bio->bi_private;
+       struct btree_read_bio *rb =
+               container_of(bio, struct btree_read_bio, bio);
+
+       if (bch2_dev_fatal_io_err_on(bio->bi_error,
+                       rb->pick.ca, "IO error reading bucket %zu",
+                       PTR_BUCKET_NR(rb->pick.ca, &rb->pick.ptr)) ||
+           bch2_meta_read_fault("btree")) {
+               set_btree_node_read_error(b);
+               percpu_ref_put(&rb->pick.ca->io_ref);
+               bio_put(bio);
+               return;
+       }
+
+       INIT_WORK(&rb->work, btree_node_read_work);
+       schedule_work(&rb->work);
+}
+
+void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
+                         bool sync)
 {
        uint64_t start_time = local_clock();
-       struct bio *bio;
        struct extent_pick_ptr pick;
+       struct btree_read_bio *rb;
+       struct bio *bio;
 
        trace_btree_read(c, b);
+       set_btree_node_read_in_flight(b);
 
        pick = bch2_btree_pick_ptr(c, b);
        if (bch2_fs_fatal_err_on(!pick.ca, c,
@@ -1222,27 +1259,36 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b)
        }
 
        bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
+       rb = container_of(bio, struct btree_read_bio, bio);
+       rb->c                   = c;
+       rb->pick                = pick;
+       bio->bi_opf             = REQ_OP_READ|REQ_SYNC|REQ_META;
        bio->bi_bdev            = pick.ca->disk_sb.bdev;
        bio->bi_iter.bi_sector  = pick.ptr.offset;
        bio->bi_iter.bi_size    = btree_bytes(c);
-       bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
        bch2_bio_map(bio, b->data);
 
-       submit_bio_wait(bio);
+       if (sync) {
+               submit_bio_wait(bio);
 
-       if (bch2_dev_fatal_io_err_on(bio->bi_error,
-                                 pick.ca, "IO error reading bucket %zu",
-                                 PTR_BUCKET_NR(pick.ca, &pick.ptr)) ||
-           bch2_meta_read_fault("btree")) {
-               set_btree_node_read_error(b);
-               goto out;
-       }
+               if (bch2_dev_fatal_io_err_on(bio->bi_error,
+                               pick.ca, "IO error reading bucket %zu",
+                               PTR_BUCKET_NR(pick.ca, &pick.ptr)) ||
+                   bch2_meta_read_fault("btree")) {
+                       set_btree_node_read_error(b);
+                       goto out;
+               }
 
-       bch2_btree_node_read_done(c, b, pick.ca, &pick.ptr);
-       bch2_time_stats_update(&c->btree_read_time, start_time);
+               bch2_btree_node_read_done(c, b, pick.ca, &pick.ptr);
+               bch2_time_stats_update(&c->btree_read_time, start_time);
 out:
-       bio_put(bio);
-       percpu_ref_put(&pick.ca->io_ref);
+               bio_put(bio);
+               percpu_ref_put(&pick.ca->io_ref);
+       } else {
+               bio->bi_end_io  = btree_node_read_endio;
+               bio->bi_private = b;
+               submit_bio(bio);
+       }
 }
 
 int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
@@ -1267,7 +1313,7 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
        bkey_copy(&b->key, k);
        BUG_ON(bch2_btree_node_hash_insert(c, b, level, id));
 
-       bch2_btree_node_read(c, b);
+       bch2_btree_node_read(c, b, true);
        six_unlock_write(&b->lock);
 
        if (btree_node_read_error(b)) {
@@ -1557,10 +1603,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        wbio->put_bio           = true;
        wbio->order             = order;
        wbio->used_mempool      = used_mempool;
+       bio->bi_opf             = REQ_OP_WRITE|REQ_META|REQ_FUA;
        bio->bi_iter.bi_size    = sectors_to_write << 9;
        bio->bi_end_io          = btree_node_write_endio;
        bio->bi_private         = b;
-       bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA);
 
        if (parent)
                closure_get(parent);
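
The new BTREE_NODE_read_in_flight bit ties the pieces above together: bch2_btree_node_read() sets it before submitting, bch2_btree_node_read_done() clears it and calls wake_up_bit(), and anyone who needs the node's contents blocks in wait_on_bit_io() (see the btree_cache.c hunks earlier). A user-space analogue of that handoff, with a pthread standing in for the bio completion and a condition variable standing in for the wait_on_bit machinery:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static bool read_in_flight = true;     /* set before "submitting" */

    static void *completion(void *arg)
    {
            (void)arg;
            /* bch2_btree_node_read_done() would parse the node here */
            pthread_mutex_lock(&lock);
            read_in_flight = false;        /* clear_btree_node_read_in_flight() */
            pthread_cond_broadcast(&cond); /* wake_up_bit() */
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, completion, NULL);

            pthread_mutex_lock(&lock);     /* wait_on_bit_io() */
            while (read_in_flight)
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);

            puts("node contents now safe to use");
            return pthread_join(t, NULL);
    }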
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index d023dfae6d9786b5ee47c789a74000b907274fc5..7333f3052c65f2db03b410ff36f54763cc78efdc 100644
@@ -1,11 +1,20 @@
 #ifndef _BCACHE_BTREE_IO_H
 #define _BCACHE_BTREE_IO_H
 
+#include "extents.h"
+
 struct bch_fs;
 struct btree_write;
 struct btree;
 struct btree_iter;
 
+struct btree_read_bio {
+       struct bch_fs           *c;
+       struct extent_pick_ptr  pick;
+       struct work_struct      work;
+       struct bio              bio;
+};
+
 static inline void btree_node_io_unlock(struct btree *b)
 {
        EBUG_ON(!btree_node_write_in_flight(b));
@@ -64,7 +73,7 @@ void bch2_btree_init_next(struct bch_fs *, struct btree *,
 
 void bch2_btree_node_read_done(struct bch_fs *, struct btree *,
                              struct bch_dev *, const struct bch_extent_ptr *);
-void bch2_btree_node_read(struct bch_fs *, struct btree *);
+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
 int bch2_btree_root_read(struct bch_fs *, enum btree_id,
                        const struct bkey_i *, unsigned);
 
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 0b28082e670ceb66baf80043a528f6975931671f..e5da186bfbbcfb89a07bb4bac9274617b73b1d8d 100644
@@ -161,8 +161,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (type == SIX_LOCK_intent &&
                    linked->nodes_locked != linked->nodes_intent_locked) {
-                       linked->locks_want = max(linked->locks_want,
-                                                iter->locks_want);
+                       linked->locks_want = max_t(unsigned,
+                                                  linked->locks_want,
+                                                  iter->locks_want);
                        return false;
                }
 
@@ -177,8 +178,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (linked->btree_id == iter->btree_id &&
                    level > __fls(linked->nodes_locked)) {
-                       linked->locks_want = max(linked->locks_want,
-                                                iter->locks_want);
+                       linked->locks_want = max_t(unsigned,
+                                                  linked->locks_want,
+                                                  iter->locks_want);
                        return false;
                }
        }
@@ -247,12 +249,10 @@ fail:
 
 static int __bch2_btree_iter_unlock(struct btree_iter *iter)
 {
-       BUG_ON(iter->error == -EINTR);
-
        while (iter->nodes_locked)
                btree_node_unlock(iter, __ffs(iter->nodes_locked));
 
-       return iter->error;
+       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
 }
 
 int bch2_btree_iter_unlock(struct btree_iter *iter)
@@ -285,7 +285,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
                ? bch2_btree_node_iter_prev(&tmp, b)
                : bch2_btree_node_iter_prev_all(&tmp, b);
        if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
-                                          iter->is_extents)) {
+                               iter->flags & BTREE_ITER_IS_EXTENTS)) {
                char buf[100];
                struct bkey uk = bkey_unpack_key(b, k);
 
@@ -296,7 +296,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
 
        k = bch2_btree_node_iter_peek_all(node_iter, b);
        if (k && !btree_iter_pos_cmp_packed(b, &iter->pos, k,
-                                           iter->is_extents)) {
+                               iter->flags & BTREE_ITER_IS_EXTENTS)) {
                char buf[100];
                struct bkey uk = bkey_unpack_key(b, k);
 
@@ -340,7 +340,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
        /* didn't find the bset in the iterator - might have to readd it: */
        if (new_u64s &&
            btree_iter_pos_cmp_packed(b, &iter->pos, where,
-                                     iter->is_extents))
+                                     iter->flags & BTREE_ITER_IS_EXTENTS))
                bch2_btree_node_iter_push(node_iter, b, where, end);
        return;
 found:
@@ -352,7 +352,7 @@ found:
 
        if (new_u64s &&
            btree_iter_pos_cmp_packed(b, &iter->pos, where,
-                                     iter->is_extents)) {
+                               iter->flags & BTREE_ITER_IS_EXTENTS)) {
                set->k = offset;
                bch2_btree_node_iter_sort(node_iter, b);
        } else if (set->k < offset + clobber_u64s) {
@@ -388,7 +388,7 @@ found:
         */
        if (b->level && new_u64s && !bkey_deleted(where) &&
            btree_iter_pos_cmp_packed(b, &iter->pos, where,
-                                     iter->is_extents)) {
+                               iter->flags & BTREE_ITER_IS_EXTENTS)) {
                struct bset_tree *t;
                struct bkey_packed *k;
 
@@ -535,9 +535,9 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
 static inline void __btree_iter_init(struct btree_iter *iter,
                                     struct btree *b)
 {
-       bch2_btree_node_iter_init(&iter->node_iters[b->level], b,
-                                iter->pos, iter->is_extents,
-                                btree_node_is_extents(b));
+       bch2_btree_node_iter_init(&iter->node_iters[b->level], b, iter->pos,
+                                 iter->flags & BTREE_ITER_IS_EXTENTS,
+                                 btree_node_is_extents(b));
 
        /* Skip to first non whiteout: */
        if (b->level)
@@ -549,7 +549,8 @@ static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
 {
        return iter->btree_id == b->btree_id &&
                bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
-               btree_iter_pos_cmp(iter->pos, &b->key.k, iter->is_extents);
+               btree_iter_pos_cmp(iter->pos, &b->key.k,
+                                  iter->flags & BTREE_ITER_IS_EXTENTS);
 }
 
 static inline void btree_iter_node_set(struct btree_iter *iter,
@@ -695,6 +696,26 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
        }
 }
 
+noinline
+static void btree_iter_prefetch(struct btree_iter *iter)
+{
+       struct btree *b = iter->nodes[iter->level + 1];
+       struct btree_node_iter node_iter = iter->node_iters[iter->level + 1];
+       struct bkey_packed *k;
+       BKEY_PADDED(k) tmp;
+       unsigned nr = iter->level ? 1 : 8;
+
+       while (nr) {
+               bch2_btree_node_iter_advance(&node_iter, b);
+               k = bch2_btree_node_iter_peek(&node_iter, b);
+               if (!k)
+                       break;
+
+               bch2_bkey_unpack(b, &tmp.k, k);
+               bch2_btree_node_prefetch(iter, &tmp.k, iter->level);
+       }
+}
+
 static inline int btree_iter_down(struct btree_iter *iter)
 {
        struct btree *b;
@@ -712,6 +733,10 @@ static inline int btree_iter_down(struct btree_iter *iter)
        iter->level = level;
        mark_btree_node_locked(iter, level, lock_type);
        btree_iter_node_set(iter, b);
+
+       if (iter->flags & BTREE_ITER_PREFETCH)
+               btree_iter_prefetch(iter);
+
        return 0;
 }
 
@@ -791,7 +816,7 @@ out:
 io_error:
        BUG_ON(ret != -EIO);
 
-       iter->error = ret;
+       iter->flags |= BTREE_ITER_ERROR;
        iter->nodes[iter->level] = NULL;
        goto out;
 }
@@ -834,7 +859,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
                 bch2_btree_node_relock(iter, iter->level) &&
                 btree_iter_pos_cmp(iter->pos,
                                    &iter->nodes[iter->level]->key.k,
-                                   iter->is_extents)))
+                                   iter->flags & BTREE_ITER_IS_EXTENTS)))
                btree_iter_up(iter);
 
        /*
@@ -845,7 +870,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
                struct bkey_s_c k;
 
                while ((k = __btree_iter_peek_all(iter)).k &&
-                      !btree_iter_pos_cmp(iter->pos, k.k, iter->is_extents))
+                      !btree_iter_pos_cmp(iter->pos, k.k,
+                                          iter->flags & BTREE_ITER_IS_EXTENTS))
                        __btree_iter_advance(iter);
        }
 
@@ -875,7 +901,7 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
        if (unlikely(!iter->nodes[iter->level]))
                return 0;
 
-       iter->at_end_of_leaf = false;
+       iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF;
 
        ret = __bch2_btree_iter_traverse(iter);
        if (unlikely(ret))
@@ -891,7 +917,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
        struct btree *b;
        int ret;
 
-       EBUG_ON(iter->is_extents);
+       EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
 
        ret = bch2_btree_iter_traverse(iter);
        if (ret)
@@ -912,7 +938,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
        struct btree *b;
        int ret;
 
-       EBUG_ON(iter->is_extents);
+       EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
 
        btree_iter_up(iter);
 
@@ -964,12 +990,13 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
               !btree_iter_pos_cmp_packed(b, &new_pos, k,
-                                         iter->is_extents))
+                                         iter->flags & BTREE_ITER_IS_EXTENTS))
                bch2_btree_node_iter_advance(node_iter, b);
 
        if (!k &&
-           !btree_iter_pos_cmp(new_pos, &b->key.k, iter->is_extents))
-               iter->at_end_of_leaf = true;
+           !btree_iter_pos_cmp(new_pos, &b->key.k,
+                               iter->flags & BTREE_ITER_IS_EXTENTS))
+               iter->flags |= BTREE_ITER_AT_END_OF_LEAF;
 
        iter->pos = new_pos;
 }
@@ -1006,6 +1033,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
        struct bkey_s_c k;
        int ret;
 
+       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+               (iter->btree_id == BTREE_ID_EXTENTS));
+
        while (1) {
                ret = bch2_btree_iter_traverse(iter);
                if (unlikely(ret)) {
@@ -1019,7 +1049,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                         * iter->pos should always be equal to the key we just
                         * returned - except extents can straddle iter->pos:
                         */
-                       if (!iter->is_extents ||
+                       if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
                            bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
                                bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
                        return k;
@@ -1043,6 +1073,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_holes(struct btree_iter *iter)
        struct bkey n;
        int ret;
 
+       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+               (iter->btree_id == BTREE_ID_EXTENTS));
+
        while (1) {
                ret = bch2_btree_iter_traverse(iter);
                if (unlikely(ret)) {
@@ -1057,7 +1090,7 @@ recheck:
                        bkey_init(&n);
                        n.p = iter->pos;
 
-                       if (iter->is_extents) {
+                       if (iter->flags & BTREE_ITER_IS_EXTENTS) {
                                if (n.p.offset == KEY_OFFSET_MAX) {
                                        iter->pos = bkey_successor(iter->pos);
                                        goto recheck;
@@ -1087,21 +1120,18 @@ recheck:
 }
 
 void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
-                          enum btree_id btree_id, struct bpos pos,
-                          unsigned locks_want, unsigned depth)
+                           enum btree_id btree_id, struct bpos pos,
+                           unsigned locks_want, unsigned depth,
+                           unsigned flags)
 {
+       iter->c                         = c;
+       iter->pos                       = pos;
+       iter->flags                     = flags;
+       iter->btree_id                  = btree_id;
        iter->level                     = depth;
-       /* bch2_bkey_ops isn't used much, this would be a cache miss */
-       /* iter->is_extents             = bch2_bkey_ops[btree_id]->is_extents; */
-       iter->is_extents                = btree_id == BTREE_ID_EXTENTS;
+       iter->locks_want                = min(locks_want, BTREE_MAX_DEPTH);
        iter->nodes_locked              = 0;
        iter->nodes_intent_locked       = 0;
-       iter->locks_want                = min(locks_want, BTREE_MAX_DEPTH);
-       iter->btree_id                  = btree_id;
-       iter->at_end_of_leaf            = 0;
-       iter->error                     = 0;
-       iter->c                         = c;
-       iter->pos                       = pos;
        memset(iter->nodes, 0, sizeof(iter->nodes));
        iter->nodes[iter->level]        = BTREE_ITER_NOT_END;
        iter->next                      = iter;
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 7cf9bd633e3d7da08a0d4086a4ce85f748b7459a..57f38765f72eaba5a48a24fc00eace1739f98c2f 100644
@@ -3,37 +3,38 @@
 
 #include "btree_types.h"
 
-struct btree_iter {
-       /* Current btree depth */
-       u8                      level;
-
-       /*
-        * Used in bch2_btree_iter_traverse(), to indicate whether we're
-        * searching for @pos or the first key strictly greater than @pos
-        */
-       u8                      is_extents;
 
-       /* Bitmasks for read/intent locks held per level */
-       u8                      nodes_locked;
-       u8                      nodes_intent_locked;
+#define BTREE_ITER_INTENT              (1 << 0)
+#define BTREE_ITER_WITH_HOLES          (1 << 1)
+#define BTREE_ITER_PREFETCH            (1 << 2)
+/*
+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
+ * @pos or the first key strictly greater than @pos
+ */
+#define BTREE_ITER_IS_EXTENTS          (1 << 3)
+/*
+ * indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
+ */
+#define BTREE_ITER_AT_END_OF_LEAF      (1 << 4)
+#define BTREE_ITER_ERROR               (1 << 5)
 
-       /* Btree level below which we start taking intent locks */
-       u8                      locks_want;
+/*
+ * @pos                        - iterator's current position
+ * @level              - current btree depth
+ * @locks_want         - btree level below which we start taking intent locks
+ * @nodes_locked       - bitmask indicating which nodes in @nodes are locked
+ * @nodes_intent_locked        - bitmask indicating which locks are intent locks
+ */
+struct btree_iter {
+       struct bch_fs           *c;
+       struct bpos             pos;
 
+       u8                      flags;
        enum btree_id           btree_id:8;
-
-       /*
-        * indicates we need to call bch2_btree_iter_traverse() to revalidate
-        * iterator:
-        */
-       u8                      at_end_of_leaf;
-
-       s8                      error;
-
-       struct bch_fs   *c;
-
-       /* Current position of the iterator */
-       struct bpos             pos;
+       unsigned                level:4,
+                               locks_want:4,
+                               nodes_locked:4,
+                               nodes_intent_locked:4;
 
        u32                     lock_seq[BTREE_MAX_DEPTH];
 
@@ -166,22 +167,17 @@ void bch2_btree_iter_advance_pos(struct btree_iter *);
 void bch2_btree_iter_rewind(struct btree_iter *, struct bpos);
 
 void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *,
-                          enum btree_id, struct bpos, unsigned , unsigned);
+                          enum btree_id, struct bpos,
+                          unsigned , unsigned, unsigned);
 
 static inline void bch2_btree_iter_init(struct btree_iter *iter,
-                                      struct bch_fs *c,
-                                      enum btree_id btree_id,
-                                      struct bpos pos)
-{
-       __bch2_btree_iter_init(iter, c, btree_id, pos, 0, 0);
-}
-
-static inline void bch2_btree_iter_init_intent(struct btree_iter *iter,
-                                             struct bch_fs *c,
-                                             enum btree_id btree_id,
-                                             struct bpos pos)
+                       struct bch_fs *c, enum btree_id btree_id,
+                       struct bpos pos, unsigned flags)
 {
-       __bch2_btree_iter_init(iter, c, btree_id, pos, 1, 0);
+       __bch2_btree_iter_init(iter, c, btree_id, pos,
+                              flags & BTREE_ITER_INTENT ? 1 : 0, 0,
+                              btree_id == BTREE_ID_EXTENTS
+                              ?  BTREE_ITER_IS_EXTENTS : 0);
 }
 
 void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *);
@@ -216,45 +212,25 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
        return __btree_iter_cmp(l->btree_id, l->pos, r);
 }
 
-#define __for_each_btree_node(_iter, _c, _btree_id, _start, _depth,    \
-                             _b, _locks_want)                          \
-       for (__bch2_btree_iter_init((_iter), (_c), (_btree_id),         \
-                                  _start, _locks_want, _depth),        \
-            (_iter)->is_extents = false,                               \
+#define __for_each_btree_node(_iter, _c, _btree_id, _start,            \
+                             _locks_want, _depth, _flags, _b)          \
+       for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \
+                                   _locks_want, _depth, _flags),       \
             _b = bch2_btree_iter_peek_node(_iter);                     \
             (_b);                                                      \
             (_b) = bch2_btree_iter_next_node(_iter, _depth))
 
-#define for_each_btree_node(_iter, _c, _btree_id, _start, _depth, _b)  \
-       __for_each_btree_node(_iter, _c, _btree_id, _start, _depth, _b, 0)
+#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b)  \
+       __for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b)
 
-#define __for_each_btree_key(_iter, _c, _btree_id,  _start,            \
-                            _k, _locks_want)                           \
-       for (__bch2_btree_iter_init((_iter), (_c), (_btree_id),         \
-                                  _start, _locks_want, 0);             \
-            !IS_ERR_OR_NULL(((_k) = bch2_btree_iter_peek(_iter)).k);   \
+#define for_each_btree_key(_iter, _c, _btree_id,  _start, _flags, _k)  \
+       for (bch2_btree_iter_init((_iter), (_c), (_btree_id),   \
+                                 (_start), (_flags));          \
+            !IS_ERR_OR_NULL(((_k) = (((_flags) & BTREE_ITER_WITH_HOLES)\
+                               ? bch2_btree_iter_peek_with_holes(_iter)\
+                               : bch2_btree_iter_peek(_iter))).k);     \
             bch2_btree_iter_advance_pos(_iter))
 
-#define for_each_btree_key(_iter, _c, _btree_id,  _start, _k)          \
-       __for_each_btree_key(_iter, _c, _btree_id, _start, _k, 0)
-
-#define for_each_btree_key_intent(_iter, _c, _btree_id,  _start, _k)   \
-       __for_each_btree_key(_iter, _c, _btree_id, _start, _k, 1)
-
-#define __for_each_btree_key_with_holes(_iter, _c, _btree_id,          \
-                                       _start, _k, _locks_want)        \
-       for (__bch2_btree_iter_init((_iter), (_c), (_btree_id),         \
-                                  _start, _locks_want, 0);             \
-            !IS_ERR_OR_NULL(((_k) = bch2_btree_iter_peek_with_holes(_iter)).k);\
-            bch2_btree_iter_advance_pos(_iter))
-
-#define for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k)        \
-       __for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 0)
-
-#define for_each_btree_key_with_holes_intent(_iter, _c, _btree_id,     \
-                                            _start, _k)                \
-       __for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 1)
-
 static inline int btree_iter_err(struct bkey_s_c k)
 {
        return IS_ERR(k.k) ? PTR_ERR(k.k) : 0;
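
With iterator behaviour selected by flags, the six macro variants collapse to two, and call sites compose what they need. For example, what was for_each_btree_key_with_holes_intent() becomes the following sketch (c, start and the want_to_stop() exit condition are illustrative):

    struct btree_iter iter;
    struct bkey_s_c k;
    int ret;

    for_each_btree_key(&iter, c, BTREE_ID_INODES, start,
                       BTREE_ITER_INTENT|BTREE_ITER_WITH_HOLES, k) {
            /* k is a real key, or a hole synthesized at the iterator pos */
            if (want_to_stop(k))        /* hypothetical */
                    break;
    }
    ret = bch2_btree_iter_unlock(&iter);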
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index a0f5b579fe2a92f50ac69160aa9f81fb12ec25b2..c613a7bc8335f4e13a228c6450e6297b331636cd 100644
@@ -141,6 +141,7 @@ static inline void clear_btree_node_ ## flag(struct btree *b)               \
 {      clear_bit(BTREE_NODE_ ## flag, &b->flags); }
 
 enum btree_flags {
+       BTREE_NODE_read_in_flight,
        BTREE_NODE_read_error,
        BTREE_NODE_write_error,
        BTREE_NODE_dirty,
@@ -152,6 +153,7 @@ enum btree_flags {
        BTREE_NODE_just_written,
 };
 
+BTREE_FLAG(read_in_flight);
 BTREE_FLAG(read_error);
 BTREE_FLAG(write_error);
 BTREE_FLAG(dirty);
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index cfd2a455fffe6ee58efce84b827f09050faa35cf..2f67c0927a4d0e33aff09bacd962c48990d1f8e5 100644
@@ -2047,7 +2047,7 @@ unlock:
         * traversed again
         */
        trans_for_each_entry(trans, i)
-               if (i->iter->at_end_of_leaf)
+               if (i->iter->flags & BTREE_ITER_AT_END_OF_LEAF)
                        goto out;
 
        trans_for_each_entry(trans, i)
@@ -2161,7 +2161,8 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
        struct btree_iter iter;
        int ret, ret2;
 
-       bch2_btree_iter_init_intent(&iter, c, id, bkey_start_pos(&k->k));
+       bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
+                            BTREE_ITER_INTENT);
 
        ret = bch2_btree_iter_traverse(&iter);
        if (unlikely(ret))
@@ -2187,7 +2188,8 @@ int bch2_btree_update(struct bch_fs *c, enum btree_id id,
 
        EBUG_ON(id == BTREE_ID_EXTENTS);
 
-       bch2_btree_iter_init_intent(&iter, c, id, k->k.p);
+       bch2_btree_iter_init(&iter, c, id, k->k.p,
+                            BTREE_ITER_INTENT);
 
        u = bch2_btree_iter_peek_with_holes(&iter);
        ret = btree_iter_err(u);
@@ -2222,7 +2224,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_btree_iter_init_intent(&iter, c, id, start);
+       bch2_btree_iter_init(&iter, c, id, start,
+                            BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = btree_iter_err(k))) {
@@ -2248,7 +2251,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                delete.k.p = iter.pos;
                delete.k.version = version;
 
-               if (iter.is_extents) {
+               if (iter.flags & BTREE_ITER_IS_EXTENTS) {
                        /*
                         * The extents btree is special - KEY_TYPE_DISCARD is
                         * used for deletions, not KEY_TYPE_DELETED. This is an
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 184a29f93e413e4276702714f8d35ae52f343e9f..1c2f692160874b396a0a5ffc51532a0747b1be58 100644
@@ -317,7 +317,6 @@ void bch2_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
                new.data_type           = 0;
                new.cached_sectors      = 0;
                new.dirty_sectors       = 0;
-               new.copygc              = 0;
                new.gen++;
        }));
 
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 3b82d7f32120e2552ee1ab88f46df728a0067be8..f99a62bcc9bf418cc6f60b257629615b0eecdbf8 100644
@@ -95,33 +95,6 @@ static inline u8 ptr_stale(const struct bch_dev *ca,
        return gen_after(PTR_BUCKET(ca, ptr)->mark.gen, ptr->gen);
 }
 
-/* bucket heaps */
-
-static inline bool bucket_min_cmp(struct bucket_heap_entry l,
-                                 struct bucket_heap_entry r)
-{
-       return l.val < r.val;
-}
-
-static inline bool bucket_max_cmp(struct bucket_heap_entry l,
-                                 struct bucket_heap_entry r)
-{
-       return l.val > r.val;
-}
-
-static inline void bucket_heap_push(struct bch_dev *ca, struct bucket *g,
-                                   unsigned long val)
-{
-       struct bucket_heap_entry new = { g, val };
-
-       if (!heap_full(&ca->heap))
-               heap_add(&ca->heap, new, bucket_min_cmp);
-       else if (bucket_min_cmp(new, heap_peek(&ca->heap))) {
-               ca->heap.data[0] = new;
-               heap_sift(&ca->heap, 0, bucket_min_cmp);
-       }
-}
-
 /* bucket gc marks */
 
 /* The dirty and cached sector counts saturate. If this occurs,
@@ -129,14 +102,16 @@ static inline void bucket_heap_push(struct bch_dev *ca, struct bucket *g,
  * GC must be performed. */
 #define GC_MAX_SECTORS_USED ((1U << 15) - 1)
 
-static inline bool bucket_unused(struct bucket *g)
+static inline unsigned bucket_sectors_used(struct bucket_mark mark)
 {
-       return !g->mark.counter;
+       return mark.dirty_sectors + mark.cached_sectors;
 }
 
-static inline unsigned bucket_sectors_used(struct bucket *g)
+static inline bool bucket_unused(struct bucket_mark mark)
 {
-       return g->mark.dirty_sectors + g->mark.cached_sectors;
+       return !mark.owned_by_allocator &&
+               !mark.data_type &&
+               !bucket_sectors_used(mark);
 }
 
 /* Per device stats: */
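
bucket_sectors_used() and bucket_unused() now take a bucket_mark by value instead of a struct bucket pointer: the caller snapshots the mark once with READ_ONCE() and every test runs against that one consistent copy, exactly as invalidate_buckets_fifo() does above. The calling convention, with reuse_bucket() as a hypothetical consumer:

    struct bucket_mark m = READ_ONCE(g->mark);

    if (bucket_unused(m))               /* all fields from one snapshot */
            reuse_bucket(ca, g, m);     /* hypothetical */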
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index ca187099ee4185ec02933f695d4866806566aff4..18bf1713997a93eb148a44aa43ff114af20f3915 100644
@@ -1,6 +1,8 @@
 #ifndef _BUCKETS_TYPES_H
 #define _BUCKETS_TYPES_H
 
+#include "util.h"
+
 enum bucket_data_type {
        BUCKET_DATA     = 0,
        BUCKET_BTREE,
@@ -18,9 +20,6 @@ struct bucket_mark {
        struct {
                u8              gen;
 
-               /* generation copygc is going to move this bucket into */
-               unsigned        copygc:1;
-
                unsigned        journal_seq_valid:1;
 
                /*
@@ -96,10 +95,12 @@ struct bch_fs_usage {
 };
 
 struct bucket_heap_entry {
-       struct bucket *g;
-       unsigned long val;
+       size_t                  bucket;
+       struct bucket_mark      mark;
 };
 
+typedef HEAP(struct bucket_heap_entry) bucket_heap;
+
 /*
  * A reservation for space on disk:
  */
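
bucket_heap is now a named type built from a HEAP() typedef macro rather than a DECLARE_HEAP() member declaration, which is what lets the new comparators take a bucket_heap * argument. A sketch of what util.h presumably provides (the macro body here is an assumption, not quoted from this commit):

    #define HEAP(type)                  \
    struct {                            \
            size_t size, used;          \
            type *data;                 \
    }

    typedef HEAP(struct bucket_heap_entry) bucket_heap;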
diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c
index 68ac62b4c9a7922ddff8e07afe47a798d4297495..650be8cebe8fea8ee8f3ac1e9747d0ee47ab8172 100644
@@ -5,9 +5,11 @@
 #include <linux/kthread.h>
 #include <linux/preempt.h>
 
-static inline bool io_timer_cmp(struct io_timer *l, struct io_timer *r)
+static inline long io_timer_cmp(io_timer_heap *h,
+                               struct io_timer *l,
+                               struct io_timer *r)
 {
-       return time_after(l->expire, r->expire);
+       return l->expire - r->expire;
 }
 
 void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
diff --git a/libbcachefs/clock_types.h b/libbcachefs/clock_types.h
index 4a02f46716031b998a22923deb188557c7409c81..ae068c6d7acb2ecbc313662a91e445f7ee0a97b2 100644
@@ -22,12 +22,14 @@ struct io_timer {
 /* Amount to buffer up on a percpu counter */
 #define IO_CLOCK_PCPU_SECTORS  128
 
+typedef HEAP(struct io_timer *)        io_timer_heap;
+
 struct io_clock {
        atomic_long_t           now;
        u16 __percpu            *pcpu_buf;
 
        spinlock_t              timer_lock;
-       DECLARE_HEAP(struct io_timer *, timers);
+       io_timer_heap           timers;
 };
 
 #endif /* _BCACHE_CLOCK_TYPES_H */
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index bf160e0b93daf2a19f32d27661700258d14610f9..d4c8ce55e752113ce1152a6841642be81bd0bae8 100644
@@ -60,9 +60,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
 
        bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
        bio->bi_bdev            = pick.ca->disk_sb.bdev;
+       bio->bi_opf             = REQ_OP_READ|REQ_META;
        bio->bi_iter.bi_sector  = pick.ptr.offset;
        bio->bi_iter.bi_size    = btree_bytes(c);
-       bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
        bch2_bio_map(bio, n_sorted);
 
        submit_bio_wait(bio);
@@ -212,7 +212,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_btree_iter_init(&iter, i->c, i->id, i->from);
+       bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(err = btree_iter_err(k))) {
@@ -314,7 +314,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_btree_iter_init(&iter, i->c, i->id, i->from);
+       bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(err = btree_iter_err(k))) {
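
This is the recurring change through the rest of the patch: the bch2_btree_iter_init_intent() and for_each_btree_key_with_holes() variants are gone, replaced by a flags argument on bch2_btree_iter_init() and for_each_btree_key(). The flags are a bitmask (cf. the BTREE_ITER_AT_END_OF_LEAF tests in extents.c below), so - hypothetically, this combination does not appear in the patch - they can be or'd together:

	struct btree_iter iter;

	bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
			     BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
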
index e2978bab46c92594800fede5ec65b795c25da58a..056715bc3ebb912e189a105ecbd6f6adfc0d5f6c 100644 (file)
@@ -214,11 +214,13 @@ int bch2_dirent_rename(struct bch_fs *c,
        bool need_whiteout;
        int ret = -ENOMEM;
 
-       bch2_btree_iter_init_intent(&src_iter, c, BTREE_ID_DIRENTS, src_pos);
-       bch2_btree_iter_init_intent(&dst_iter, c, BTREE_ID_DIRENTS, dst_pos);
+       bch2_btree_iter_init(&src_iter, c, BTREE_ID_DIRENTS, src_pos,
+                            BTREE_ITER_INTENT);
+       bch2_btree_iter_init(&dst_iter, c, BTREE_ID_DIRENTS, dst_pos,
+                            BTREE_ITER_INTENT);
        bch2_btree_iter_link(&src_iter, &dst_iter);
 
-       bch2_btree_iter_init(&whiteout_iter, c, BTREE_ID_DIRENTS, src_pos);
+       bch2_btree_iter_init(&whiteout_iter, c, BTREE_ID_DIRENTS, src_pos, 0);
        bch2_btree_iter_link(&src_iter, &whiteout_iter);
 
        if (mode == BCH_RENAME_EXCHANGE) {
@@ -376,7 +378,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
        struct bkey_s_c k;
        int ret = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) {
                if (k.k->p.inode > dir_inum)
                        break;
 
@@ -405,7 +407,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
        pr_debug("listing for %lu from %llu", inode->i_ino, ctx->pos);
 
        for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
-                          POS(inode->i_ino, ctx->pos), k) {
+                          POS(inode->i_ino, ctx->pos), 0, k) {
                if (k.k->type != BCH_DIRENT)
                        continue;
 
index c80da3620dbabf4600d7b38ca196afb6a70b8ff8..219b60a3341dbf457d5199840d3a120875cd3acb 100644 (file)
@@ -41,13 +41,13 @@ static void sort_key_next(struct btree_node_iter *iter,
  * Necessary for btree_sort_fixup() - if there are multiple keys that compare
  * equal in different sets, we have to process them newest to oldest.
  */
-#define key_sort_cmp(l, r)                                             \
+#define key_sort_cmp(h, l, r)                                          \
 ({                                                                     \
-       int _c = bkey_cmp_packed(b,                                     \
-                                __btree_node_offset_to_key(b, (l).k),  \
-                                __btree_node_offset_to_key(b, (r).k)); \
+       bkey_cmp_packed(b,                                              \
+                       __btree_node_offset_to_key(b, (l).k),           \
+                       __btree_node_offset_to_key(b, (r).k))           \
                                                                        \
-       _c ? _c > 0 : (l).k > (r).k;                                    \
+       ?: (l).k - (r).k;                                               \
 })
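
These comparators lean on the GNU `?:` extension, which yields its left operand whenever that operand is nonzero. Spelled out, the new key_sort_cmp() is equivalent to something like this (an illustration, not code from the patch):

	int c = bkey_cmp_packed(b, lk, rk);	/* primary: key order */

	if (!c)
		c = l_offset - r_offset;	/* tiebreak: key offset */
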
 
 static inline bool should_drop_next_key(struct btree_node_iter *iter,
@@ -63,7 +63,7 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
                return false;
 
        if (iter->used > 2 &&
-           key_sort_cmp(r[0], r[1]))
+           key_sort_cmp(iter, r[0], r[1]) >= 0)
                r++;
 
        /*
@@ -98,7 +98,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
                }
 
                sort_key_next(iter, b, iter->data);
-               heap_sift(iter, 0, key_sort_cmp);
+               heap_sift_down(iter, 0, key_sort_cmp);
        }
 
        dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -754,27 +754,26 @@ static void extent_save(struct btree *b, struct btree_node_iter *iter,
 }
 
 /*
- * Returns true if l > r - unless l == r, in which case returns true if l is
- * older than r.
+ * If keys compare equal, compare by pointer order:
  *
  * Necessary for sort_fix_overlapping() - if there are multiple keys that
  * compare equal in different sets, we have to process them newest to oldest.
  */
-#define extent_sort_cmp(l, r)                                          \
+#define extent_sort_cmp(h, l, r)                                       \
 ({                                                                     \
        struct bkey _ul = bkey_unpack_key(b,                            \
                                __btree_node_offset_to_key(b, (l).k));  \
        struct bkey _ur = bkey_unpack_key(b,                            \
                                __btree_node_offset_to_key(b, (r).k));  \
                                                                        \
-       int _c = bkey_cmp(bkey_start_pos(&_ul), bkey_start_pos(&_ur));  \
-       _c ? _c > 0 : (l).k < (r).k;                                    \
+       bkey_cmp(bkey_start_pos(&_ul),                                  \
+                bkey_start_pos(&_ur)) ?: (r).k - (l).k;                \
 })
 
 static inline void extent_sort_sift(struct btree_node_iter *iter,
                                    struct btree *b, size_t i)
 {
-       heap_sift(iter, i, extent_sort_cmp);
+       heap_sift_down(iter, i, extent_sort_cmp);
 }
 
 static inline void extent_sort_next(struct btree_node_iter *iter,
@@ -782,7 +781,7 @@ static inline void extent_sort_next(struct btree_node_iter *iter,
                                    struct btree_node_iter_set *i)
 {
        sort_key_next(iter, b, i);
-       heap_sift(iter, i - iter->data, extent_sort_cmp);
+       heap_sift_down(iter, i - iter->data, extent_sort_cmp);
 }
 
 static void extent_sort_append(struct bch_fs *c,
@@ -843,7 +842,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
                _r = iter->data + 1;
                if (iter->used > 2 &&
-                   extent_sort_cmp(_r[0], _r[1]))
+                   extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
                        _r++;
 
                rk = __btree_node_offset_to_key(b, _r->k);
@@ -1433,11 +1432,12 @@ stop:
                           gc_pos_btree_node(b));
 
        EBUG_ON(bkey_cmp(iter->pos, s->committed));
-       EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf);
+       EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) !=
+               !!(iter->flags & BTREE_ITER_AT_END_OF_LEAF));
 
        bch2_cut_front(iter->pos, insert);
 
-       if (insert->k.size && iter->at_end_of_leaf)
+       if (insert->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
                ret = BTREE_INSERT_NEED_TRAVERSE;
 
        EBUG_ON(insert->k.size && ret == BTREE_INSERT_OK);
@@ -1596,9 +1596,10 @@ stop:
 
        EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
        EBUG_ON(bkey_cmp(iter->pos, s.committed));
-       EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf);
+       EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) !=
+               !!(iter->flags & BTREE_ITER_AT_END_OF_LEAF));
 
-       if (insert->k->k.size && iter->at_end_of_leaf)
+       if (insert->k->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
                ret = BTREE_INSERT_NEED_TRAVERSE;
 
        EBUG_ON(insert->k->k.size && ret == BTREE_INSERT_OK);
index 13d54e5eb2faa44bb6c6b6addf374726f5f45046..dc23e44d86b5a1be6182d63cb694ca16d0168f4f 100644 (file)
 /*
  * Traversal for trees in eytzinger layout - a full binary tree laid out in an
  * array
+ */
+
+/*
+ * One based indexing version:
  *
- * We used one based indexing, not zero based: with one based indexing, each
- * level of the tree starts at a power of two - leading to better alignment -
- * and it's what you want for implementing next/prev and to/from inorder.
- *
- * To/from inorder also uses 1 based indexing.
+ * With one based indexing, each level of the tree starts at a power of two -
+ * good for cacheline alignment.
  *
  * Size parameter is treated as if we were using 0 based indexing, however:
- * valid nodes, and inorder indices, are in the range [1..size)
+ * valid nodes and inorder indices are in the range [1..size) - that is, there
+ * are actually size - 1 elements.
  */
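
To make the layout concrete (an illustrative aside, not part of the patch): with size == 8, the valid indices are 1..7, laid out breadth-first:

	/*
	 *            1
	 *          /   \
	 *        2       3
	 *       / \     / \
	 *      4   5   6   7
	 *
	 * each level starts at a power of two (1, 2, 4, ...), and the
	 * in-order walk 4 2 5 1 6 3 7 visits the nodes in sorted order.
	 */
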
 
-static inline unsigned eytzinger_child(unsigned j, unsigned child)
+static inline unsigned eytzinger1_child(unsigned i, unsigned child)
 {
        EBUG_ON(child > 1);
 
-       return (j << 1) + child;
+       return (i << 1) + child;
 }
 
-static inline unsigned eytzinger_left_child(unsigned j)
+static inline unsigned eytzinger1_left_child(unsigned i)
 {
-       return eytzinger_child(j, 0);
+       return eytzinger1_child(i, 0);
 }
 
-static inline unsigned eytzinger_right_child(unsigned j)
+static inline unsigned eytzinger1_right_child(unsigned i)
 {
-       return eytzinger_child(j, 1);
+       return eytzinger1_child(i, 1);
 }
 
-static inline unsigned eytzinger_first(unsigned size)
+static inline unsigned eytzinger1_first(unsigned size)
 {
        return rounddown_pow_of_two(size - 1);
 }
 
-static inline unsigned eytzinger_last(unsigned size)
+static inline unsigned eytzinger1_last(unsigned size)
 {
        return rounddown_pow_of_two(size) - 1;
 }
 
 /*
- * eytzinger_next() and eytzinger_prev() have the nice properties that
+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that
  *
- * eytzinger_next(0) == eytzinger_first())
- * eytzinger_prev(0) == eytzinger_last())
+ * eytzinger1_next(0) == eytzinger1_first()
+ * eytzinger1_prev(0) == eytzinger1_last()
  *
- * eytzinger_prev(eytzinger_first()) == 0
- * eytzinger_next(eytzinger_last()) == 0
+ * eytzinger1_prev(eytzinger1_first()) == 0
+ * eytzinger1_next(eytzinger1_last()) == 0
  */
 
-static inline unsigned eytzinger_next(unsigned j, unsigned size)
+static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-       EBUG_ON(j >= size);
+       EBUG_ON(i >= size);
 
-       if (eytzinger_right_child(j) < size) {
-               j = eytzinger_right_child(j);
+       if (eytzinger1_right_child(i) < size) {
+               i = eytzinger1_right_child(i);
 
-               j <<= __fls(size) - __fls(j);
-               j >>= j >= size;
+               i <<= __fls(size) - __fls(i);
+               i >>= i >= size;
        } else {
-               j >>= ffz(j) + 1;
+               i >>= ffz(i) + 1;
        }
 
-       return j;
+       return i;
 }
 
-static inline unsigned eytzinger_prev(unsigned j, unsigned size)
+static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-       EBUG_ON(j >= size);
+       EBUG_ON(i >= size);
 
-       if (eytzinger_left_child(j) < size) {
-               j = eytzinger_left_child(j);
+       if (eytzinger1_left_child(i) < size) {
+               i = eytzinger1_left_child(i);
 
-               j <<= __fls(size) - __fls(j);
-               j -= 1;
-               j >>= j >= size;
+               i <<= __fls(size) - __fls(i);
+               i -= 1;
+               i >>= i >= size;
        } else {
-               j >>= __ffs(j) + 1;
+               i >>= __ffs(i) + 1;
        }
 
-       return j;
+       return i;
 }
 
-static inline unsigned eytzinger_extra(unsigned size)
+static inline unsigned eytzinger1_extra(unsigned size)
 {
        return (size - rounddown_pow_of_two(size - 1)) << 1;
 }
 
-static inline unsigned __eytzinger_to_inorder(unsigned j, unsigned size,
+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
                                              unsigned extra)
 {
-       unsigned b = __fls(j);
+       unsigned b = __fls(i);
        unsigned shift = __fls(size - 1) - b;
        int s;
 
-       EBUG_ON(!j || j >= size);
+       EBUG_ON(!i || i >= size);
 
-       j  ^= 1U << b;
-       j <<= 1;
-       j  |= 1;
-       j <<= shift;
+       i  ^= 1U << b;
+       i <<= 1;
+       i  |= 1;
+       i <<= shift;
 
        /*
         * sign bit trick:
         *
-        * if (j > extra)
-        *      j -= (j - extra) >> 1;
+        * if (i > extra)
+        *      i -= (i - extra) >> 1;
         */
-       s = extra - j;
-       j += (s >> 1) & (s >> 31);
+       s = extra - i;
+       i += (s >> 1) & (s >> 31);
 
-       return j;
+       return i;
 }
 
-static inline unsigned __inorder_to_eytzinger(unsigned j, unsigned size,
-                                             unsigned extra)
+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
+                                              unsigned extra)
 {
        unsigned shift;
        int s;
 
-       EBUG_ON(!j || j >= size);
+       EBUG_ON(!i || i >= size);
 
        /*
         * sign bit trick:
         *
-        * if (j > extra)
-        *      j += j - extra;
+        * if (i > extra)
+        *      i += i - extra;
         */
-       s = extra - j;
-       j -= s & (s >> 31);
+       s = extra - i;
+       i -= s & (s >> 31);
 
-       shift = __ffs(j);
+       shift = __ffs(i);
 
-       j >>= shift + 1;
-       j  |= 1U << (__fls(size - 1) - shift);
+       i >>= shift + 1;
+       i  |= 1U << (__fls(size - 1) - shift);
 
-       return j;
+       return i;
 }
 
-static inline unsigned eytzinger_to_inorder(unsigned j, unsigned size)
+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
 {
-       return __eytzinger_to_inorder(j, size, eytzinger_extra(size));
+       return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
 }
 
-static inline unsigned inorder_to_eytzinger(unsigned j, unsigned size)
+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
 {
-       return __inorder_to_eytzinger(j, size, eytzinger_extra(size));
+       return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
 }
 
-#define eytzinger_for_each(_i, _size)                  \
-       for ((_i) = eytzinger_first((_size));           \
+#define eytzinger1_for_each(_i, _size)                 \
+       for ((_i) = eytzinger1_first((_size));          \
             (_i) != 0;                                 \
-            (_i) = eytzinger_next((_i), (_size)))
+            (_i) = eytzinger1_next((_i), (_size)))
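
A hypothetical use of the renamed iterator (`tree` and `size` assumed): it walks a 1-based eytzinger array in sorted (in-order) order, starting at eytzinger1_first() and terminating when the index wraps back to 0:

	unsigned i;

	eytzinger1_for_each(i, size)
		pr_info("%u\n", tree[i]);	/* visits keys in sorted order */
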
 
 #if 0
-void eytzinger_test(void)
+void eytzinger1_test(void)
 {
        unsigned i, j, size;
 
@@ -172,20 +174,20 @@ void eytzinger_test(void)
                if (!(size % 4096))
                        printk(KERN_INFO "tree size %u\n", size);
 
-               assert(eytzinger_prev(0, size) == eytzinger_last(size));
-               assert(eytzinger_next(0, size) == eytzinger_first(size));
+               assert(eytzinger1_prev(0, size) == eytzinger1_last(size));
+               assert(eytzinger1_next(0, size) == eytzinger1_first(size));
 
-               assert(eytzinger_prev(eytzinger_first(size), size) == 0);
-               assert(eytzinger_next(eytzinger_last(size), size) == 0);
+               assert(eytzinger1_prev(eytzinger1_first(size), size) == 0);
+               assert(eytzinger1_next(eytzinger1_last(size), size) == 0);
 
-               eytzinger_for_each(j, size) {
+               eytzinger1_for_each(j, size) {
                        assert(from_inorder(i, size) == j);
                        assert(to_inorder(j, size) == i);
 
-                       if (j != eytzinger_last(size)) {
-                               unsigned next = eytzinger_next(j, size);
+                       if (j != eytzinger1_last(size)) {
+                               unsigned next = eytzinger1_next(j, size);
 
-                               assert(eytzinger_prev(next, size) == j);
+                               assert(eytzinger1_prev(next, size) == j);
                        }
                }
        }
@@ -193,4 +195,96 @@ void eytzinger_test(void)
 }
 #endif
 
+/* Zero based indexing version: */
+
+static inline unsigned eytzinger0_child(unsigned i, unsigned child)
+{
+       EBUG_ON(child > 1);
+
+       return (i << 1) + 1 + child;
+}
+
+static inline unsigned eytzinger0_left_child(unsigned i)
+{
+       return eytzinger0_child(i, 0);
+}
+
+static inline unsigned eytzinger0_right_child(unsigned i)
+{
+       return eytzinger0_child(i, 1);
+}
+
+#if 0
+static inline unsigned eytzinger0_first(unsigned size)
+{
+}
+
+static inline unsigned eytzinger0_last(unsigned size)
+{
+}
+
+static inline unsigned eytzinger0_next(unsigned i, unsigned size)
+{
+}
+
+static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
+{
+}
+#endif
+
+static inline unsigned eytzinger0_extra(unsigned size)
+{
+       return (size + 1 - rounddown_pow_of_two(size)) << 1;
+}
+
+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
+                                              unsigned extra)
+{
+       return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1;
+}
+
+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
+                                              unsigned extra)
+{
+       return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1;
+}
+
+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
+{
+       return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
+{
+       return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
+}
+
+#define eytzinger0_find(base, _nr, _size, _cmp, _search)               \
+({                                                                     \
+       void *_base = base;                                             \
+       size_t _i = 0;                                                  \
+       int _res;                                                       \
+                                                                       \
+       while (_i < (_nr) &&                                            \
+              (_res = _cmp(_search, _base + _i * (_size), _size)))     \
+               _i = eytzinger0_child(_i, _res > 0);                    \
+                                                                       \
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {                        \
+               bool found1 = _i < _nr, found2 = false;                 \
+               unsigned _j;                                            \
+                                                                       \
+               for (_j = 0; _j < _nr; _j++)                            \
+                       if (!_cmp(_base + _j * (_size), _search, _size))\
+                               found2 = true;                          \
+                                                                       \
+               BUG_ON(found1 != found2);                               \
+       }                                                               \
+                                                                       \
+       _i;                                                             \
+})
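+
+A hedged usage sketch (u32_cmp, tree and nr are hypothetical): the array must already be in eytzinger0 order - e.g. via eytzinger0_sort() below - after which the search just walks child links; the CONFIG_BCACHEFS_DEBUG branch cross-checks the result against a linear scan:
+
+	static int u32_cmp(const void *l, const void *r, size_t size)
+	{
+		u32 a = *(const u32 *) l, b = *(const u32 *) r;
+
+		return a < b ? -1 : a > b;
+	}
+
+	u32 search = 7;
+	size_t i = eytzinger0_find(tree, nr, sizeof(tree[0]),
+				   u32_cmp, &search);
+
+	if (i < nr) {
+		/* found: tree[i] == search */
+	}
+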
+
+void eytzinger0_sort(void *, size_t, size_t,
+                   int (*cmp_func)(const void *, const void *, size_t),
+                   void (*swap_func)(void *, void *, size_t));
+
 #endif /* _EYTZINGER_H */
index dc5c7f4cdbc668af73371f1741d66083a5dda165..4a680ade37203fc675db94e1af170482d1cfc084 100644 (file)
@@ -282,10 +282,12 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
 
        BUG_ON(k->k.p.inode != op->ei->vfs_inode.i_ino);
 
-       bch2_btree_iter_init_intent(&extent_iter, wop->c, BTREE_ID_EXTENTS,
-                                  bkey_start_pos(&bch2_keylist_front(keys)->k));
-       bch2_btree_iter_init_intent(&inode_iter, wop->c,        BTREE_ID_INODES,
-                                  POS(extent_iter.pos.inode, 0));
+       bch2_btree_iter_init(&extent_iter, wop->c, BTREE_ID_EXTENTS,
+                            bkey_start_pos(&bch2_keylist_front(keys)->k),
+                            BTREE_ITER_INTENT);
+       bch2_btree_iter_init(&inode_iter, wop->c, BTREE_ID_INODES,
+                            POS(extent_iter.pos.inode, 0),
+                            BTREE_ITER_INTENT);
 
        hook.op                 = op;
        hook.hook.fn            = bchfs_extent_update_hook;
@@ -786,7 +788,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
                .mapping = mapping, .nr_pages = nr_pages
        };
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0);
 
        INIT_LIST_HEAD(&readpages_iter.pages);
        list_add(&readpages_iter.pages, pages);
@@ -841,7 +843,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
        bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
        bio_add_page_contig(&rbio->bio, page);
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0);
        bchfs_read(c, &iter, rbio, inode, NULL);
 }
 
@@ -1036,7 +1038,7 @@ do_io:
        w->io->op.new_i_size = i_size;
 
        if (wbc->sync_mode == WB_SYNC_ALL)
-               w->io->bio.bio.bi_opf |= WRITE_SYNC;
+               w->io->bio.bio.bi_opf |= REQ_SYNC;
 
        /* Before unlocking the page, transfer reservation to w->io: */
        old = page_state_cmpxchg(page_state(page), new, {
@@ -1448,7 +1450,7 @@ start:
                bio->bi_iter.bi_sector  = offset >> 9;
                bio->bi_private         = dio;
 
-               ret = bio_get_user_pages(bio, iter, 1);
+               ret = bio_iov_iter_get_pages(bio, iter);
                if (ret < 0) {
                        /* XXX: fault inject this path */
                        bio->bi_error = ret;
@@ -1537,7 +1539,7 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
 
        bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;
 
-       ret = bio_get_user_pages(bio, &dio->iter, 0);
+       ret = bio_iov_iter_get_pages(bio, &dio->iter);
        if (ret < 0) {
                /*
                 * these didn't get initialized, but bch2_dio_write_done() will
@@ -1908,7 +1910,7 @@ static int __bch2_truncate_page(struct address_space *mapping,
                 */
                for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
                                   POS(inode->i_ino,
-                                      index << (PAGE_SHIFT - 9)), k) {
+                                      index << (PAGE_SHIFT - 9)), 0, k) {
                        if (bkey_cmp(bkey_start_pos(k.k),
                                     POS(inode->i_ino,
                                         (index + 1) << (PAGE_SHIFT - 9))) >= 0)
@@ -2122,10 +2124,11 @@ static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len)
        if ((offset | len) & (PAGE_SIZE - 1))
                return -EINVAL;
 
-       bch2_btree_iter_init_intent(&dst, c, BTREE_ID_EXTENTS,
-                                  POS(inode->i_ino, offset >> 9));
+       bch2_btree_iter_init(&dst, c, BTREE_ID_EXTENTS,
+                            POS(inode->i_ino, offset >> 9),
+                            BTREE_ITER_INTENT);
        /* position will be set from dst iter's position: */
-       bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN, 0);
        bch2_btree_iter_link(&src, &dst);
 
        /*
@@ -2249,7 +2252,8 @@ static long bch2_fallocate(struct inode *inode, int mode,
        unsigned replicas = READ_ONCE(c->opts.data_replicas);
        int ret;
 
-       bch2_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
+                            BTREE_ITER_INTENT);
 
        inode_lock(inode);
        inode_dio_wait(inode);
@@ -2459,7 +2463,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
                return -ENXIO;
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
-                          POS(inode->i_ino, offset >> 9), k) {
+                          POS(inode->i_ino, offset >> 9), 0, k) {
                if (k.k->p.inode != inode->i_ino) {
                        break;
                } else if (bkey_extent_is_data(k.k)) {
@@ -2527,8 +2531,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
        if (offset >= isize)
                return -ENXIO;
 
-       for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
-                                     POS(inode->i_ino, offset >> 9), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+                          POS(inode->i_ino, offset >> 9),
+                          BTREE_ITER_WITH_HOLES, k) {
                if (k.k->p.inode != inode->i_ino) {
                        next_hole = bch2_next_pagecache_hole(inode,
                                        offset, MAX_LFS_FILESIZE);
index 3c02b0c6eb744a8d547265881e63e5f5dffba0b8..201cdfcb2a3c0770c7c00bebb7d0c0445415cd4d 100644 (file)
@@ -81,7 +81,8 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
 
        lockdep_assert_held(&ei->update_lock);
 
-       bch2_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(inum, 0));
+       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0),
+                            BTREE_ITER_INTENT);
 
        do {
                struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
@@ -714,7 +715,7 @@ static int bch2_fiemap(struct inode *inode, struct fiemap_extent_info *info,
                return -EINVAL;
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
-                          POS(inode->i_ino, start >> 9), k)
+                          POS(inode->i_ino, start >> 9), 0, k)
                if (bkey_extent_is_data(k.k) ||
                    k.k->type == BCH_RESERVATION) {
                        if (bkey_cmp(bkey_start_pos(k.k),
@@ -990,7 +991,6 @@ static const struct file_operations bch_dir_file_operations = {
 };
 
 static const struct inode_operations bch_symlink_inode_operations = {
-       .readlink       = generic_readlink,
        .get_link       = page_get_link,
        .setattr        = bch2_setattr,
        .listxattr      = bch2_xattr_list,
index e50520702fab29be59204615e13067746fb7e826..18d1d5336408b6dffde61493bfe13d06057fdccb 100644 (file)
@@ -134,8 +134,8 @@ struct hash_check {
 static void hash_check_init(const struct bch_hash_desc desc,
                            struct hash_check *h, struct bch_fs *c)
 {
-       bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN);
-       bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN);
+       bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN, 0);
+       bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN, 0);
 }
 
 static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
@@ -251,7 +251,7 @@ static int check_extents(struct bch_fs *c)
        int ret = 0;
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
-                          POS(BCACHE_ROOT_INO, 0), k) {
+                          POS(BCACHE_ROOT_INO, 0), 0, k) {
                if (k.k->type == KEY_TYPE_DISCARD)
                        continue;
 
@@ -310,7 +310,7 @@ static int check_dirents(struct bch_fs *c)
        hash_check_init(bch2_dirent_hash_desc, &h, c);
 
        for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
-                          POS(BCACHE_ROOT_INO, 0), k) {
+                          POS(BCACHE_ROOT_INO, 0), 0, k) {
                struct bkey_s_c_dirent d;
                struct bch_inode_unpacked target;
                bool have_target;
@@ -444,7 +444,7 @@ static int check_xattrs(struct bch_fs *c)
        hash_check_init(bch2_xattr_hash_desc, &h, c);
 
        for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
-                          POS(BCACHE_ROOT_INO, 0), k) {
+                          POS(BCACHE_ROOT_INO, 0), 0, k) {
                ret = walk_inode(c, &w, k.k->p.inode);
                if (ret)
                        break;
@@ -664,7 +664,7 @@ next:
                        goto up;
 
                for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
-                                  POS(e->inum, e->offset + 1), k) {
+                                  POS(e->inum, e->offset + 1), 0, k) {
                        if (k.k->p.inode != e->inum)
                                break;
 
@@ -712,7 +712,7 @@ up:
                path.nr--;
        }
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
+       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
                if (k.k->type != BCH_INODE_FS ||
                    !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
                        continue;
@@ -794,7 +794,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
 
        inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false);
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
+       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
                switch (k.k->type) {
                case BCH_DIRENT:
                        d = bkey_s_c_to_dirent(k);
@@ -825,7 +825,7 @@ s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
        struct bkey_s_c k;
        u64 sectors = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
                if (k.k->p.inode != inum)
                        break;
 
@@ -999,7 +999,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
        int ret = 0, ret2 = 0;
        u64 nlinks_pos;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
+       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
        genradix_iter_init(&nlinks_iter);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
index 5b56a628a77bac67211dc463608908736c930658..0a37153d2625cd5a2cf9d205d8bfe52957fafe84 100644 (file)
@@ -276,7 +276,8 @@ int bch2_inode_create(struct bch_fs *c, struct bkey_i *inode,
        if (*hint == min)
                searched_from_start = true;
 again:
-       bch2_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(*hint, 0));
+       bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(*hint, 0),
+                            BTREE_ITER_INTENT);
 
        while (1) {
                struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
@@ -376,8 +377,9 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
        struct bkey_s_c k;
        int ret = -ENOENT;
 
-       for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
-                                     POS(inode_nr, 0), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_INODES,
+                          POS(inode_nr, 0),
+                          BTREE_ITER_WITH_HOLES, k) {
                switch (k.k->type) {
                case BCH_INODE_FS:
                        ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
@@ -400,7 +402,7 @@ int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
        struct btree_iter iter;
        struct bkey_s_c k;
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), 0, k) {
                if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
                        break;
 
index 0f27eaf6f63d48192eb97f77a64a9ea02d271b12..d588f6ab691b2290e7ebe38fe808e185ab1e60f5 100644 (file)
@@ -182,8 +182,9 @@ static int bch2_write_index_default(struct bch_write_op *op)
        struct btree_iter iter;
        int ret;
 
-       bch2_btree_iter_init_intent(&iter, op->c, BTREE_ID_EXTENTS,
-               bkey_start_pos(&bch2_keylist_front(keys)->k));
+       bch2_btree_iter_init(&iter, op->c, BTREE_ID_EXTENTS,
+                            bkey_start_pos(&bch2_keylist_front(keys)->k),
+                            BTREE_ITER_INTENT);
 
        ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
                                       NULL, op_journal_seq(op),
@@ -1112,9 +1113,9 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
                if (promote_op) {
                        struct bio *promote_bio = &promote_op->write.wbio.bio;
 
-                       bio_init(promote_bio);
-                       promote_bio->bi_max_vecs = pages;
-                       promote_bio->bi_io_vec  = promote_bio->bi_inline_vecs;
+                       bio_init(promote_bio,
+                                promote_bio->bi_inline_vecs,
+                                pages);
                        bounce = true;
                        /* could also set read_full */
                }
@@ -1265,8 +1266,9 @@ static void bch2_read_iter(struct bch_fs *c, struct bch_read_bio *rbio,
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
-                                     POS(inode, bvec_iter.bi_sector), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+                          POS(inode, bvec_iter.bi_sector),
+                          BTREE_ITER_WITH_HOLES, k) {
                BKEY_PADDED(k) tmp;
                struct extent_pick_ptr pick;
                unsigned bytes, sectors;
index ca96330ce27f436c4b92803fcd40d5b1d4e9b78f..510066a2112eedaeacf704e604f75b83022eb391 100644 (file)
@@ -163,8 +163,7 @@ static void journal_seq_blacklist_flush(struct journal *j,
                n = bl->entries[i];
                mutex_unlock(&j->blacklist_lock);
 
-               bch2_btree_iter_init(&iter, c, n.btree_id, n.pos);
-               iter.is_extents = false;
+               __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, 0, 0, 0);
 redo_peek:
                b = bch2_btree_iter_peek_node(&iter);
 
@@ -1921,6 +1920,9 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
        struct journal_entry_pin *pin;
        u64 pin_seq;
 
+       if (!test_bit(JOURNAL_STARTED, &j->flags))
+               return;
+
        while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq)))
                pin->flush(j, pin, pin_seq);
 
@@ -2374,9 +2376,9 @@ static void journal_write(struct closure *cl)
                        bio = ca->journal.bio;
                        bio_reset(bio);
                        bio->bi_bdev            = ca->disk_sb.bdev;
+                       bio->bi_opf             = REQ_OP_FLUSH;
                        bio->bi_end_io          = journal_write_endio;
                        bio->bi_private         = ca;
-                       bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
                        closure_bio_submit(bio, cl);
                }
 
index f79b624d367b7711aaa19f6b453bf778dc004b70..8680b100e2e8cf757bf4fd7fc247fbc39d8596d0 100644 (file)
@@ -97,7 +97,8 @@ int bch2_move_data_off_device(struct bch_dev *ca)
                atomic_set(&ctxt.error_count, 0);
                atomic_set(&ctxt.error_flags, 0);
 
-               bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+               bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
+                                    BTREE_ITER_PREFETCH);
 
                while (!bch2_move_ctxt_wait(&ctxt) &&
                       (k = bch2_btree_iter_peek(&iter)).k &&
@@ -167,7 +168,7 @@ static int bch2_move_btree_off(struct bch_dev *ca, enum btree_id id)
 
        closure_init_stack(&cl);
 
-       for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
+       for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
                struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
 retry:
                if (!bch2_extent_has_device(e, ca->dev_idx))
@@ -197,7 +198,7 @@ retry:
                return ret; /* btree IO error */
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
-               for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
+               for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
                        struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
 
                        BUG_ON(bch2_extent_has_device(e, ca->dev_idx));
@@ -341,7 +342,8 @@ int bch2_flag_data_bad(struct bch_dev *ca)
        struct bkey_s_c_extent e;
        struct btree_iter iter;
 
-       bch2_btree_iter_init(&iter, ca->fs, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, ca->fs, BTREE_ID_EXTENTS,
+                            POS_MIN, BTREE_ITER_PREFETCH);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = btree_iter_err(k))) {
index f718f42ad45411c88d3176539658fb7783b83d7f..8c9395ded10cca7fff7b33e519fcf7077b88cc98 100644 (file)
@@ -54,8 +54,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
        struct btree_iter iter;
        int ret = 0;
 
-       bch2_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS,
-               bkey_start_pos(&bch2_keylist_front(keys)->k));
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
+                            bkey_start_pos(&bch2_keylist_front(keys)->k),
+                            BTREE_ITER_INTENT);
 
        while (1) {
                struct bkey_s_extent insert =
@@ -171,13 +172,12 @@ void bch2_migrate_write_init(struct bch_fs *c,
 static void migrate_bio_init(struct moving_io *io, struct bio *bio,
                             unsigned sectors)
 {
-       bio_init(bio);
+       bio_init(bio, io->bi_inline_vecs,
+                DIV_ROUND_UP(sectors, PAGE_SECTORS));
        bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
        bio->bi_iter.bi_size    = sectors << 9;
-       bio->bi_max_vecs        = DIV_ROUND_UP(sectors, PAGE_SECTORS);
        bio->bi_private         = &io->cl;
-       bio->bi_io_vec          = io->bi_inline_vecs;
        bch2_bio_map(bio, NULL);
 }
 
index cc7d3f68302034b6b5e41a6dbac444fe9d1f970e..72cbb9d50f69162ce1b525789efc0a34508424ca 100644 (file)
@@ -9,6 +9,7 @@
 #include "buckets.h"
 #include "clock.h"
 #include "extents.h"
+#include "eytzinger.h"
 #include "io.h"
 #include "keylist.h"
 #include "move.h"
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/math64.h>
+#include <linux/sort.h>
 #include <linux/wait.h>
 
 /* Moving GC - IO loop */
 
+static int bucket_idx_cmp(const void *_l, const void *_r, size_t size)
+{
+       const struct bucket_heap_entry *l = _l;
+       const struct bucket_heap_entry *r = _r;
+
+       if (l->bucket < r->bucket)
+               return -1;
+       if (l->bucket > r->bucket)
+               return 1;
+       return 0;
+}
+
 static const struct bch_extent_ptr *moving_pred(struct bch_dev *ca,
                                                struct bkey_s_c k)
 {
+       bucket_heap *h = &ca->copygc_heap;
        const struct bch_extent_ptr *ptr;
 
        if (bkey_extent_is_data(k.k) &&
            (ptr = bch2_extent_has_device(bkey_s_c_to_extent(k),
-                                        ca->dev_idx)) &&
-           PTR_BUCKET(ca, ptr)->mark.copygc)
-               return ptr;
+                                         ca->dev_idx))) {
+               struct bucket_heap_entry search = {
+                       .bucket = PTR_BUCKET_NR(ca, ptr)
+               };
+
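+               /*
+                * copygc_heap was sorted by bucket index with
+                * eytzinger0_sort() in bch2_moving_gc(), so membership
+                * is a binary search:
+                */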
+               size_t i = eytzinger0_find(h->data, h->used,
+                                          sizeof(h->data[0]),
+                                          bucket_idx_cmp, &search);
+
+               if (i < h->used)
+                       return ptr;
+       }
 
        return NULL;
 }
@@ -60,17 +84,19 @@ static void read_moving(struct bch_dev *ca, size_t buckets_to_move,
                        u64 sectors_to_move)
 {
        struct bch_fs *c = ca->fs;
-       struct bucket *g;
+       bucket_heap *h = &ca->copygc_heap;
        struct moving_context ctxt;
        struct btree_iter iter;
        struct bkey_s_c k;
        u64 sectors_not_moved = 0;
        size_t buckets_not_moved = 0;
+       struct bucket_heap_entry *i;
 
        bch2_ratelimit_reset(&ca->moving_gc_pd.rate);
        bch2_move_ctxt_init(&ctxt, &ca->moving_gc_pd.rate,
                                SECTORS_IN_FLIGHT_PER_DEVICE);
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
+                            BTREE_ITER_PREFETCH);
 
        while (1) {
                if (kthread_should_stop())
@@ -108,11 +134,14 @@ next:
                                   buckets_to_move);
 
        /* don't check this if we bailed out early: */
-       for_each_bucket(g, ca)
-               if (g->mark.copygc && bucket_sectors_used(g)) {
-                       sectors_not_moved += bucket_sectors_used(g);
+       for (i = h->data; i < h->data + h->used; i++) {
+               struct bucket_mark m = READ_ONCE(ca->buckets[i->bucket].mark);
+
+               if (i->mark.gen == m.gen && bucket_sectors_used(m)) {
+                       sectors_not_moved += bucket_sectors_used(m);
                        buckets_not_moved++;
                }
+       }
 
        if (sectors_not_moved)
                bch_warn(c, "copygc finished but %llu/%llu sectors, %zu/%zu buckets not moved",
@@ -138,15 +167,20 @@ static bool have_copygc_reserve(struct bch_dev *ca)
        return ret;
 }
 
+static inline int sectors_used_cmp(bucket_heap *heap,
+                                  struct bucket_heap_entry l,
+                                  struct bucket_heap_entry r)
+{
+       return bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark);
+}
+
 static void bch2_moving_gc(struct bch_dev *ca)
 {
        struct bch_fs *c = ca->fs;
        struct bucket *g;
-       struct bucket_mark new;
-       u64 sectors_to_move;
+       u64 sectors_to_move = 0;
        size_t buckets_to_move, buckets_unused = 0;
-       struct bucket_heap_entry e;
-       unsigned sectors_used, i;
+       struct bucket_heap_entry e, *i;
        int reserve_sectors;
 
        if (!have_copygc_reserve(ca)) {
@@ -174,52 +208,47 @@ static void bch2_moving_gc(struct bch_dev *ca)
         */
 
        /*
-        * We need bucket marks to be up to date, so gc can't be recalculating
-        * them, and we don't want the allocator invalidating a bucket after
-        * we've decided to evacuate it but before we set copygc:
+        * We need bucket marks to be up to date - gc can't be recalculating
+        * them:
         */
        down_read(&c->gc_lock);
-       mutex_lock(&ca->heap_lock);
-       mutex_lock(&ca->fs->bucket_lock);
-
-       ca->heap.used = 0;
+       ca->copygc_heap.used = 0;
        for_each_bucket(g, ca) {
-               bucket_cmpxchg(g, new, new.copygc = 0);
+               struct bucket_mark m = READ_ONCE(g->mark);
+               struct bucket_heap_entry e = { g - ca->buckets, m };
 
-               if (bucket_unused(g)) {
+               if (bucket_unused(m)) {
                        buckets_unused++;
                        continue;
                }
 
-               if (g->mark.owned_by_allocator ||
-                   g->mark.data_type != BUCKET_DATA)
+               if (m.owned_by_allocator ||
+                   m.data_type != BUCKET_DATA)
                        continue;
 
-               sectors_used = bucket_sectors_used(g);
-
-               if (sectors_used >= ca->mi.bucket_size)
+               if (bucket_sectors_used(m) >= ca->mi.bucket_size)
                        continue;
 
-               bucket_heap_push(ca, g, sectors_used);
+               heap_add_or_replace(&ca->copygc_heap, e, -sectors_used_cmp);
        }
+       up_read(&c->gc_lock);
 
-       sectors_to_move = 0;
-       for (i = 0; i < ca->heap.used; i++)
-               sectors_to_move += ca->heap.data[i].val;
+       for (i = ca->copygc_heap.data;
+            i < ca->copygc_heap.data + ca->copygc_heap.used;
+            i++)
+               sectors_to_move += bucket_sectors_used(i->mark);
 
        while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
-               BUG_ON(!heap_pop(&ca->heap, e, bucket_min_cmp));
-               sectors_to_move -= e.val;
+               BUG_ON(!heap_pop(&ca->copygc_heap, e, -sectors_used_cmp));
+               sectors_to_move -= bucket_sectors_used(e.mark);
        }
 
-       for (i = 0; i < ca->heap.used; i++)
-               bucket_cmpxchg(ca->heap.data[i].g, new, new.copygc = 1);
+       buckets_to_move = ca->copygc_heap.used;
 
-       buckets_to_move = ca->heap.used;
-
-       mutex_unlock(&ca->fs->bucket_lock);
-       mutex_unlock(&ca->heap_lock);
-       up_read(&c->gc_lock);
+       eytzinger0_sort(ca->copygc_heap.data,
+                       ca->copygc_heap.used,
+                       sizeof(ca->copygc_heap.data[0]),
+                       bucket_idx_cmp, NULL);
 
        read_moving(ca, buckets_to_move, sectors_to_move);
 }
index 328378550bf7255acbb00e8b9a4b2b8d6dbb4370..c60a67307773327e8c5d6e4e915e4f11c43c0d83 100644 (file)
@@ -169,7 +169,7 @@ static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner)
                        break;
                }
 
-               cpu_relax_lowlatency();
+               cpu_relax();
        }
        rcu_read_unlock();
 
@@ -222,7 +222,7 @@ static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
-               cpu_relax_lowlatency();
+               cpu_relax();
        }
 
        osq_unlock(&lock->osq);
index 02052165f74d1b04df6722700cdf05f25c8253c8..b237b751053bb3dbceb98e2a1402bf063b70161c 100644 (file)
@@ -190,7 +190,7 @@ bch2_hash_lookup(const struct bch_hash_desc desc,
                struct btree_iter *iter, const void *key)
 {
        bch2_btree_iter_init(iter, c, desc.btree_id,
-                           POS(inode, desc.hash_key(info, key)));
+                           POS(inode, desc.hash_key(info, key)), 0);
 
        return bch2_hash_lookup_at(desc, info, iter, key);
 }
@@ -201,8 +201,9 @@ bch2_hash_lookup_intent(const struct bch_hash_desc desc,
                       struct bch_fs *c, u64 inode,
                       struct btree_iter *iter, const void *key)
 {
-       bch2_btree_iter_init_intent(iter, c, desc.btree_id,
-                           POS(inode, desc.hash_key(info, key)));
+       bch2_btree_iter_init(iter, c, desc.btree_id,
+                            POS(inode, desc.hash_key(info, key)),
+                            BTREE_ITER_INTENT);
 
        return bch2_hash_lookup_at(desc, info, iter, key);
 }
@@ -232,8 +233,9 @@ static inline struct bkey_s_c bch2_hash_hole(const struct bch_hash_desc desc,
                                            struct btree_iter *iter,
                                            const void *key)
 {
-       bch2_btree_iter_init_intent(iter, c, desc.btree_id,
-                           POS(inode, desc.hash_key(info, key)));
+       bch2_btree_iter_init(iter, c, desc.btree_id,
+                            POS(inode, desc.hash_key(info, key)),
+                            BTREE_ITER_INTENT);
 
        return bch2_hash_hole_at(desc, iter);
 }
@@ -278,9 +280,11 @@ static inline int bch2_hash_set(const struct bch_hash_desc desc,
        struct bkey_s_c k;
        int ret;
 
-       bch2_btree_iter_init_intent(&hashed_slot, c, desc.btree_id,
-               POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))));
-       bch2_btree_iter_init_intent(&iter, c, desc.btree_id, hashed_slot.pos);
+       bch2_btree_iter_init(&hashed_slot, c, desc.btree_id,
+               POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
+               BTREE_ITER_INTENT);
+       bch2_btree_iter_init(&iter, c, desc.btree_id, hashed_slot.pos,
+                            BTREE_ITER_INTENT);
        bch2_btree_iter_link(&hashed_slot, &iter);
 retry:
        /*
@@ -353,7 +357,7 @@ static inline int bch2_hash_delete_at(const struct bch_hash_desc desc,
        int ret = -ENOENT;
 
        bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id,
-                            iter->pos);
+                            iter->pos, 0);
        bch2_btree_iter_link(iter, &whiteout_iter);
 
        ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter);
@@ -382,10 +386,11 @@ static inline int bch2_hash_delete(const struct bch_hash_desc desc,
        struct bkey_s_c k;
        int ret = -ENOENT;
 
-       bch2_btree_iter_init_intent(&iter, c, desc.btree_id,
-                           POS(inode, desc.hash_key(info, key)));
+       bch2_btree_iter_init(&iter, c, desc.btree_id,
+                            POS(inode, desc.hash_key(info, key)),
+                            BTREE_ITER_INTENT);
        bch2_btree_iter_init(&whiteout_iter, c, desc.btree_id,
-                           POS(inode, desc.hash_key(info, key)));
+                           POS(inode, desc.hash_key(info, key)), 0);
        bch2_btree_iter_link(&iter, &whiteout_iter);
 retry:
        k = bch2_hash_lookup_at(desc, info, &iter, key);
index 7a98136047d5757da2a52704895bf341b69319eb..528538b5c74014c8b4b191fd75c14287e9e36216 100644 (file)
@@ -377,7 +377,8 @@ static void bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[WRITE]);
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
-       bdi_destroy(&c->bdi);
+       if (c->bdi.bdi_list.next)
+               bdi_destroy(&c->bdi);
        lg_lock_free(&c->usage_lock);
        free_percpu(c->usage_percpu);
        mempool_exit(&c->btree_bounce_pool);
@@ -572,7 +573,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
                                      sizeof(struct btree_interior_update)) ||
            mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
-           bioset_init(&c->btree_read_bio, 1, 0) ||
+           bioset_init(&c->btree_read_bio, 1,
+                       offsetof(struct btree_read_bio, bio)) ||
            bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio)) ||
            bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio)) ||
            bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio)) ||
@@ -984,7 +986,8 @@ static void bch2_dev_free(struct bch_dev *ca)
        kfree(ca->bio_prio);
        kvpfree(ca->buckets,     ca->mi.nbuckets * sizeof(struct bucket));
        kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
-       free_heap(&ca->heap);
+       free_heap(&ca->copygc_heap);
+       free_heap(&ca->alloc_heap);
        free_fifo(&ca->free_inc);
 
        for (i = 0; i < RESERVE_NR; i++)
@@ -1105,7 +1108,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 
        spin_lock_init(&ca->freelist_lock);
        spin_lock_init(&ca->prio_buckets_lock);
-       mutex_init(&ca->heap_lock);
        mutex_init(&ca->prio_write_lock);
        bch2_dev_moving_gc_init(ca);
 
@@ -1142,7 +1144,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
                       movinggc_reserve, GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
            !init_fifo(&ca->free_inc,   free_inc_reserve, GFP_KERNEL) ||
-           !init_heap(&ca->heap,       heap_size, GFP_KERNEL) ||
+           !init_heap(&ca->alloc_heap, heap_size, GFP_KERNEL) ||
+           !init_heap(&ca->copygc_heap, heap_size, GFP_KERNEL) ||
            !(ca->oldest_gens   = kvpmalloc(ca->mi.nbuckets *
                                            sizeof(u8),
                                            GFP_KERNEL|__GFP_ZERO)) ||
index c34048a32b56fad176e3b1ec4882fe0b66d0afee..3c47f1cb2ee763b7b88a22019239f46891d3c64c 100644 (file)
@@ -263,7 +263,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
        if (!bch2_fs_running(c))
                return -EPERM;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
                if (k.k->type == BCH_EXTENT) {
                        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
                        const struct bch_extent_ptr *ptr;
@@ -604,7 +604,7 @@ static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g,
 static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g,
                                       void *private)
 {
-       return bucket_sectors_used(g);
+       return bucket_sectors_used(g->mark);
 }
 
 static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g,
index 16d32928a2172b254eabbed7f52248e1d5ee23cb..6bc208450c944ba14b28ac21f5c74f9a3a9c7b68 100644 (file)
@@ -118,7 +118,8 @@ static s64 read_tiering(struct bch_fs *c, struct bch_tier *tier)
 
        bch2_move_ctxt_init(&ctxt, &tier->pd.rate,
                           nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
+                            BTREE_ITER_PREFETCH);
 
        while (!kthread_should_stop() &&
               !bch2_move_ctxt_wait(&ctxt) &&
index 79a2aeb1a8469825c15963a31e102a52e3f4787d..6ffc9811092d170e0a77675410b9ceeac1b6ecaf 100644 (file)
@@ -431,3 +431,104 @@ size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len)
 
        return n;
 }
+
+#include "eytzinger.h"
+
+static int alignment_ok(const void *base, size_t align)
+{
+       return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+               ((unsigned long)base & (align - 1)) == 0;
+}
+
+static void u32_swap(void *a, void *b, size_t size)
+{
+       u32 t = *(u32 *)a;
+       *(u32 *)a = *(u32 *)b;
+       *(u32 *)b = t;
+}
+
+static void u64_swap(void *a, void *b, size_t size)
+{
+       u64 t = *(u64 *)a;
+       *(u64 *)a = *(u64 *)b;
+       *(u64 *)b = t;
+}
+
+static void generic_swap(void *a, void *b, size_t size)
+{
+       char t;
+
+       do {
+               t = *(char *)a;
+               *(char *)a++ = *(char *)b;
+               *(char *)b++ = t;
+       } while (--size > 0);
+}
+
+static inline int do_cmp(void *base, size_t n, size_t size,
+                        int (*cmp_func)(const void *, const void *, size_t),
+                        size_t l, size_t r)
+{
+       return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
+                       base + inorder_to_eytzinger0(r, n) * size,
+                       size);
+}
+
+static inline void do_swap(void *base, size_t n, size_t size,
+                          void (*swap_func)(void *, void *, size_t),
+                          size_t l, size_t r)
+{
+       swap_func(base + inorder_to_eytzinger0(l, n) * size,
+                 base + inorder_to_eytzinger0(r, n) * size,
+                 size);
+}
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+                    int (*cmp_func)(const void *, const void *, size_t),
+                    void (*swap_func)(void *, void *, size_t))
+{
+       int i, c, r;
+
+       if (!swap_func) {
+               if (size == 4 && alignment_ok(base, 4))
+                       swap_func = u32_swap;
+               else if (size == 8 && alignment_ok(base, 8))
+                       swap_func = u64_swap;
+               else
+                       swap_func = generic_swap;
+       }
+
+       /* heapify */
+       for (i = n / 2 - 1; i >= 0; --i) {
+               for (r = i; r * 2 + 1 < n; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < n &&
+                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+                               c++;
+
+                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+                               break;
+
+                       do_swap(base, n, size, swap_func, r, c);
+               }
+       }
+
+       /* sort */
+       for (i = n - 1; i > 0; --i) {
+               do_swap(base, n, size, swap_func, 0, i);
+
+               for (r = 0; r * 2 + 1 < i; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < i &&
+                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+                               c++;
+
+                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+                               break;
+
+                       do_swap(base, n, size, swap_func, r, c);
+               }
+       }
+}
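+
+eytzinger0_sort() is ordinary heapsort, except every index is remapped through inorder_to_eytzinger0(): the physical array it leaves behind is in eytzinger (breadth-first) order whose in-order traversal is sorted - exactly what eytzinger0_find() expects. A small hypothetical usage (u32_cmp as sketched in eytzinger.h above):
+
+	u32 a[] = { 5, 1, 4, 2, 3 };
+	u32 search = 4;
+	size_t i;
+
+	eytzinger0_sort(a, ARRAY_SIZE(a), sizeof(a[0]), u32_cmp, NULL);
+	i = eytzinger0_find(a, ARRAY_SIZE(a), sizeof(a[0]),
+			    u32_cmp, &search);
+	/* i < ARRAY_SIZE(a), and a[i] == 4 */
+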
index 8aa5c34b456dbf8f8e35aaa04ac5e50102b78cc8..d7511aebda5e11b1423b1ba5583bf4b78ec9d0d1 100644 (file)
@@ -98,11 +98,13 @@ static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
                ?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
 }
 
-#define DECLARE_HEAP(type, name)                                       \
-       struct {                                                        \
-               size_t size, used;                                      \
-               type *data;                                             \
-       } name
+#define HEAP(type)                                                     \
+struct {                                                               \
+       size_t size, used;                                              \
+       type *data;                                                     \
+}
+
+#define DECLARE_HEAP(type, name) HEAP(type) name
 
 #define init_heap(heap, _size, gfp)                                    \
 ({                                                                     \
@@ -120,46 +122,62 @@ do {                                                                      \
 
 #define heap_swap(h, i, j)     swap((h)->data[i], (h)->data[j])
 
-#define heap_sift(h, i, cmp)                                           \
+#define heap_peek(h)                                                   \
+({                                                                     \
+       EBUG_ON(!(h)->used);                                            \
+       (h)->data[0];                                                   \
+})
+
+#define heap_full(h)   ((h)->used == (h)->size)
+
+#define heap_sift_down(h, i, cmp)                                      \
 do {                                                                   \
-       size_t _r, _j = i;                                              \
+       size_t _c, _j = i;                                              \
                                                                        \
-       for (; _j * 2 + 1 < (h)->used; _j = _r) {                       \
-               _r = _j * 2 + 1;                                        \
-               if (_r + 1 < (h)->used &&                               \
-                   cmp((h)->data[_r], (h)->data[_r + 1]))              \
-                       _r++;                                           \
+       for (; _j * 2 + 1 < (h)->used; _j = _c) {                       \
+               _c = _j * 2 + 1;                                        \
+               if (_c + 1 < (h)->used &&                               \
+                   cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0)      \
+                       _c++;                                           \
                                                                        \
-               if (cmp((h)->data[_r], (h)->data[_j]))                  \
+               if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0)          \
                        break;                                          \
-               heap_swap(h, _r, _j);                                   \
+               heap_swap(h, _c, _j);                                   \
        }                                                               \
 } while (0)
 
-#define heap_sift_down(h, i, cmp)                                      \
+#define heap_sift_up(h, i, cmp)                                                \
 do {                                                                   \
        while (i) {                                                     \
                size_t p = (i - 1) / 2;                                 \
-               if (cmp((h)->data[i], (h)->data[p]))                    \
+               if (cmp(h, (h)->data[i], (h)->data[p]) >= 0)            \
                        break;                                          \
                heap_swap(h, i, p);                                     \
                i = p;                                                  \
        }                                                               \
 } while (0)
 
-#define heap_add(h, d, cmp)                                            \
+#define heap_add(h, new, cmp)                                          \
 ({                                                                     \
        bool _r = !heap_full(h);                                        \
        if (_r) {                                                       \
                size_t _i = (h)->used++;                                \
-               (h)->data[_i] = d;                                      \
+               (h)->data[_i] = new;                                    \
                                                                        \
-               heap_sift_down(h, _i, cmp);                             \
-               heap_sift(h, _i, cmp);                                  \
+               heap_sift_up(h, _i, cmp);                               \
        }                                                               \
        _r;                                                             \
 })
 
+#define heap_add_or_replace(h, new, cmp)                               \
+do {                                                                   \
+       if (!heap_add(h, new, cmp) &&                                   \
+           cmp(h, new, heap_peek(h)) >= 0) {                           \
+               (h)->data[0] = new;                                     \
+               heap_sift_down(h, 0, cmp);                              \
+       }                                                               \
+} while (0)
+
 #define heap_del(h, i, cmp)                                            \
 do {                                                                   \
        size_t _i = (i);                                                \
@@ -167,8 +185,8 @@ do {                                                                        \
        BUG_ON(_i >= (h)->used);                                        \
        (h)->used--;                                                    \
        heap_swap(h, _i, (h)->used);                                    \
+       heap_sift_up(h, _i, cmp);                                       \
        heap_sift_down(h, _i, cmp);                                     \
-       heap_sift(h, _i, cmp);                                          \
 } while (0)
 
 #define heap_pop(h, d, cmp)                                            \
@@ -181,19 +199,11 @@ do {                                                                      \
        _r;                                                             \
 })
 
-#define heap_peek(h)                                                   \
-({                                                                     \
-       EBUG_ON(!(h)->used);                                            \
-       (h)->data[0];                                                   \
-})
-
-#define heap_full(h)   ((h)->used == (h)->size)
-
 #define heap_resort(heap, cmp)                                         \
 do {                                                                   \
        ssize_t _i;                                                     \
        for (_i = (ssize_t) (heap)->used / 2 -  1; _i >= 0; --_i)       \
-               heap_sift(heap, _i, cmp);                               \
+               heap_sift_down(heap, _i, cmp);                          \
 } while (0)
 
 /*
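
Two renames and one semantic change land together in the heap macros: the old heap_sift()/heap_sift_down() pair becomes the more accurately named heap_sift_down()/heap_sift_up(), and comparators now receive the heap itself and return an int, memcmp-style, negative meaning the first argument belongs nearer the root. With that, heap_add() needs only a single sift up, and the new heap_add_or_replace() overwrites the root when the heap is full and the incoming element compares >= it. A hedged sketch, not from the commit; u64_heap, min_cmp and track_top_k() are invented for illustration, and HEAP(), init_heap() and heap_add_or_replace() come from the util.h changes above. With a less-than comparator the heap is a min-heap, so this keeps the k largest values seen:

typedef HEAP(u64) u64_heap;

/* negative when l should sit closer to the root: */
#define min_cmp(h, l, r)	((l) < (r) ? -1 : (l) > (r) ? 1 : 0)

static void track_top_k(u64_heap *heap, u64 v)
{
	/*
	 * If the heap (sized k by init_heap()) is full, this replaces
	 * the current minimum iff v compares >= it, then restores the
	 * heap property with heap_sift_down():
	 */
	heap_add_or_replace(heap, v, min_cmp);
}
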
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 4e82e42cec14c808eaf3482cce253d953f8b7c7c..b2075c2e03f485f8f29ed9788ee5c1aa32ebcb15 100644 (file)
@@ -282,7 +282,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        ssize_t ret = 0;
        size_t len;
 
-       for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), k) {
+       for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) {
                BUG_ON(k.k->p.inode < inum);
 
                if (k.k->p.inode > inum)
diff --git a/linux/bio.c b/linux/bio.c
index 8fb10ce4a46d039b89076979ee4f9c2357ce6625..f43566993171bf3c7f0641a1f7a06d996b6230aa 100644 (file)
@@ -278,10 +278,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                return NULL;
 
        bio = p + front_pad;
-       bio_init(bio);
-       bio->bi_pool            = bs;
-       bio->bi_max_vecs        = nr_iovecs;
-       bio->bi_io_vec          = bio->bi_inline_vecs;
+       bio_init(bio, bio->bi_inline_vecs, nr_iovecs);
+       bio->bi_pool = bs;
 
        return bio;
 }
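
With the three-argument bio_init(), callers hand over their bio_vec storage up front instead of assigning bi_io_vec and bi_max_vecs after the fact, which is exactly the simplification visible in bio_alloc_bioset() above. A hedged sketch of the same pattern from a caller's side, not from the commit; a single-segment bio on the stack:

	struct bio bio;
	struct bio_vec bv;

	bio_init(&bio, &bv, 1);	/* bi_io_vec = &bv, bi_max_vecs = 1 */
	bio.bi_iter.bi_size = PAGE_SIZE;
	/* ... fill in bv and the rest of the bio, then submit ... */
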