Update bcachefs sources to 5a3a4087af bcachefs: Convert a BUG_ON() to a warning
author     Kent Overstreet <kent.overstreet@gmail.com>
           Wed, 25 Sep 2019 19:23:29 +0000 (15:23 -0400)
committer  Kent Overstreet <kent.overstreet@gmail.com>
           Wed, 25 Sep 2019 19:23:50 +0000 (15:23 -0400)
33 files changed:
.bcachefs_revision
include/linux/bio.h
include/linux/bvec.h
libbcachefs/alloc_background.c
libbcachefs/alloc_foreground.c
libbcachefs/alloc_foreground.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.c
libbcachefs/bkey_methods.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/checksum.c
libbcachefs/ec.c
libbcachefs/error.c
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/io.c
libbcachefs/move.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/replicas.c
libbcachefs/str_hash.h
libbcachefs/super.c
linux/bio.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 77bb3639fd2020221123bc6edf73d7198502f678..6ceb95f88d5f7547bbad9b52a32f47b4e3c734eb 100644
@@ -1 +1 @@
-fee79cd6543ed687efe86458e3c4479eff818488
+5a3a4087af27aa10da5f23cb174a439946153584
diff --git a/include/linux/bio.h b/include/linux/bio.h
index e93341e60cb60fcd75b1d69622a6f36e478818e9..cdbbcb390984d6a336396fbe473ba4c8eb214c34 100644
@@ -113,13 +113,17 @@ static inline void *bio_data(struct bio *bio)
 
 #define __bio_kunmap_atomic(addr)      kunmap_atomic(addr)
 
-struct bvec_iter_all {
-       unsigned        done;
-};
+static inline struct bio_vec *bio_next_segment(const struct bio *bio,
+                                              struct bvec_iter_all *iter)
+{
+       if (iter->idx >= bio->bi_vcnt)
+               return NULL;
+
+       return &bio->bi_io_vec[iter->idx];
+}
 
-#define bio_for_each_segment_all(bvl, bio, i, iter)                    \
-       for (i = 0, bvl = (bio)->bi_io_vec, iter = (struct bvec_iter_all) { 0 };                \
-            i < (bio)->bi_vcnt; i++, bvl++)
+#define bio_for_each_segment_all(bvl, bio, iter) \
+       for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
                                    unsigned bytes)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82d98f5c5e77c34f967e856dcc6028dabe..5bc68b42db7165a56b2ee9ddafe96d8be670571c 100644
@@ -43,6 +43,10 @@ struct bvec_iter {
                                                   current bvec */
 };
 
+struct bvec_iter_all {
+       int             idx;
+};
+
 /*
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
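
The bio.h and bvec.h hunks above replace the counter-based segment walk
with an index carried in struct bvec_iter_all. A minimal usage sketch,
assuming a bio whose bi_io_vec/bi_vcnt are already populated (the memset
body is illustrative, not from this commit):

        struct bio_vec *bv;
        struct bvec_iter_all iter;

        /* bio_next_segment() returns NULL once iter.idx reaches
         * bio->bi_vcnt, which is what terminates the loop: */
        bio_for_each_segment_all(bv, bio, iter)
                memset(page_address(bv->bv_page) + bv->bv_offset,
                       0, bv->bv_len);

Callers lose the old "i" counter argument; the index now lives in the
iterator itself.
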
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 7a457729cf1896b16127f57fc99ada0209cafa7c..9814179a6406eca226f80e69e63bb5889987fbd7 100644
@@ -1164,7 +1164,7 @@ static int bch2_allocator_thread(void *arg)
                         */
                        if (!nr ||
                            (nr < ALLOC_SCAN_BATCH(ca) &&
-                            !fifo_full(&ca->free[RESERVE_MOVINGGC]))) {
+                            !fifo_empty(&ca->free[RESERVE_NONE]))) {
                                ret = wait_buckets_available(c, ca);
                                if (ret) {
                                        up_read(&c->gc_lock);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e64f8449462fcad1d19321e2e3cd0cbccce0c3de..697d576802b6525004809f09099b490aade59499 100644
@@ -693,8 +693,7 @@ retry_blocking:
 }
 
 void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
-                               struct open_buckets *obs,
-                               enum bch_data_type data_type)
+                               struct open_buckets *obs)
 {
        struct open_buckets ptrs = { .nr = 0 };
        struct open_bucket *ob, *ob2;
@@ -725,7 +724,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
                          struct write_point *wp)
 {
        mutex_lock(&wp->lock);
-       bch2_open_buckets_stop_dev(c, ca, &wp->ptrs, wp->type);
+       bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
        mutex_unlock(&wp->lock);
 }
 
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 6d8ffb0cd06dfb849cc7a274b95b45bd334d0c1c..687f973e4b3a98b3cc9e1aaaab6f89c73d6da584 100644
@@ -106,7 +106,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
 void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
 
 void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
-                               struct open_buckets *, enum bch_data_type);
+                               struct open_buckets *);
 
 void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
                          struct write_point *);
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 667170b54ce6769d0d748d14ad2d12856895a6a4..4577d77a9f38901826284b0f20a4549a60617940 100644
@@ -657,7 +657,7 @@ struct bch_reservation {
 
 /* Maximum possible size of an entire extent value: */
 #define BKEY_EXTENT_VAL_U64s_MAX                               \
-       (BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+       (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
 
 #define BKEY_PADDED(key)       __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
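
Context for the new leading "1 +": the constant is the worst-case size, in
u64s, of an extent value, and BKEY_PADDED() just above uses it to size
on-stack key buffers, so undercounting by one u64 would let a maximal
extent overrun the padding. Roughly what that padding expands to (sketch
from memory of __BKEY_PADDED(), not part of this commit):

        struct {
                struct bkey_i   key;
                __u64           key_pad[BKEY_EXTENT_VAL_U64s_MAX];
        };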
 
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 6fa6ac1fadc13494c826c0175a5588220558862f..f01405dd502bb64612f44303a0ca5daad58810f5 100644
@@ -145,7 +145,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
        }
 
        if (ops->key_debugcheck)
-               ops->key_debugcheck(c, b, k);
+               ops->key_debugcheck(c, k);
 }
 
 void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index e6e97cda4f501154c69fe09654a830391bf9a48b..8568b65c1ed2e9ac1a15f8a2f437961f678698ef 100644
@@ -26,8 +26,7 @@ struct bkey_ops {
        /* Returns reason for being invalid if invalid, else NULL: */
        const char *    (*key_invalid)(const struct bch_fs *,
                                       struct bkey_s_c);
-       void            (*key_debugcheck)(struct bch_fs *, struct btree *,
-                                         struct bkey_s_c);
+       void            (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
        void            (*val_to_text)(struct printbuf *, struct bch_fs *,
                                       struct bkey_s_c);
        void            (*swab)(const struct bkey_format *, struct bkey_packed *);
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 046524c8d5ea6928a25ce207ce4d629b87f5b062..416949512057cf8467ee73b63e6fb3fda5757926 100644
@@ -674,10 +674,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
        EBUG_ON(!btree_node_locked(iter, level + 1));
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 retry:
-       rcu_read_lock();
        b = btree_cache_find(bc, k);
-       rcu_read_unlock();
-
        if (unlikely(!b)) {
                /*
                 * We must have the parent locked to call bch2_btree_node_fill(),
@@ -878,10 +875,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
        BUG_ON(!btree_node_locked(iter, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
-       rcu_read_lock();
        b = btree_cache_find(bc, k);
-       rcu_read_unlock();
-
        if (b)
                return;
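
A plausible reading of the locking change above (inferred, not part of the
patch): btree_cache_find() is a thin wrapper around
rhashtable_lookup_fast(), which enters its own RCU read-side critical
section, making the callers' explicit rcu_read_lock()/rcu_read_unlock()
pair redundant:

        /* paraphrased from btree_cache.c: */
        static inline struct btree *btree_cache_find(struct btree_cache *bc,
                                                     const struct bkey_i *k)
        {
                return rhashtable_lookup_fast(&bc->table, &PTR_HASH(k),
                                              bch_btree_cache_params);
        }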
 
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 5c77a9552a16d9d93e16edd5feb8e93a4df45b15..f4adb07a3de280672e4348d5e83855ee87f0fc89 100644
@@ -762,6 +762,8 @@ out:
                        percpu_down_write(&c->mark_lock);
                        bch2_gc_free(c);
                        percpu_up_write(&c->mark_lock);
+                       /* flush fsck errors, reset counters */
+                       bch2_flush_fsck_errs(c);
 
                        goto again;
                }
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index a28d2dd7d5b3d2f23c8c600bf2e4cc5923565e1e..40cd87d73a4fbcbdba8cb8109783f3792e85ed7d 100644
@@ -526,6 +526,10 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
        unsigned offset = __btree_node_key_to_offset(b, where);
        int shift = new_u64s - clobber_u64s;
        unsigned old_end = t->end_offset - shift;
+       unsigned orig_iter_pos = node_iter->data[0].k;
+       bool iter_current_key_modified =
+               orig_iter_pos >= offset &&
+               orig_iter_pos <= offset + clobber_u64s;
 
        btree_node_iter_for_each(node_iter, set)
                if (set->end == old_end)
@@ -534,18 +538,12 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
        /* didn't find the bset in the iterator - might have to readd it: */
        if (new_u64s &&
            btree_iter_pos_cmp(iter, b, where) > 0) {
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
                bch2_btree_node_iter_push(node_iter, b, where, end);
-
-               if (!b->level &&
-                   node_iter == &iter->l[0].iter)
-                       bkey_disassemble(b,
-                               bch2_btree_node_iter_peek_all(node_iter, b),
-                               &iter->k);
+               goto fixup_done;
+       } else {
+               /* Iterator is after key that changed */
+               return;
        }
-
-       goto iter_current_key_not_modified;
 found:
        set->end = t->end_offset;
 
@@ -561,40 +559,25 @@ found:
                if (set->k == set->end)
                        bch2_btree_node_iter_set_drop(node_iter, set);
        } else {
+               /* Iterator is after key that changed */
                set->k = (int) set->k + shift;
-               goto iter_current_key_not_modified;
+               return;
        }
 
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
        bch2_btree_node_iter_sort(node_iter, b);
-       if (!b->level && node_iter == &iter->l[0].iter) {
-               /*
-                * not legal to call bkey_debugcheck() here, because we're
-                * called midway through the update path after update has been
-                * marked but before deletes have actually happened:
-                */
-#if 0
-               __btree_iter_peek_all(iter, &iter->l[0], &iter->k);
-#endif
-               struct btree_iter_level *l = &iter->l[0];
-               struct bkey_packed *k =
-                       bch2_btree_node_iter_peek_all(&l->iter, l->b);
+fixup_done:
+       if (node_iter->data[0].k != orig_iter_pos)
+               iter_current_key_modified = true;
 
-               if (unlikely(!k))
-                       iter->k.type = KEY_TYPE_deleted;
-               else
-                       bkey_disassemble(l->b, k, &iter->k);
-       }
-iter_current_key_not_modified:
        /*
         * When a new key is added, and the node iterator now points to that
         * key, the iterator might have skipped past deleted keys that should
         * come after the key the iterator now points to. We have to rewind to
-        * before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
-        * breaks:
+        * before those deleted keys - otherwise
+        * bch2_btree_node_iter_prev_all() breaks:
         */
        if (!bch2_btree_node_iter_end(node_iter) &&
+           iter_current_key_modified &&
            (b->level ||
             (iter->flags & BTREE_ITER_IS_EXTENTS))) {
                struct bset_tree *t;
@@ -622,7 +605,21 @@ iter_current_key_not_modified:
                }
        }
 
-       bch2_btree_node_iter_verify(node_iter, b);
+       if (!b->level &&
+           node_iter == &iter->l[0].iter &&
+           iter_current_key_modified) {
+               struct bkey_packed *k =
+                       bch2_btree_node_iter_peek_all(node_iter, b);
+
+               if (likely(k)) {
+                       bkey_disassemble(b, k, &iter->k);
+               } else {
+                       /* XXX: for extents, calculate size of hole? */
+                       iter->k.type = KEY_TYPE_deleted;
+               }
+
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+       }
 }
 
 void bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -635,14 +632,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
        struct bset_tree *t = bch2_bkey_to_bset(b, where);
        struct btree_iter *linked;
 
-       if (node_iter != &iter->l[b->level].iter)
+       if (node_iter != &iter->l[b->level].iter) {
                __bch2_btree_node_iter_fix(iter, b, node_iter, t,
-                                         where, clobber_u64s, new_u64s);
+                                          where, clobber_u64s, new_u64s);
+               bch2_btree_node_iter_verify(node_iter, b);
+       }
 
-       trans_for_each_iter_with_node(iter->trans, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked) {
                __bch2_btree_node_iter_fix(linked, b,
-                                         &linked->l[b->level].iter, t,
-                                         where, clobber_u64s, new_u64s);
+                                          &linked->l[b->level].iter, t,
+                                          where, clobber_u64s, new_u64s);
+               __bch2_btree_iter_verify(linked, b);
+       }
 }
 
 static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -685,6 +686,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
                        bch2_btree_node_iter_peek(&l->iter, l->b));
 }
 
+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
+                                               struct btree_iter_level *l)
+{
+       return __btree_iter_unpack(iter, l, &iter->k,
+                       bch2_btree_node_iter_prev(&l->iter, l->b));
+}
+
 static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
                                             struct btree_iter_level *l,
                                             int max_advance)
@@ -743,18 +751,29 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
                btree_node_unlock(iter, b->level + 1);
 }
 
+static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
+                                             struct btree *b)
+{
+       return bkey_cmp(iter->pos, b->data->min_key) < 0;
+}
+
 static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
                                             struct btree *b)
 {
-       return __btree_iter_pos_cmp(iter, NULL,
-                       bkey_to_packed(&b->key), true) < 0;
+       int cmp = bkey_cmp(b->key.k.p, iter->pos);
+
+       if (!cmp &&
+           (iter->flags & BTREE_ITER_IS_EXTENTS) &&
+           bkey_cmp(b->key.k.p, POS_MAX))
+               cmp = -1;
+       return cmp < 0;
 }
 
 static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
                                          struct btree *b)
 {
        return iter->btree_id == b->btree_id &&
-               bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
+               !btree_iter_pos_before_node(iter, b) &&
                !btree_iter_pos_after_node(iter, b);
 }
 
@@ -956,10 +975,10 @@ static void btree_iter_up(struct btree_iter *iter)
        btree_node_unlock(iter, iter->level++);
 }
 
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *);
 
 static int __btree_iter_traverse_all(struct btree_trans *trans,
-                                    struct btree_iter *orig_iter, int ret)
+                                  struct btree_iter *orig_iter, int ret)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter *iter;
@@ -1003,7 +1022,7 @@ retry_all:
                iter = &trans->iters[sorted[i]];
 
                do {
-                       ret = __bch2_btree_iter_traverse(iter);
+                       ret = btree_iter_traverse_one(iter);
                } while (ret == -EINTR);
 
                if (ret)
@@ -1021,16 +1040,27 @@ int bch2_btree_iter_traverse_all(struct btree_trans *trans)
        return __btree_iter_traverse_all(trans, NULL, 0);
 }
 
-static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
-                                          bool check_pos)
+static inline bool btree_iter_good_node(struct btree_iter *iter,
+                                       unsigned l, int check_pos)
+{
+       if (!is_btree_node(iter, l) ||
+           !bch2_btree_node_relock(iter, l))
+               return false;
+
+       if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
+               return false;
+       if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
+               return false;
+       return true;
+}
+
+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
+                                                    int check_pos)
 {
        unsigned l = iter->level;
 
        while (btree_iter_node(iter, l) &&
-              (!is_btree_node(iter, l) ||
-               !bch2_btree_node_relock(iter, l) ||
-                (check_pos &&
-                 !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+              !btree_iter_good_node(iter, l, check_pos)) {
                btree_node_unlock(iter, l);
                iter->l[l].b = BTREE_ITER_NO_NODE_UP;
                l++;
@@ -1048,7 +1078,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
  * stashed in the iterator and returned from bch2_trans_exit().
  */
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter)
 {
        unsigned depth_want = iter->level;
 
@@ -1062,7 +1092,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
         * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
         * here unnecessary
         */
-       iter->level = btree_iter_up_until_locked(iter, true);
+       iter->level = btree_iter_up_until_good_node(iter, 0);
 
        /*
         * If we've got a btree node locked (i.e. we aren't about to relock the
@@ -1070,8 +1100,11 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
         *
         * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
         */
-       if (btree_iter_node(iter, iter->level))
+       if (btree_iter_node(iter, iter->level)) {
+               BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
+
                btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
+       }
 
        /*
         * Note: iter->nodes[iter->level] may be temporarily NULL here - that
@@ -1100,12 +1133,12 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
        return 0;
 }
 
-int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 {
        int ret;
 
        ret =   bch2_trans_cond_resched(iter->trans) ?:
-               __bch2_btree_iter_traverse(iter);
+               btree_iter_traverse_one(iter);
        if (unlikely(ret))
                ret = __btree_iter_traverse_all(iter->trans, iter, ret);
 
@@ -1234,19 +1267,11 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
 }
 
-void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
 {
-       int cmp = bkey_cmp(new_pos, iter->pos);
-       unsigned level;
-
-       if (!cmp)
-               return;
-
-       iter->pos = new_pos;
-
-       level = btree_iter_up_until_locked(iter, true);
+       unsigned l = btree_iter_up_until_good_node(iter, cmp);
 
-       if (btree_iter_node(iter, level)) {
+       if (btree_iter_node(iter, l)) {
                /*
                 * We might have to skip over many keys, or just a few: try
                 * advancing the node iterator, and if we have to skip over too
@@ -1254,37 +1279,98 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
                 * is expensive).
                 */
                if (cmp < 0 ||
-                   !btree_iter_advance_to_pos(iter, &iter->l[level], 8))
-                       __btree_iter_init(iter, level);
+                   !btree_iter_advance_to_pos(iter, &iter->l[l], 8))
+                       __btree_iter_init(iter, l);
 
                /* Don't leave it locked if we're not supposed to: */
-               if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
-                       btree_node_unlock(iter, level);
+               if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
+                       btree_node_unlock(iter, l);
        }
 
-       if (level != iter->level)
+       return l;
+}
+
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+       int cmp = bkey_cmp(new_pos, iter->pos);
+       unsigned l;
+
+       if (!cmp)
+               return;
+
+       iter->pos = new_pos;
+
+       l = btree_iter_pos_changed(iter, cmp);
+
+       if (l != iter->level)
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
        else
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
 
+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+
+       iter->pos       = l->b->key.k.p;
+       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+       if (!bkey_cmp(iter->pos, POS_MAX)) {
+               bkey_init(&iter->k);
+               iter->k.p       = POS_MAX;
+               return false;
+       }
+
+       iter->pos = btree_type_successor(iter->btree_id, iter->pos);
+       btree_iter_pos_changed(iter, 1);
+       return true;
+}
+
+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+
+       iter->pos       = l->b->data->min_key;
+       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+       if (!bkey_cmp(iter->pos, POS_MIN)) {
+               bkey_init(&iter->k);
+               iter->k.p       = POS_MIN;
+               return false;
+       }
+
+       iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
+       btree_iter_pos_changed(iter, -1);
+       return true;
+}
+
 static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
 {
        struct btree_iter_level *l = &iter->l[0];
        struct bkey_s_c ret = { .k = &iter->k };
 
        if (!bkey_deleted(&iter->k)) {
-               EBUG_ON(bch2_btree_node_iter_end(&l->iter));
-               ret.v = bkeyp_val(&l->b->format,
-                       __bch2_btree_node_iter_peek_all(&l->iter, l->b));
+               struct bkey_packed *_k =
+                       __bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+               ret.v = bkeyp_val(&l->b->format, _k);
+
+               if (debug_check_iterators(iter->trans->c)) {
+                       struct bkey k = bkey_unpack_key(l->b, _k);
+                       BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
+               }
+
+               if (debug_check_bkeys(iter->trans->c))
+                       bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
        }
 
-       if (debug_check_bkeys(iter->trans->c) &&
-           !bkey_deleted(ret.k))
-               bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
        return ret;
 }
 
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 {
        struct btree_iter_level *l = &iter->l[0];
@@ -1297,24 +1383,16 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                return btree_iter_peek_uptodate(iter);
 
        while (1) {
-               if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
-                       ret = bch2_btree_iter_traverse(iter);
-                       if (unlikely(ret))
-                               return bkey_s_c_err(ret);
-               }
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
 
                k = __btree_iter_peek(iter, l);
                if (likely(k.k))
                        break;
 
-               /* got to the end of the leaf, iterator needs to be traversed: */
-               iter->pos       = l->b->key.k.p;
-               iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
-
-               if (!bkey_cmp(iter->pos, POS_MAX))
+               if (!btree_iter_set_pos_to_next_leaf(iter))
                        return bkey_s_c_null;
-
-               iter->pos = btree_type_successor(iter->btree_id, iter->pos);
        }
 
        /*
@@ -1329,22 +1407,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
        return k;
 }
 
-static noinline
-struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
-{
-       struct btree_iter_level *l = &iter->l[0];
-
-       iter->pos       = l->b->key.k.p;
-       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
-
-       if (!bkey_cmp(iter->pos, POS_MAX))
-               return bkey_s_c_null;
-
-       iter->pos = btree_type_successor(iter->btree_id, iter->pos);
-
-       return bch2_btree_iter_peek(iter);
-}
-
+/**
+ * bch2_btree_iter_next: returns first key greater than iterator's current
+ * position
+ */
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 {
        struct btree_iter_level *l = &iter->l[0];
@@ -1353,15 +1419,19 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 
        bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
 
-       iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
-
        if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+               if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+                       return bkey_s_c_null;
+
                /*
                 * XXX: when we just need to relock we should be able to avoid
                 * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
                 * for that to work
                 */
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+               iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+               bch2_btree_iter_set_pos(iter,
+                       btree_type_successor(iter->btree_id, iter->k.p));
 
                return bch2_btree_iter_peek(iter);
        }
@@ -1369,9 +1439,12 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
        do {
                bch2_btree_node_iter_advance(&l->iter, l->b);
                p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-               if (unlikely(!p))
-                       return bch2_btree_iter_peek_next_leaf(iter);
-       } while (bkey_whiteout(p));
+       } while (likely(p) && bkey_whiteout(p));
+
+       if (unlikely(!p))
+               return btree_iter_set_pos_to_next_leaf(iter)
+                       ? bch2_btree_iter_peek(iter)
+                       : bkey_s_c_null;
 
        k = __btree_iter_unpack(iter, l, &iter->k, p);
 
@@ -1380,51 +1453,79 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
        return k;
 }
 
-struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+/**
+ * bch2_btree_iter_peek_prev: returns first key less than or equal to
+ * iterator's current position
+ */
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 {
        struct btree_iter_level *l = &iter->l[0];
-       struct bkey_packed *p;
        struct bkey_s_c k;
        int ret;
 
        bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
 
-       if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
-               k = bch2_btree_iter_peek(iter);
-               if (IS_ERR(k.k))
-                       return k;
-       }
+       if (iter->uptodate == BTREE_ITER_UPTODATE)
+               return btree_iter_peek_uptodate(iter);
 
        while (1) {
-               p = bch2_btree_node_iter_prev(&l->iter, l->b);
-               if (likely(p))
-                       break;
-
-               iter->pos = l->b->data->min_key;
-               if (!bkey_cmp(iter->pos, POS_MIN))
-                       return bkey_s_c_null;
-
-               bch2_btree_iter_set_pos(iter,
-                       btree_type_predecessor(iter->btree_id, iter->pos));
-
                ret = bch2_btree_iter_traverse(iter);
                if (unlikely(ret))
                        return bkey_s_c_err(ret);
 
-               p = bch2_btree_node_iter_peek(&l->iter, l->b);
-               if (p)
+               k = __btree_iter_peek(iter, l);
+               if (!k.k ||
+                   bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+                       k = __btree_iter_prev(iter, l);
+
+               if (likely(k.k))
                        break;
-       }
 
-       k = __btree_iter_unpack(iter, l, &iter->k, p);
+               if (!btree_iter_set_pos_to_prev_leaf(iter))
+                       return bkey_s_c_null;
+       }
 
        EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
-
        iter->pos       = bkey_start_pos(k.k);
        iter->uptodate  = BTREE_ITER_UPTODATE;
        return k;
 }
 
+/**
+ * bch2_btree_iter_prev: returns first key less than iterator's current
+ * position
+ */
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c k;
+
+       bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
+
+       if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+               /*
+                * XXX: when we just need to relock we should be able to avoid
+                * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
+                * for that to work
+                */
+               iter->pos       = btree_type_predecessor(iter->btree_id,
+                                                        iter->pos);
+               iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+               return bch2_btree_iter_peek_prev(iter);
+       }
+
+       k = __btree_iter_prev(iter, l);
+       if (unlikely(!k.k))
+               return btree_iter_set_pos_to_prev_leaf(iter)
+                       ? bch2_btree_iter_peek(iter)
+                       : bkey_s_c_null;
+
+       EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0);
+       iter->pos       = bkey_start_pos(k.k);
+       return k;
+}
+
 static inline struct bkey_s_c
 __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
 {
@@ -1565,11 +1666,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
        if (iter->uptodate == BTREE_ITER_UPTODATE)
                return btree_iter_peek_uptodate(iter);
 
-       if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
-               ret = bch2_btree_iter_traverse(iter);
-               if (unlikely(ret))
-                       return bkey_s_c_err(ret);
-       }
+       ret = bch2_btree_iter_traverse(iter);
+       if (unlikely(ret))
+               return bkey_s_c_err(ret);
 
        return __bch2_btree_iter_peek_slot(iter);
 }
@@ -1671,7 +1770,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
 static int bch2_trans_realloc_iters(struct btree_trans *trans,
                                    unsigned new_size)
 {
-       void *new_iters, *new_updates;
+       void *new_iters, *new_updates, *new_sorted;
+       size_t iters_bytes;
+       size_t updates_bytes;
+       size_t sorted_bytes;
 
        new_size = roundup_pow_of_two(new_size);
 
@@ -1684,9 +1786,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
 
        bch2_trans_unlock(trans);
 
-       new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
-                           sizeof(struct btree_insert_entry) * (new_size + 4),
-                           GFP_NOFS);
+       iters_bytes     = sizeof(struct btree_iter) * new_size;
+       updates_bytes   = sizeof(struct btree_insert_entry) * (new_size + 4);
+       sorted_bytes    = sizeof(u8) * (new_size + 4);
+
+       new_iters = kmalloc(iters_bytes +
+                           updates_bytes +
+                           sorted_bytes, GFP_NOFS);
        if (new_iters)
                goto success;
 
@@ -1695,7 +1801,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
 
        trans->used_mempool = true;
 success:
-       new_updates = new_iters + sizeof(struct btree_iter) * new_size;
+       new_updates     = new_iters + iters_bytes;
+       new_sorted      = new_updates + updates_bytes;
 
        memcpy(new_iters, trans->iters,
               sizeof(struct btree_iter) * trans->nr_iters);
@@ -1710,9 +1817,10 @@ success:
        if (trans->iters != trans->iters_onstack)
                kfree(trans->iters);
 
-       trans->iters    = new_iters;
-       trans->updates  = new_updates;
-       trans->size     = new_size;
+       trans->iters            = new_iters;
+       trans->updates          = new_updates;
+       trans->updates_sorted   = new_sorted;
+       trans->size             = new_size;
 
        if (trans->iters_live) {
                trace_trans_restart_iters_realloced(trans->ip, trans->size);
@@ -1957,6 +2065,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        trans->size             = ARRAY_SIZE(trans->iters_onstack);
        trans->iters            = trans->iters_onstack;
        trans->updates          = trans->updates_onstack;
+       trans->updates_sorted   = trans->updates_sorted_onstack;
        trans->fs_usage_deltas  = NULL;
 
        if (expected_nr_iters > trans->size)
@@ -1981,3 +2090,18 @@ int bch2_trans_exit(struct btree_trans *trans)
 
        return trans->error ? -EIO : 0;
 }
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+       mempool_exit(&c->btree_iters_pool);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+       unsigned nr = BTREE_ITER_MAX;
+
+       return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+                       sizeof(struct btree_iter) * nr +
+                       sizeof(struct btree_insert_entry) * (nr + 4) +
+                       sizeof(u8) * (nr + 4));
+}
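
The reverse-iteration rework above splits "first key <= pos"
(bch2_btree_iter_peek_prev()) from "first key < pos"
(bch2_btree_iter_prev()). A usage sketch with error handling simplified
(the loop body is illustrative only):

        struct bkey_s_c k;

        /* walk keys backwards from the current position toward POS_MIN: */
        for (k = bch2_btree_iter_peek_prev(iter);       /* first key <= pos */
             k.k && !IS_ERR(k.k);
             k = bch2_btree_iter_prev(iter))            /* strictly smaller keys */
                pr_info("key %llu:%llu\n",
                        k.k->p.inode, k.k->p.offset);
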
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 249df21b9a97385094db8365f1c06bebcaea762b..e4967215e1d914e669b021ee0872730bb56f736c 100644
@@ -134,7 +134,16 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
 
 void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
 
-int __must_check bch2_btree_iter_traverse(struct btree_iter *);
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+
+static inline int __must_check
+bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+       return iter->uptodate >= BTREE_ITER_NEED_RELOCK
+               ? __bch2_btree_iter_traverse(iter)
+               : 0;
+}
+
 int bch2_btree_iter_traverse_all(struct btree_trans *);
 
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
@@ -142,6 +151,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
 
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
 
 struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
@@ -303,4 +314,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 int bch2_trans_exit(struct btree_trans *);
 
+void bch2_fs_btree_iter_exit(struct bch_fs *);
+int bch2_fs_btree_iter_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_BTREE_ITER_H */
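
bch2_btree_iter_traverse() is now an inline fast path that only calls the
out-of-line __bch2_btree_iter_traverse() when the iterator actually needs
relocking or retraversal. A minimal caller sketch (treating bkey_err() as
the error-extraction helper is an assumption here):

        struct bkey_s_c k;
        int ret;

        ret = bch2_btree_iter_traverse(iter);   /* usually a no-op now */
        if (ret)
                return ret;

        k = bch2_btree_iter_peek(iter);
        ret = bkey_err(k);
        if (ret)
                return ret;
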
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index ea07ba19c5dc17ac83f52c95d1ed12e76643c651..592c3b4eed649da56f296747f9369a2b0125c92e 100644
@@ -212,7 +212,7 @@ static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter
        EBUG_ON(iter->l[b->level].b != b);
        EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
 
-       if (!six_trylock_write(&b->lock))
+       if (unlikely(!six_trylock_write(&b->lock)))
                __bch2_btree_node_lock_write(b, iter);
 }
 
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f4e1bfe129a0815a43682cca96bf0d3bd709106f..b0da0963091181ac05e3800dc8675988f4221aad 100644
@@ -261,8 +261,6 @@ struct btree_insert_entry {
        };
 
        bool                    deferred;
-       bool                    triggered;
-       bool                    marked;
 };
 
 #define BTREE_ITER_MAX         64
@@ -291,6 +289,7 @@ struct btree_trans {
 
        struct btree_iter       *iters;
        struct btree_insert_entry *updates;
+       u8                      *updates_sorted;
 
        /* update path: */
        struct journal_res      journal_res;
@@ -302,6 +301,7 @@ struct btree_trans {
 
        struct btree_iter       iters_onstack[2];
        struct btree_insert_entry updates_onstack[6];
+       u8                      updates_sorted_onstack[6];
 
        struct replicas_delta_list *fs_usage_deltas;
 };
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 616c103c05ecd874fdb97c4d8bb8408413006085..36e34b3d9213298fba90e02823f67bb4d0d33221 100644
@@ -43,7 +43,6 @@ enum {
        __BTREE_INSERT_USE_ALLOC_RESERVE,
        __BTREE_INSERT_JOURNAL_REPLAY,
        __BTREE_INSERT_JOURNAL_RESERVED,
-       __BTREE_INSERT_NOMARK_INSERT,
        __BTREE_INSERT_NOMARK_OVERWRITES,
        __BTREE_INSERT_NOMARK,
        __BTREE_INSERT_MARK_INMEM,
@@ -81,9 +80,6 @@ enum {
 
 #define BTREE_INSERT_JOURNAL_RESERVED  (1 << __BTREE_INSERT_JOURNAL_RESERVED)
 
-/* Don't mark new key, just overwrites: */
-#define BTREE_INSERT_NOMARK_INSERT     (1 << __BTREE_INSERT_NOMARK_INSERT)
-
 /* Don't mark overwrites, just new key: */
 #define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
 
@@ -123,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *,
                      struct disk_reservation *,
                      u64 *, unsigned);
 
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
-                                            struct btree_insert_entry);
+static inline void bch2_trans_update(struct btree_trans *trans,
+                                    struct btree_insert_entry entry)
+{
+       EBUG_ON(trans->nr_updates >= trans->nr_iters + 4);
+
+       trans->updates[trans->nr_updates++] = entry;
+}
 
 #define bch2_trans_do(_c, _journal_seq, _flags, _do)                   \
 ({                                                                     \
@@ -144,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
        _ret;                                                           \
 })
 
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static inline bool same_leaf_as_prev(struct btree_trans *trans,
-                                    struct btree_insert_entry *i)
-{
-       return i != trans->updates &&
-               !i->deferred &&
-               i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
 #define __trans_next_update(_trans, _i, _filter)                       \
 ({                                                                     \
        while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
@@ -175,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
 #define trans_for_each_update_iter(trans, i)                           \
        __trans_for_each_update(trans, i, !(i)->deferred)
 
-#define trans_for_each_update_leaf(trans, i)                           \
-       __trans_for_each_update(trans, i, !(i)->deferred &&             \
-                              !same_leaf_as_prev(trans, i))
-
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
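
With bch2_trans_update() reduced to an append, update order is no longer
maintained at insertion time; the commit path sorts instead (see
btree_trans_sort_updates() in btree_update_leaf.c below). A hedged caller
sketch, where new_k is assumed to be a prepared struct bkey_i *:

        /* updates are queued in call order; the commit path sorts them
         * by iterator position before taking write locks: */
        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, new_k));

        ret = bch2_trans_commit(&trans, NULL /* disk reservation */,
                                NULL /* journal seq */, 0 /* flags */);
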
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index c0a84153ecda839b44b93713b6711bc8a0b6d81f..7d983b2104b4cc09a5c18d495ef880e666198e45 100644
 #include <linux/sort.h>
 #include <trace/events/bcachefs.h>
 
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+                                    unsigned sorted_idx)
+{
+       struct btree_insert_entry *i = trans->updates +
+               trans->updates_sorted[sorted_idx];
+       struct btree_insert_entry *prev = sorted_idx
+               ? trans->updates + trans->updates_sorted[sorted_idx - 1]
+               : NULL;
+
+       return !i->deferred &&
+               prev &&
+               i->iter->l[0].b == prev->iter->l[0].b;
+}
+
+#define trans_for_each_update_sorted(_trans, _i, _iter)                        \
+       for (_iter = 0;                                                 \
+            _iter < _trans->nr_updates &&                              \
+            (_i = _trans->updates + _trans->updates_sorted[_iter], 1); \
+            _iter++)
+
 inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
                                            struct btree_iter *iter)
 {
        bch2_btree_node_lock_write(b, iter);
 
-       if (btree_node_just_written(b) &&
+       if (unlikely(btree_node_just_written(b)) &&
            bch2_btree_post_write_cleanup(c, b))
                bch2_btree_iter_reinit_node(iter, b);
 
@@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
                bch2_btree_init_next(c, b, iter);
 }
 
-static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+static void btree_trans_lock_write(struct btree_trans *trans, bool lock)
 {
+       struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
+       unsigned iter;
 
-       trans_for_each_update_leaf(trans, i)
-               bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
-
-static void btree_trans_unlock_write(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
+       trans_for_each_update_sorted(trans, i, iter) {
+               if (same_leaf_as_prev(trans, iter))
+                       continue;
 
-       trans_for_each_update_leaf(trans, i)
-               bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+               if (lock)
+                       bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+               else
+                       bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+       }
 }
 
 static inline int btree_trans_cmp(struct btree_insert_entry l,
@@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
                btree_iter_cmp(l.iter, r.iter);
 }
 
+static inline void btree_trans_sort_updates(struct btree_trans *trans)
+{
+       struct btree_insert_entry *l, *r;
+       unsigned nr = 0, pos;
+
+       trans_for_each_update(trans, l) {
+               for (pos = 0; pos < nr; pos++) {
+                       r = trans->updates + trans->updates_sorted[pos];
+
+                       if (btree_trans_cmp(*l, *r) <= 0)
+                               break;
+               }
+
+               memmove(&trans->updates_sorted[pos + 1],
+                       &trans->updates_sorted[pos],
+                       (nr - pos) * sizeof(trans->updates_sorted[0]));
+
+               trans->updates_sorted[pos] = l - trans->updates;
+               nr++;
+       }
+
+       BUG_ON(nr != trans->nr_updates);
+}
+
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -106,7 +151,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                                bch2_bset_delete(b, k, clobber_u64s);
                                bch2_btree_node_iter_fix(iter, b, node_iter,
                                                         k, clobber_u64s, 0);
-                               bch2_btree_iter_verify(iter, b);
                                return true;
                        }
 
@@ -116,7 +160,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                k->type = KEY_TYPE_deleted;
                bch2_btree_node_iter_fix(iter, b, node_iter, k,
                                         k->u64s, k->u64s);
-               bch2_btree_iter_verify(iter, b);
 
                if (bkey_whiteout(&insert->k)) {
                        reserve_whiteout(b, k);
@@ -138,10 +181,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
        clobber_u64s = 0;
 overwrite:
        bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
-       if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
-               bch2_btree_node_iter_fix(iter, b, node_iter, k,
-                                        clobber_u64s, k->u64s);
-       bch2_btree_iter_verify(iter, b);
+       bch2_btree_node_iter_fix(iter, b, node_iter, k,
+                                clobber_u64s, k->u64s);
        return true;
 }
 
@@ -488,12 +529,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans,
                                        struct btree_insert_entry **stopped_at)
 {
        struct btree_insert_entry *i;
-       unsigned u64s = 0;
+       unsigned iter, u64s = 0;
        int ret;
 
-       trans_for_each_update_iter(trans, i) {
+       trans_for_each_update_sorted(trans, i, iter) {
                /* Multiple inserts might go to same leaf: */
-               if (!same_leaf_as_prev(trans, i))
+               if (!same_leaf_as_prev(trans, iter))
                        u64s = 0;
 
                u64s += i->k->k.u64s;
@@ -542,7 +583,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct bch_fs_usage *fs_usage = NULL;
        struct btree_insert_entry *i;
-       bool saw_non_marked;
        unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
                ? BCH_BUCKET_MARK_BUCKET_INVALIDATE
                : 0;
@@ -551,35 +591,32 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        trans_for_each_update_iter(trans, i)
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
+       /*
+        * note: running triggers will append more updates to the list of
+        * updates as we're walking it:
+        */
        trans_for_each_update_iter(trans, i)
-               i->marked = false;
-
-       do {
-               saw_non_marked = false;
-
-               trans_for_each_update_iter(trans, i) {
-                       if (i->marked)
-                               continue;
-
-                       saw_non_marked = true;
-                       i->marked = true;
-
-                       if (update_has_triggers(trans, i) &&
-                           update_triggers_transactional(trans, i)) {
-                               ret = bch2_trans_mark_update(trans, i->iter, i->k);
-                               if (ret == -EINTR)
-                                       trace_trans_restart_mark(trans->ip);
-                               if (ret)
-                                       goto out_clear_replicas;
-                       }
+               if (update_has_triggers(trans, i) &&
+                   update_triggers_transactional(trans, i)) {
+                       ret = bch2_trans_mark_update(trans, i->iter, i->k);
+                       if (ret == -EINTR)
+                               trace_trans_restart_mark(trans->ip);
+                       if (ret)
+                               goto out_clear_replicas;
                }
-       } while (saw_non_marked);
 
-       trans_for_each_update(trans, i)
-               btree_insert_entry_checks(trans, i);
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               trans_for_each_update(trans, i)
+                       btree_insert_entry_checks(trans, i);
        bch2_btree_trans_verify_locks(trans);
 
-       btree_trans_lock_write(c, trans);
+       /*
+        * No more updates can be added - sort updates so we can take write
+        * locks in the correct order:
+        */
+       btree_trans_sort_updates(trans);
+
+       btree_trans_lock_write(trans, true);
 
        if (race_fault()) {
                ret = -EINTR;
@@ -597,8 +634,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                goto out;
 
        trans_for_each_update_iter(trans, i) {
-               if (i->deferred ||
-                   !btree_node_type_needs_gc(i->iter->btree_id))
+               if (!btree_node_type_needs_gc(i->iter->btree_id))
                        continue;
 
                if (!fs_usage) {
@@ -664,7 +700,7 @@ out:
               (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
               trans->journal_res.ref);
 
-       btree_trans_unlock_write(trans);
+       btree_trans_lock_write(trans, false);
 
        if (fs_usage) {
                bch2_fs_usage_scratch_put(c, fs_usage);
@@ -689,19 +725,6 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        unsigned flags = trans->flags;
-       struct btree_insert_entry *src, *dst;
-
-       src = dst = trans->updates;
-
-       while (src < trans->updates + trans->nr_updates) {
-               if (!src->triggered) {
-                       *dst = *src;
-                       dst++;
-               }
-               src++;
-       }
-
-       trans->nr_updates = dst - trans->updates;
 
        /*
         * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@@ -816,6 +839,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
+       unsigned iter;
        int ret;
 
        trans_for_each_update_iter(trans, i) {
@@ -837,8 +861,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
        if (trans->flags & BTREE_INSERT_NOUNLOCK)
                trans->nounlock = true;
 
-       trans_for_each_update_leaf(trans, i)
-               bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+       trans_for_each_update_sorted(trans, i, iter)
+               if (!same_leaf_as_prev(trans, iter))
+                       bch2_foreground_maybe_merge(c, i->iter,
+                                                   0, trans->flags);
 
        trans->nounlock = false;
 
@@ -858,7 +884,8 @@ int bch2_trans_commit(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i = NULL;
-       unsigned orig_mem_top = trans->mem_top;
+       unsigned orig_nr_updates        = trans->nr_updates;
+       unsigned orig_mem_top           = trans->mem_top;
        int ret = 0;
 
        if (!trans->nr_updates)
@@ -931,39 +958,20 @@ out_noupdates:
 err:
        ret = bch2_trans_commit_error(trans, i, ret);
 
+       /* free updates and memory used by triggers, they'll be reexecuted: */
+       trans->nr_updates       = orig_nr_updates;
+       trans->mem_top          = orig_mem_top;
+
        /* can't loop if it was passed in and we changed it: */
        if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
                ret = -EINTR;
 
-       if (!ret) {
-               /* free memory used by triggers, they'll be reexecuted: */
-               trans->mem_top = orig_mem_top;
+       if (!ret)
                goto retry;
-       }
 
        goto out;
 }
 
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
-                                            struct btree_insert_entry entry)
-{
-       struct btree_insert_entry *i;
-
-       BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
-
-       for (i = trans->updates;
-            i < trans->updates + trans->nr_updates;
-            i++)
-               if (btree_trans_cmp(entry, *i) < 0)
-                       break;
-
-       memmove(&i[1], &i[0],
-               (void *) &trans->updates[trans->nr_updates] - (void *) i);
-       trans->nr_updates++;
-       *i = entry;
-       return i;
-}
-
 /**
  * bch2_btree_insert - insert keys into the extent btree
  * @c:                 pointer to struct bch_fs
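
btree_trans_sort_updates() above sorts a small array of u8 indices rather
than the update entries themselves, so pointers into trans->updates stay
stable while the write-lock ordering is established. A standalone model of
the pattern (simplified types, illustration only):

        #include <stdio.h>
        #include <string.h>

        struct entry { int pos; };

        /* insertion sort of indices, keyed by the entries they refer to */
        static void sort_indices(const struct entry *e,
                                 unsigned char *sorted, unsigned n)
        {
                unsigned nr = 0, pos, i;

                for (i = 0; i < n; i++) {
                        for (pos = 0; pos < nr; pos++)
                                if (e[i].pos <= e[sorted[pos]].pos)
                                        break;

                        memmove(&sorted[pos + 1], &sorted[pos],
                                (nr - pos) * sizeof(sorted[0]));
                        sorted[pos] = i;
                        nr++;
                }
        }

        int main(void)
        {
                struct entry e[] = { { 30 }, { 10 }, { 20 } };
                unsigned char sorted[3];
                unsigned i;

                sort_indices(e, sorted, 3);
                for (i = 0; i < 3; i++)
                        printf("%d\n", e[sorted[i]].pos);  /* 10 20 30 */
                return 0;
        }
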
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 1516df224d7de0e3f749246cc36259f869bc3fff..6a4773a92029f22ebee3f15782c210f036eecf95 100644
@@ -1265,11 +1265,10 @@ int bch2_mark_update(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(iter->btree_id))
                return 0;
 
-       if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
-               bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
-                       0, insert->k->k.size,
-                       fs_usage, trans->journal_res.seq,
-                       BCH_BUCKET_MARK_INSERT|flags);
+       bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
+               0, insert->k->k.size,
+               fs_usage, trans->journal_res.seq,
+               BCH_BUCKET_MARK_INSERT|flags);
 
        if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
                return 0;
@@ -1359,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans,
        struct btree_insert_entry *i;
        int ret;
 
-       for (i = trans->updates;
-            i < trans->updates + trans->nr_updates;
-            i++)
-               if (!i->deferred &&
-                   i->iter->btree_id == btree_id &&
+       trans_for_each_update_iter(trans, i)
+               if (i->iter->btree_id == btree_id &&
                    (btree_node_type_is_extents(btree_id)
                     ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
                       bkey_cmp(pos, i->k->k.p) < 0
@@ -1391,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
                              struct btree_iter *iter,
                              unsigned u64s)
 {
+       struct btree_insert_entry *i;
        struct bkey_i *new_k;
-       unsigned i;
 
        new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
        if (IS_ERR(new_k))
@@ -1401,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
        bkey_init(&new_k->k);
        new_k->k.p = iter->pos;
 
-       for (i = 0; i < trans->nr_updates; i++)
-               if (!trans->updates[i].deferred &&
-                   trans->updates[i].iter == iter) {
-                       trans->updates[i].k = new_k;
+       trans_for_each_update_iter(trans, i)
+               if (i->iter == iter) {
+                       i->k = new_k;
                        return new_k;
                }
 
-       bch2_trans_update(trans, ((struct btree_insert_entry) {
-               .iter = iter,
-               .k = new_k,
-               .triggered = true,
-       }));
-
+       bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
        return new_k;
 }
 
@@ -1496,6 +1486,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
        bch2_fs_inconsistent_on(overflow, c,
                "bucket sector count overflow: %u + %lli > U16_MAX",
                old, sectors);
+       BUG_ON(overflow);
 
        a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
        ret = PTR_ERR_OR_ZERO(a);
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index e55aa98cf9ee48fe3d7d5bda4786ad20dc9bb072..a5c947e8adf34a0a2e8f1ce9777675679a136d6e 100644
@@ -127,7 +127,6 @@ static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
        do_encrypt(c->chacha20, nonce, key, sizeof(key));
 
        desc->tfm = c->poly1305;
-       desc->flags = 0;
        crypto_shash_init(desc);
        crypto_shash_update(desc, key, sizeof(key));
 }
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 0742d2c12047e3783f8107c5dc4463a84766b695..be2eca0fcdf7f67a4c8a210beb3977f329e21326 100644
@@ -1173,12 +1173,8 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
                struct ec_stripe_new *s = NULL;
 
                mutex_lock(&h->lock);
-               bch2_open_buckets_stop_dev(c, ca,
-                                          &h->blocks,
-                                          BCH_DATA_USER);
-               bch2_open_buckets_stop_dev(c, ca,
-                                          &h->parity,
-                                          BCH_DATA_USER);
+               bch2_open_buckets_stop_dev(c, ca, &h->blocks);
+               bch2_open_buckets_stop_dev(c, ca, &h->parity);
 
                if (!h->s)
                        goto unlock;
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 1aaff44e18cf0047f8a677852990c165b003a7d4..304ff92500be917022bad5e70e6e4c2a1458d8d4 100644
@@ -4,6 +4,8 @@
 #include "io.h"
 #include "super.h"
 
+#define FSCK_ERR_RATELIMIT_NR  10
+
 bool bch2_inconsistent_error(struct bch_fs *c)
 {
        set_bit(BCH_FS_ERROR, &c->flags);
@@ -97,8 +99,8 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
 found:
        list_move(&s->list, &c->fsck_errors);
        s->nr++;
-       suppressing     = s->nr == 10;
-       print           = s->nr <= 10;
+       suppressing     = s->nr == FSCK_ERR_RATELIMIT_NR;
+       print           = s->nr <= FSCK_ERR_RATELIMIT_NR;
        buf             = s->buf;
 print:
        va_start(args, fmt);
@@ -152,10 +154,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
        struct fsck_err_state *s, *n;
 
        mutex_lock(&c->fsck_error_lock);
-       set_bit(BCH_FS_FSCK_DONE, &c->flags);
 
        list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
-               if (s->nr > 10)
+               if (s->nr > FSCK_ERR_RATELIMIT_NR)
                        bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->buf);
 
                list_del(&s->list);
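
Note: two things change in error.c: the magic 10 becomes FSCK_ERR_RATELIMIT_NR, and setting BCH_FS_FSCK_DONE moves out of bch2_flush_fsck_errs() into its caller. Callers are now expected to do both steps themselves, as the recovery.c hunk further down shows:

	/* caller-side ordering after this change (mirrors recovery.c): */
	set_bit(BCH_FS_FSCK_DONE, &c->flags);
	bch2_flush_fsck_errs(c);
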
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index ecebd7915f79d50c7fff9dfeb7adc9420832f2e9..e10ea43b71a348aa2a4aacfb11d50e06556e81b0 100644
@@ -672,8 +672,7 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
        return bch2_bkey_ptrs_invalid(c, k);
 }
 
-void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
-                              struct bkey_s_c k)
+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
@@ -877,13 +876,6 @@ static void verify_extent_nonoverlapping(struct bch_fs *c,
 #endif
 }
 
-static void verify_modified_extent(struct btree_iter *iter,
-                                  struct bkey_packed *k)
-{
-       bch2_btree_iter_verify(iter, iter->l[0].b);
-       bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
-}
-
 static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
                               struct bkey_i *insert)
 {
@@ -896,6 +888,9 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
        EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
        verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
 
+       if (debug_check_bkeys(c))
+               bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
+
        node_iter = l->iter;
        k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
        if (k && !bkey_written(l->b, k) &&
@@ -922,7 +917,6 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
 
        bch2_bset_insert(l->b, &l->iter, k, insert, 0);
        bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
-       bch2_btree_iter_verify(iter, l->b);
 }
 
 static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
@@ -942,12 +936,13 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
        return ret;
 }
 
-static int __bch2_extent_atomic_end(struct btree_trans *trans,
-                                   struct bkey_s_c k,
-                                   unsigned offset,
-                                   struct bpos *end,
-                                   unsigned *nr_iters,
-                                   unsigned max_iters)
+static int count_iters_for_insert(struct btree_trans *trans,
+                                 struct bkey_s_c k,
+                                 unsigned offset,
+                                 struct bpos *end,
+                                 unsigned *nr_iters,
+                                 unsigned max_iters,
+                                 bool overwrite)
 {
        int ret = 0;
 
@@ -977,6 +972,20 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
                                break;
 
                        *nr_iters += 1;
+
+                       if (overwrite &&
+                           k.k->type == KEY_TYPE_reflink_v) {
+                               struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+                               if (le64_to_cpu(r.v->refcount) == 1)
+                                       *nr_iters += bch2_bkey_nr_alloc_ptrs(k);
+                       }
+
+                       /*
+                        * if we're going to be deleting an entry from
+                        * the reflink btree, need more iters...
+                        */
+
                        if (*nr_iters >= max_iters) {
                                struct bpos pos = bkey_start_pos(k.k);
                                pos.offset += r_k.k->p.offset - idx;
@@ -994,11 +1003,11 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_extent_atomic_end(struct btree_trans *trans,
-                          struct btree_iter *iter,
+int bch2_extent_atomic_end(struct btree_iter *iter,
                           struct bkey_i *insert,
                           struct bpos *end)
 {
+       struct btree_trans *trans = iter->trans;
        struct btree *b = iter->l[0].b;
        struct btree_node_iter  node_iter = iter->l[0].iter;
        struct bkey_packed      *_k;
@@ -1011,8 +1020,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
 
        *end = bpos_min(insert->k.p, b->key.k.p);
 
-       ret = __bch2_extent_atomic_end(trans, bkey_i_to_s_c(insert),
-                                      0, end, &nr_iters, 10);
+       ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert),
+                                    0, end, &nr_iters, 10, false);
        if (ret)
                return ret;
 
@@ -1031,8 +1040,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
                        offset = bkey_start_offset(&insert->k) -
                                bkey_start_offset(k.k);
 
-               ret = __bch2_extent_atomic_end(trans, k, offset,
-                                              end, &nr_iters, 20);
+               ret = count_iters_for_insert(trans, k, offset,
+                                            end, &nr_iters, 20, true);
                if (ret)
                        return ret;
 
@@ -1050,7 +1059,7 @@ int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
        struct bpos end;
        int ret;
 
-       ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
+       ret = bch2_extent_atomic_end(iter, k, &end);
        if (ret)
                return ret;
 
@@ -1063,7 +1072,7 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
        struct bpos end;
        int ret;
 
-       ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
+       ret = bch2_extent_atomic_end(iter, k, &end);
        if (ret)
                return ret;
 
@@ -1137,15 +1146,16 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
        case BCH_EXTENT_OVERLAP_FRONT:
                /* insert overlaps with start of k: */
                __bch2_cut_front(insert->k.p, k);
-               BUG_ON(bkey_deleted(k.k));
+               EBUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
-               verify_modified_extent(iter, _k);
+               bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+                                        _k, _k->u64s, _k->u64s);
                break;
 
        case BCH_EXTENT_OVERLAP_BACK:
                /* insert overlaps with end of k: */
                bch2_cut_back(bkey_start_pos(&insert->k), k.k);
-               BUG_ON(bkey_deleted(k.k));
+               EBUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
 
                /*
@@ -1156,7 +1166,6 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
                bch2_bset_fix_invalidated_key(l->b, _k);
                bch2_btree_node_iter_fix(iter, l->b, &l->iter,
                                         _k, _k->u64s, _k->u64s);
-               verify_modified_extent(iter, _k);
                break;
 
        case BCH_EXTENT_OVERLAP_ALL: {
@@ -1173,12 +1182,10 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
                        bch2_bset_delete(l->b, _k, _k->u64s);
                        bch2_btree_node_iter_fix(iter, l->b, &l->iter,
                                                 _k, u64s, 0);
-                       bch2_btree_iter_verify(iter, l->b);
                } else {
                        extent_save(l->b, _k, k.k);
                        bch2_btree_node_iter_fix(iter, l->b, &l->iter,
                                                 _k, _k->u64s, _k->u64s);
-                       verify_modified_extent(iter, _k);
                }
 
                break;
@@ -1208,7 +1215,8 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
                __bch2_cut_front(insert->k.p, k);
                BUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
-               verify_modified_extent(iter, _k);
+               bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+                                        _k, _k->u64s, _k->u64s);
 
                extent_bset_insert(c, iter, &split.k);
                break;
@@ -1265,6 +1273,8 @@ static void __bch2_insert_fixup_extent(struct bch_fs *c,
                                btree_account_key_drop(l->b, _k);
                                _k->type = KEY_TYPE_discard;
                                reserve_whiteout(l->b, _k);
+                               bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+                                                       _k, _k->u64s, _k->u64s);
                        }
                        break;
                }
@@ -1359,10 +1369,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
                if (s.deleting)
                        tmp.k.k.type = KEY_TYPE_discard;
 
-               if (debug_check_bkeys(c))
-                       bch2_bkey_debugcheck(c, iter->l[0].b,
-                                            bkey_i_to_s_c(&tmp.k));
-
                EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
 
                extent_bset_insert(c, iter, &tmp.k);
@@ -1387,8 +1393,7 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
        return bch2_bkey_ptrs_invalid(c, k);
 }
 
-void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
-                           struct bkey_s_c k)
+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
        const union bch_extent_entry *entry;
@@ -1762,6 +1767,12 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
        if (ret == BCH_MERGE_NOMERGE)
                return false;
 
+       if (debug_check_bkeys(c))
+               bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&li.k));
+       if (debug_check_bkeys(c) &&
+           ret == BCH_MERGE_PARTIAL)
+               bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&ri.k));
+
        /*
         * check if we overlap with deleted extents - would break the sort
         * order:
@@ -1798,7 +1809,6 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
        bch2_bset_fix_invalidated_key(b, m);
        bch2_btree_node_iter_fix(iter, b, node_iter,
                                 m, m->u64s, m->u64s);
-       verify_modified_extent(iter, m);
 
        return ret == BCH_MERGE_MERGE;
 }
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 189ae4c71eb21450c038b42468b910ff49dc5555..613d76af69d956c47045ddfcf6fdc098db46e64f 100644
@@ -389,8 +389,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
 /* bch_btree_ptr: */
 
 const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
-                              struct bkey_s_c);
+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
 void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
@@ -405,7 +404,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
 /* bch_extent: */
 
 const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c);
 void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 enum merge_result bch2_extent_merge(struct bch_fs *,
@@ -433,8 +432,8 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
        .key_merge      = bch2_reservation_merge,               \
 }
 
-int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
-                          struct bkey_i *, struct bpos *);
+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
+                          struct bpos *);
 int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
 int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
 
@@ -455,12 +454,11 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c);
 bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
                           struct bch_extent_ptr, u64);
 
-static inline bool bkey_extent_is_data(const struct bkey *k)
+static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 {
        switch (k->type) {
        case KEY_TYPE_btree_ptr:
        case KEY_TYPE_extent:
-       case KEY_TYPE_reflink_p:
        case KEY_TYPE_reflink_v:
                return true;
        default:
@@ -468,6 +466,12 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
        }
 }
 
+static inline bool bkey_extent_is_data(const struct bkey *k)
+{
+       return bkey_extent_is_direct_data(k) ||
+               k->type == KEY_TYPE_reflink_p;
+}
+
 /*
  * Should extent be counted under inode->i_sectors?
  */
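
Note: bkey_extent_is_data() is split so callers can tell keys that carry device pointers themselves apart from reflink_p, which only references a reflink_v stored elsewhere; the move.c hunk below switches to the direct variant for exactly this reason. A sketch of the distinction:

	/* sketch: what the split buys a pointer walker */
	if (bkey_extent_is_direct_data(k.k)) {
		/* btree_ptr, extent, reflink_v: bch2_bkey_ptrs_c(k)
		 * yields real device pointers to read or move */
	} else if (k.k->type == KEY_TYPE_reflink_p) {
		/* indirect: the pointers live at the reflink_v key
		 * this entry points to */
	}
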
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index d635ebb5c14342a5b7d971d34806792229f7a54a..aff703244a42759603768ffe9617145883a2dd99 100644
@@ -749,6 +749,9 @@ static void bch2_set_page_dirty(struct bch_fs *c,
        struct bch_page_state *s = bch2_page_state(page);
        unsigned i, dirty_sectors = 0;
 
+       WARN_ON(page_offset(page) + offset + len >
+               round_up(i_size_read(&inode->v), block_bytes(c)));
+
        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
@@ -780,6 +783,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
        struct address_space *mapping = inode->v.i_mapping;
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch2_page_reservation res;
+       unsigned len;
+       loff_t isize;
        int ret = VM_FAULT_LOCKED;
 
        bch2_page_reservation_init(c, inode, &res);
@@ -797,21 +802,27 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
                pagecache_add_get(&mapping->add_lock);
 
        lock_page(page);
-       if (page->mapping != mapping ||
-           page_offset(page) > i_size_read(&inode->v)) {
+       isize = i_size_read(&inode->v);
+
+       if (page->mapping != mapping || page_offset(page) >= isize) {
                unlock_page(page);
                ret = VM_FAULT_NOPAGE;
                goto out;
        }
 
-       if (bch2_page_reservation_get(c, inode, page, &res,
-                                     0, PAGE_SIZE, true)) {
+       /* page is wholly or partially inside EOF */
+       if (((page->index + 1) << PAGE_SHIFT) <= isize)
+               len = PAGE_SIZE;
+       else
+               len = offset_in_page(isize);
+
+       if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) {
                unlock_page(page);
                ret = VM_FAULT_SIGBUS;
                goto out;
        }
 
-       bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
+       bch2_set_page_dirty(c, inode, page, &res, 0, len);
        wait_for_stable_page(page);
 out:
        if (current->pagecache_lock != &mapping->add_lock)
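
Note: .page_mkwrite now reserves and dirties only the part of the page inside EOF, and a page starting at or past EOF faults with VM_FAULT_NOPAGE. The len computation, restated as a hypothetical helper:

	/* sketch: writable bytes of a page that may straddle EOF */
	static unsigned page_len_inside_eof(pgoff_t index, loff_t isize)
	{
		if (((loff_t) (index + 1) << PAGE_SHIFT) <= isize)
			return PAGE_SIZE;	/* page wholly inside EOF */
		return offset_in_page(isize);	/* page straddles EOF */
	}
	/* e.g. 4096-byte pages, isize = 10000, page index 2 (bytes
	 * 8192..12287): 12288 > 10000, so len = 10000 - 8192 = 1808 */
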
@@ -884,9 +895,8 @@ static void bch2_readpages_end_io(struct bio *bio)
 {
        struct bvec_iter_all iter;
        struct bio_vec *bv;
-       int i;
 
-       bio_for_each_segment_all(bv, bio, i, iter) {
+       bio_for_each_segment_all(bv, bio, iter) {
                struct page *page = bv->bv_page;
 
                if (!bio->bi_status) {
@@ -1287,10 +1297,10 @@ static void bch2_writepage_io_done(struct closure *cl)
        struct bio *bio = &io->op.op.wbio.bio;
        struct bvec_iter_all iter;
        struct bio_vec *bvec;
-       unsigned i, j;
+       unsigned i;
 
        if (io->op.op.error) {
-               bio_for_each_segment_all(bvec, bio, i, iter) {
+               bio_for_each_segment_all(bvec, bio, iter) {
                        struct bch_page_state *s;
 
                        SetPageError(bvec->bv_page);
@@ -1298,8 +1308,8 @@ static void bch2_writepage_io_done(struct closure *cl)
 
                        lock_page(bvec->bv_page);
                        s = bch2_page_state(bvec->bv_page);
-                       for (j = 0; j < PAGE_SECTORS; j++)
-                               s->s[j].nr_replicas = 0;
+                       for (i = 0; i < PAGE_SECTORS; i++)
+                               s->s[i].nr_replicas = 0;
                        unlock_page(bvec->bv_page);
                }
        }
@@ -1325,7 +1335,7 @@ static void bch2_writepage_io_done(struct closure *cl)
                i_sectors_acct(c, io->op.inode, NULL,
                               io->op.sectors_added - (s64) io->new_sectors);
 
-       bio_for_each_segment_all(bvec, bio, i, iter) {
+       bio_for_each_segment_all(bvec, bio, iter) {
                struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
 
                if (atomic_dec_and_test(&s->write_count))
@@ -1490,6 +1500,10 @@ do_io:
                BUG_ON(!bio_add_page(&w->io->op.op.wbio.bio, page,
                                     sectors << 9, offset << 9));
 
+               /* Check for writing past i_size: */
+               WARN_ON((bio_end_sector(&w->io->op.op.wbio.bio) << 9) >
+                       round_up(i_size, block_bytes(c)));
+
                w->io->op.op.res.sectors += reserved_sectors;
                w->io->op.new_i_size = i_size;
 
@@ -1994,16 +2008,17 @@ static void bch2_dio_write_loop_async(struct closure *);
 static long bch2_dio_write_loop(struct dio_write *dio)
 {
        bool kthread = (current->flags & PF_KTHREAD) != 0;
+       struct bch_fs *c = dio->iop.op.c;
        struct kiocb *req = dio->req;
        struct address_space *mapping = req->ki_filp->f_mapping;
        struct bch_inode_info *inode = dio->iop.inode;
        struct bio *bio = &dio->iop.op.wbio.bio;
        struct bvec_iter_all iter;
        struct bio_vec *bv;
+       unsigned unaligned;
        loff_t offset;
        bool sync;
        long ret;
-       int i;
 
        if (dio->loop)
                goto loop;
@@ -2036,6 +2051,21 @@ static long bch2_dio_write_loop(struct dio_write *dio)
                if (unlikely(ret < 0))
                        goto err;
 
+               unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
+               bio->bi_iter.bi_size -= unaligned;
+               iov_iter_revert(&dio->iter, unaligned);
+
+               if (!bio->bi_iter.bi_size) {
+                       /*
+                        * bio_iov_iter_get_pages was only able to get <
+                        * blocksize worth of pages:
+                        */
+                       bio_for_each_segment_all(bv, bio, iter)
+                               put_page(bv->bv_page);
+                       ret = -EFAULT;
+                       goto err;
+               }
+
                /* gup might have faulted pages back in: */
                ret = write_invalidate_inode_pages_range(mapping,
                                offset,
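
Note: bio_iov_iter_get_pages() can stop at an arbitrary byte, so the O_DIRECT write path above now trims the bio back to a block boundary and returns the tail to the iterator for the next pass. Worked numbers, assuming 4096-byte blocks:

	/* bi_size = 6000 after bio_iov_iter_get_pages():
	 *   unaligned = 6000 & 4095 = 1904
	 *   bi_size -> 4096; iov_iter_revert() hands the 1904 bytes
	 *   back, and the next loop iteration re-pins them.
	 * bi_size = 1000 would become 0: the pages are released and
	 * the write fails with -EFAULT. */
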
@@ -2076,7 +2106,7 @@ err_wait_io:
 
                closure_sync(&dio->cl);
 loop:
-               bio_for_each_segment_all(bv, bio, i, iter)
+               bio_for_each_segment_all(bv, bio, iter)
                        put_page(bv->bv_page);
                if (!dio->iter.count || dio->iop.op.error)
                        break;
@@ -2086,8 +2116,8 @@ loop:
        ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
 err:
        __pagecache_block_put(&mapping->add_lock);
-       bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
-       bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res);
+       bch2_disk_reservation_put(c, &dio->iop.op.res);
+       bch2_quota_reservation_put(c, inode, &dio->quota_res);
 
        if (dio->free_iov)
                kfree(dio->iter.iov);
@@ -2530,6 +2560,16 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
        if (unlikely(ret))
                goto err;
 
+       /*
+        * When extending, we're going to write the new i_size to disk
+        * immediately so we need to flush anything above the current on disk
+        * i_size first:
+        *
+        * Also, when extending we need to flush the page that i_size currently
+        * straddles - if it's mapped to userspace, we need to ensure that
+        * userspace has to redirty it and call .mkwrite -> set_page_dirty
+        * again to allocate the part of the page that was extended.
+        */
        if (iattr->ia_size > inode->ei_inode.bi_size)
                ret = filemap_write_and_wait_range(mapping,
                                inode->ei_inode.bi_size,
@@ -2608,16 +2648,16 @@ err:
        return ret;
 }
 
-static long bch2_fcollapse(struct bch_inode_info *inode,
-                          loff_t offset, loff_t len)
+static long bch2_fcollapse_finsert(struct bch_inode_info *inode,
+                                  loff_t offset, loff_t len,
+                                  bool insert)
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct address_space *mapping = inode->v.i_mapping;
        struct btree_trans trans;
-       struct btree_iter *src, *dst;
-       BKEY_PADDED(k) copy;
-       struct bkey_s_c k;
-       loff_t new_size;
+       struct btree_iter *src, *dst, *del = NULL;
+       loff_t shift, new_size;
+       u64 src_start;
        int ret;
 
        if ((offset | len) & (block_bytes(c) - 1))
@@ -2635,92 +2675,188 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
        inode_dio_wait(&inode->v);
        pagecache_block_get(&mapping->add_lock);
 
-       ret = -EINVAL;
-       if (offset + len >= inode->v.i_size)
-               goto err;
+       if (insert) {
+               ret = -EFBIG;
+               if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
+                       goto err;
 
-       if (inode->v.i_size < len)
-               goto err;
+               ret = -EINVAL;
+               if (offset >= inode->v.i_size)
+                       goto err;
+
+               src_start       = U64_MAX;
+               shift           = len;
+       } else {
+               ret = -EINVAL;
+               if (offset + len >= inode->v.i_size)
+                       goto err;
 
-       new_size = inode->v.i_size - len;
+               src_start       = offset + len;
+               shift           = -len;
+       }
+
+       new_size = inode->v.i_size + shift;
 
        ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
        if (ret)
                goto err;
 
-       dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
-                       POS(inode->v.i_ino, offset >> 9),
-                       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       BUG_ON(IS_ERR_OR_NULL(dst));
+       if (insert) {
+               i_size_write(&inode->v, new_size);
+               mutex_lock(&inode->ei_update_lock);
+               ret = bch2_write_inode_size(c, inode, new_size,
+                                           ATTR_MTIME|ATTR_CTIME);
+               mutex_unlock(&inode->ei_update_lock);
+       } else {
+               ret = __bch2_fpunch(c, inode, offset >> 9,
+                                   (offset + len) >> 9);
+               if (ret)
+                       goto err;
+       }
 
        src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
-                       POS_MIN, BTREE_ITER_SLOTS);
+                       POS(inode->v.i_ino, src_start >> 9),
+                       BTREE_ITER_INTENT);
        BUG_ON(IS_ERR_OR_NULL(src));
 
-       while (bkey_cmp(dst->pos,
-                       POS(inode->v.i_ino,
-                           round_up(new_size, block_bytes(c)) >> 9)) < 0) {
-               struct disk_reservation disk_res;
+       dst = bch2_trans_copy_iter(&trans, src);
+       BUG_ON(IS_ERR_OR_NULL(dst));
 
-               ret = bch2_btree_iter_traverse(dst);
-               if (ret)
+       while (1) {
+               struct disk_reservation disk_res =
+                       bch2_disk_reservation_init(c, 0);
+               BKEY_PADDED(k) copy;
+               struct bkey_i delete;
+               struct bkey_s_c k;
+               struct bpos next_pos;
+               struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
+               struct bpos atomic_end;
+               unsigned commit_flags = BTREE_INSERT_NOFAIL|
+                       BTREE_INSERT_ATOMIC|
+                       BTREE_INSERT_USE_RESERVE;
+
+               k = insert
+                       ? bch2_btree_iter_peek_prev(src)
+                       : bch2_btree_iter_peek(src);
+               if ((ret = bkey_err(k)))
                        goto bkey_err;
 
-               bch2_btree_iter_set_pos(src,
-                       POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
+               if (!k.k || k.k->p.inode != inode->v.i_ino)
+                       break;
 
-               k = bch2_btree_iter_peek_slot(src);
-               if ((ret = bkey_err(k)))
-                       goto bkey_err;
+               BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
 
+               if (insert &&
+                   bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
+                       break;
+reassemble:
                bkey_reassemble(&copy.k, k);
 
-               bch2_cut_front(src->pos, &copy.k);
-               copy.k.k.p.offset -= len >> 9;
+               if (insert &&
+                   bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
+                       bch2_cut_front(move_pos, &copy.k);
+                       bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
+               }
+
+               copy.k.k.p.offset += shift >> 9;
+               bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
+
+               ret = bch2_btree_iter_traverse(dst);
+               if (ret)
+                       goto bkey_err;
 
-               ret = bch2_extent_trim_atomic(&copy.k, dst);
+               ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
                if (ret)
                        goto bkey_err;
 
-               BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
+               if (bkey_cmp(atomic_end, copy.k.k.p)) {
+                       if (insert) {
+                               move_pos = atomic_end;
+                               move_pos.offset -= shift >> 9;
+                               goto reassemble;
+                       } else {
+                               bch2_cut_back(atomic_end, &copy.k.k);
+                       }
+               }
 
-               ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
-                               bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
-                               BCH_DISK_RESERVATION_NOFAIL);
-               BUG_ON(ret);
+               bkey_init(&delete.k);
+               delete.k.p = src->pos;
+               bch2_key_resize(&delete.k, copy.k.k.size);
 
-               bch2_trans_begin_updates(&trans);
+               next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
 
-               ret = bch2_extent_update(&trans, inode,
-                               &disk_res, NULL,
-                               dst, &copy.k,
-                               0, true, true, NULL);
+               /*
+                * If the new and old keys overlap (because we're moving an
+                * extent that's bigger than the amount we're collapsing by),
+                * we need to trim the delete key here so they don't overlap
+                * because overlaps on insertions aren't handled before
+                * triggers are run, so the overwrite will get double counted
+                * by the triggers machinery:
+                */
+               if (insert &&
+                   bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
+                       bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
+               } else if (!insert &&
+                          bkey_cmp(copy.k.k.p,
+                                   bkey_start_pos(&delete.k)) > 0) {
+                       bch2_cut_front(copy.k.k.p, &delete);
+
+                       del = bch2_trans_copy_iter(&trans, src);
+                       BUG_ON(IS_ERR_OR_NULL(del));
+
+                       bch2_btree_iter_set_pos(del,
+                               bkey_start_pos(&delete.k));
+               }
+
+               bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, &copy.k));
+               bch2_trans_update(&trans,
+                                 BTREE_INSERT_ENTRY(del ?: src, &delete));
+
+               if (copy.k.k.size == k.k->size) {
+                       /*
+                        * If we're moving the entire extent, we can skip
+                        * running triggers:
+                        */
+                       commit_flags |= BTREE_INSERT_NOMARK;
+               } else {
+                       /* We might end up splitting compressed extents: */
+                       unsigned nr_ptrs =
+                               bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
+
+                       ret = bch2_disk_reservation_get(c, &disk_res,
+                                       copy.k.k.size, nr_ptrs,
+                                       BCH_DISK_RESERVATION_NOFAIL);
+                       BUG_ON(ret);
+               }
+
+               ret = bch2_trans_commit(&trans, &disk_res,
+                                       &inode->ei_journal_seq,
+                                       commit_flags);
                bch2_disk_reservation_put(c, &disk_res);
 bkey_err:
+               if (del)
+                       bch2_trans_iter_free(&trans, del);
+               del = NULL;
+
+               if (!ret)
+                       bch2_btree_iter_set_pos(src, next_pos);
+
                if (ret == -EINTR)
                        ret = 0;
                if (ret)
                        goto err;
-               /*
-                * XXX: if we error here we've left data with multiple
-                * pointers... which isn't a _super_ serious problem...
-                */
 
                bch2_trans_cond_resched(&trans);
        }
        bch2_trans_unlock(&trans);
 
-       ret = __bch2_fpunch(c, inode,
-                       round_up(new_size, block_bytes(c)) >> 9,
-                       U64_MAX);
-       if (ret)
-               goto err;
-
-       i_size_write(&inode->v, new_size);
-       mutex_lock(&inode->ei_update_lock);
-       ret = bch2_write_inode_size(c, inode, new_size,
-                                   ATTR_MTIME|ATTR_CTIME);
-       mutex_unlock(&inode->ei_update_lock);
+       if (!insert) {
+               i_size_write(&inode->v, new_size);
+               mutex_lock(&inode->ei_update_lock);
+               ret = bch2_write_inode_size(c, inode, new_size,
+                                           ATTR_MTIME|ATTR_CTIME);
+               mutex_unlock(&inode->ei_update_lock);
+       }
 err:
        bch2_trans_exit(&trans);
        pagecache_block_put(&mapping->add_lock);
@@ -2889,8 +3025,11 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
        if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
                return bch2_fpunch(inode, offset, len);
 
+       if (mode == FALLOC_FL_INSERT_RANGE)
+               return bch2_fcollapse_finsert(inode, offset, len, true);
+
        if (mode == FALLOC_FL_COLLAPSE_RANGE)
-               return bch2_fcollapse(inode, offset, len);
+               return bch2_fcollapse_finsert(inode, offset, len, false);
 
        return -EOPNOTSUPP;
 }
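
Note: collapse and insert now share one implementation, bch2_fcollapse_finsert(), differing only in shift direction and setup order. From userspace the two modes look like this sketch (fd/offset/len assumed; both fail with -EINVAL unless offset and len are block aligned):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>

	/* sketch: shift file contents left (collapse) or right (insert) */
	static int shift_range(int fd, off_t offset, off_t len, int insert)
	{
		return fallocate(fd, insert ? FALLOC_FL_INSERT_RANGE
					    : FALLOC_FL_COLLAPSE_RANGE,
				 offset, len);
	}
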
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index e3738757b6a0cb9f81e87384f13b5a71c46cfdd1..50a7d8c1fabafc46de1d1a56d4a7ff40557d649a 100644
@@ -509,7 +509,7 @@ retry:
                if (fsck_err_on(w.have_inode &&
                        !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
                        k.k->type != KEY_TYPE_reservation &&
-                       k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
+                       k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
                        "extent type %u offset %llu past end of inode %llu, i_size %llu",
                        k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
                        bch2_trans_unlock(&trans);
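
Note: rounding i_size up to the filesystem block size instead of PAGE_SIZE tightens fsck's past-EOF extent check whenever the block size is smaller than the page size. Worked example with 1024-byte blocks, 4096-byte pages, i_size = 1500:

	/* old: round_up(1500, PAGE_SIZE)      = 4096 -> extents out to
	 *      sector 8 were silently accepted
	 * new: round_up(1500, block_bytes(c)) = 2048 -> anything past
	 *      sector 4 is now flagged as past end of inode */
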
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index e2ec5bea93362befe01d58877e981897835a82fb..ab8c25602448be580511db466303b5b7e8d87731 100644
@@ -124,9 +124,8 @@ void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
 {
        struct bvec_iter_all iter;
        struct bio_vec *bv;
-       unsigned i;
 
-       bio_for_each_segment_all(bv, bio, i, iter)
+       bio_for_each_segment_all(bv, bio, iter)
                if (bv->bv_page != ZERO_PAGE(0))
                        mempool_free(bv->bv_page, &c->bio_bounce_pages);
        bio->bi_vcnt = 0;
@@ -1210,10 +1209,15 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
        return rbio;
 }
 
+/*
+ * Only called on a top level bch_read_bio to complete an entire read request,
+ * not a split:
+ */
 static void bch2_rbio_done(struct bch_read_bio *rbio)
 {
-       bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
-                              rbio->start_time);
+       if (rbio->start_time)
+               bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
+                                      rbio->start_time);
        bio_endio(&rbio->bio);
 }
 
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 9595ba7910d81ded7dfb3b6c778818d1ff1e8712..26a2c4fb1845c5c919387d154691fd431fd2a4a2 100644
@@ -304,11 +304,10 @@ static void move_free(struct closure *cl)
        struct moving_context *ctxt = io->write.ctxt;
        struct bvec_iter_all iter;
        struct bio_vec *bv;
-       int i;
 
        bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
 
-       bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
+       bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
                if (bv->bv_page)
                        __free_page(bv->bv_page);
 
@@ -438,7 +437,8 @@ static int bch2_move_extent(struct bch_fs *c,
                                 GFP_KERNEL))
                goto err_free;
 
-       io->rbio.opts = io_opts;
+       io->rbio.c              = c;
+       io->rbio.opts           = io_opts;
        bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
        io->rbio.bio.bi_vcnt = pages;
        bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
@@ -548,7 +548,7 @@ peek:
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
                        break;
 
-               if (!bkey_extent_is_data(k.k))
+               if (!bkey_extent_is_direct_data(k.k))
                        goto next_nondata;
 
                if (cur_inum != k.k->p.inode) {
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 4797d620fe7723fb8c9591fc4f9b8ee7e4f9642f..84b3fb6eb101e591c5d2a07f4eea5fa8a801c2e0 100644
@@ -42,9 +42,6 @@ void bch2_rebalance_add_key(struct bch_fs *c,
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
 
-       if (!bkey_extent_is_data(k.k))
-               return;
-
        if (!io_opts->background_target &&
            !io_opts->background_compression)
                return;
@@ -72,30 +69,26 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
                                    struct bch_io_opts *io_opts,
                                    struct data_opts *data_opts)
 {
-       switch (k.k->type) {
-       case KEY_TYPE_extent: {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const union bch_extent_entry *entry;
-               struct extent_ptr_decoded p;
-
-               /* Make sure we have room to add a new pointer: */
-               if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
-                   BKEY_EXTENT_VAL_U64s_MAX)
-                       return DATA_SKIP;
-
-               extent_for_each_ptr_decode(e, p, entry)
-                       if (rebalance_ptr_pred(c, p, io_opts))
-                               goto found;
-
-               return DATA_SKIP;
-found:
-               data_opts->target               = io_opts->background_target;
-               data_opts->btree_insert_flags   = 0;
-               return DATA_ADD_REPLICAS;
-       }
-       default:
-               return DATA_SKIP;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       unsigned nr_replicas = 0;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               nr_replicas += !p.ptr.cached;
+
+               if (rebalance_ptr_pred(c, p, io_opts))
+                       goto found;
        }
+
+       if (nr_replicas < io_opts->data_replicas)
+               goto found;
+
+       return DATA_SKIP;
+found:
+       data_opts->target               = io_opts->background_target;
+       data_opts->btree_insert_flags   = 0;
+       return DATA_ADD_REPLICAS;
 }
 
 struct rebalance_work {
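
Note: rebalance_pred() now walks pointers on any key type via bch2_bkey_ptrs_c() instead of special-casing KEY_TYPE_extent, and additionally selects under-replicated extents. A worked case, as a sketch:

	/* data_replicas = 2, key has one dirty + one cached pointer:
	 * nr_replicas = 1 (cached pointers don't count), 1 < 2, so the
	 * key is queued as DATA_ADD_REPLICAS -- the old switch would
	 * have returned DATA_SKIP for any non-extent key */
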
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index c9558ccb9a262001f3c0118c7fecdccbf7c5d619..98d9a1432e507af654b930a6007f206e192403d3 100644
@@ -281,8 +281,7 @@ retry:
                if (ret)
                        goto err;
 
-               ret = bch2_extent_atomic_end(&trans, split_iter,
-                                            k, &atomic_end);
+               ret = bch2_extent_atomic_end(split_iter, k, &atomic_end);
                if (ret)
                        goto err;
 
@@ -936,7 +935,9 @@ out:
        ret = 0;
 err:
 fsck_err:
+       set_bit(BCH_FS_FSCK_DONE, &c->flags);
        bch2_flush_fsck_errs(c);
+
        journal_keys_free(&journal_keys);
        journal_entries_free(&journal_entries);
        kfree(clean);
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index d06027256e0bc998834390659ec7e24698f6665a..bb9da2bb5a92cfeacb63cb4c320c7c8426253489 100644
@@ -16,11 +16,16 @@ static inline int u8_cmp(u8 l, u8 r)
        return cmp_int(l, r);
 }
 
-static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
+static void verify_replicas_entry(struct bch_replicas_entry *e)
 {
-#ifdef CONFIG_BCACHES_DEBUG
+#ifdef CONFIG_BCACHEFS_DEBUG
        unsigned i;
 
+       BUG_ON(e->data_type >= BCH_DATA_NR);
+       BUG_ON(!e->nr_devs);
+       BUG_ON(e->nr_required > 1 &&
+              e->nr_required >= e->nr_devs);
+
        for (i = 0; i + 1 < e->nr_devs; i++)
                BUG_ON(e->devs[i] >= e->devs[i + 1]);
 #endif
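
Note: CONFIG_BCACHES_DEBUG was a typo for CONFIG_BCACHEFS_DEBUG, so these assertions had never actually been compiled in. The renamed verify_replicas_entry() now checks the whole entry, not just device ordering:

	/* invariants asserted under CONFIG_BCACHEFS_DEBUG:
	 *   e->data_type < BCH_DATA_NR
	 *   e->nr_devs != 0
	 *   e->nr_required <= 1 || e->nr_required < e->nr_devs
	 *   e->devs[] strictly ascending, hence duplicate-free */
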
@@ -158,7 +163,7 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
        };
 
        BUG_ON(!new_entry->data_type);
-       verify_replicas_entry_sorted(new_entry);
+       verify_replicas_entry(new_entry);
 
        new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
        if (!new.entries)
@@ -185,7 +190,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
        if (unlikely(entry_size > r->entry_size))
                return -1;
 
-       verify_replicas_entry_sorted(search);
+       verify_replicas_entry(search);
 
 #define entry_cmp(_l, _r, size)        memcmp(_l, _r, entry_size)
        idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
@@ -216,7 +221,7 @@ static bool bch2_replicas_marked_locked(struct bch_fs *c,
        if (!search->nr_devs)
                return true;
 
-       verify_replicas_entry_sorted(search);
+       verify_replicas_entry(search);
 
        return __replicas_has_entry(&c->replicas, search) &&
                (!check_gc_replicas ||
@@ -360,6 +365,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
        struct bch_replicas_cpu new_r, new_gc;
        int ret = -ENOMEM;
 
+       verify_replicas_entry(new_entry);
+
        memset(&new_r, 0, sizeof(new_r));
        memset(&new_gc, 0, sizeof(new_gc));
 
@@ -875,9 +882,8 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
                        goto err;
 
                err = "invalid replicas entry: bad nr_required";
-               if (!e->nr_required ||
-                   (e->nr_required > 1 &&
-                    e->nr_required >= e->nr_devs))
+               if (e->nr_required > 1 &&
+                   e->nr_required >= e->nr_devs)
                        goto err;
 
                err = "invalid replicas entry: invalid device";
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 091bf7a8957755c092636177ae13bce9a5af6998..ef30c73a1e4b877190f0c457efb947c5b02cab95 100644
@@ -42,7 +42,6 @@ bch2_hash_info_init(struct bch_fs *c,
                u8 digest[SHA256_DIGEST_SIZE];
 
                desc->tfm = c->sha256;
-               desc->flags = 0;
 
                crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
                                    sizeof(bi->bi_hash_seed), digest);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index bd4b3188be5328be0a23cf2bcdcd2c8391981a71..4145832f48566db522d8b9789c37b9e5ebc4649d 100644
@@ -494,6 +494,7 @@ static void bch2_fs_free(struct bch_fs *c)
        bch2_fs_ec_exit(c);
        bch2_fs_encryption_exit(c);
        bch2_fs_io_exit(c);
+       bch2_fs_btree_iter_exit(c);
        bch2_fs_btree_cache_exit(c);
        bch2_fs_journal_exit(&c->journal);
        bch2_io_clock_exit(&c->io_clock[WRITE]);
@@ -505,7 +506,6 @@ static void bch2_fs_free(struct bch_fs *c)
        free_percpu(c->usage[0]);
        kfree(c->usage_base);
        free_percpu(c->pcpu);
-       mempool_exit(&c->btree_iters_pool);
        mempool_exit(&c->btree_bounce_pool);
        bioset_exit(&c->btree_bio);
        mempool_exit(&c->btree_interior_update_pool);
@@ -758,15 +758,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
            mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                        btree_bytes(c)) ||
-           mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
-                       sizeof(struct btree_iter) * BTREE_ITER_MAX +
-                       sizeof(struct btree_insert_entry) *
-                       (BTREE_ITER_MAX + 4)) ||
            bch2_io_clock_init(&c->io_clock[READ]) ||
            bch2_io_clock_init(&c->io_clock[WRITE]) ||
            bch2_fs_journal_init(&c->journal) ||
            bch2_fs_replicas_init(c) ||
            bch2_fs_btree_cache_init(c) ||
+           bch2_fs_btree_iter_init(c) ||
            bch2_fs_io_init(c) ||
            bch2_fs_encryption_init(c) ||
            bch2_fs_compress_init(c) ||
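
Note: the btree_iters_pool setup moves out of bch2_fs_alloc()/bch2_fs_free() into a dedicated bch2_fs_btree_iter_init()/_exit() pair. Presumably the new functions wrap exactly the code removed here; a sketch under that assumption:

	/* sketch -- assumed shape of the new helpers */
	int bch2_fs_btree_iter_init(struct bch_fs *c)
	{
		return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
				sizeof(struct btree_iter) * BTREE_ITER_MAX +
				sizeof(struct btree_insert_entry) *
				(BTREE_ITER_MAX + 4));
	}

	void bch2_fs_btree_iter_exit(struct bch_fs *c)
	{
		mempool_exit(&c->btree_iters_pool);
	}
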
diff --git a/linux/bio.c b/linux/bio.c
index d9b860a026a80d68bea50622ba9e8d4efb1bfd40..797204f81fb2b960387baa9ed1b424c8f220d050 100644
@@ -167,9 +167,8 @@ void bio_free_pages(struct bio *bio)
 {
        struct bvec_iter_all iter;
        struct bio_vec *bvec;
-       int i;
 
-       bio_for_each_segment_all(bvec, bio, i, iter)
+       bio_for_each_segment_all(bvec, bio, iter)
                __free_page(bvec->bv_page);
 }