Update bcachefs sources to 5241335413 bcachefs: Fix for spinning in journal reclaim...
author     Kent Overstreet <kent.overstreet@gmail.com>
           Sat, 19 Dec 2020 23:05:09 +0000 (18:05 -0500)
committer  Kent Overstreet <kent.overstreet@gmail.com>
           Sat, 19 Dec 2020 23:05:09 +0000 (18:05 -0500)
30 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/bcachefs_format.h
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_key_cache.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/ec_types.h
libbcachefs/extents.c
libbcachefs/fs-io.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_reclaim.c
libbcachefs/journal_reclaim.h
libbcachefs/journal_types.h
libbcachefs/recovery.c
libbcachefs/replicas.c
libbcachefs/super-io.c
libbcachefs/super_types.h
libbcachefs/tests.c
libbcachefs/util.h

.bcachefs_revision
index 2cf8031141af2169ba5aaab32181347ede1aa78d..6bdc42aaf14a0698d952ed8c525a85c1fdde1a63 100644
@@ -1 +1 @@
-e1d0fb8c5fbc70df1007ebf5d9ab03018dc05275
+5241335413ef160e309fd41ab909532fec656a3a
libbcachefs/alloc_background.c
index 2dd8a37f29e78e15e9edeaa9e8b2e3327fd03bf4..62ca9b7aaefa10a2af9d1f2177d797f7f36145f0 100644
@@ -505,8 +505,9 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
                                   BTREE_ITER_CACHED|
                                   BTREE_ITER_CACHED_NOFILL|
                                   BTREE_ITER_INTENT);
-       if (IS_ERR(iter))
-               return PTR_ERR(iter);
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               goto out;
 
        a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
        ret = PTR_ERR_OR_ZERO(a);
libbcachefs/bcachefs_format.h
index 02a76c3d3acb7839d83d94d44e107e76aa8787d1..9f59c6b3a25e023440e45dfc620df8c875a1bc87 100644
@@ -1359,6 +1359,8 @@ enum bch_sb_compat {
 
 #define BCH_REPLICAS_MAX               4U
 
+#define BCH_BKEY_PTRS_MAX              16U
+
 enum bch_error_actions {
        BCH_ON_ERROR_CONTINUE           = 0,
        BCH_ON_ERROR_RO                 = 1,
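
BCH_BKEY_PTRS_MAX is the maximum number of pointers a single bkey can carry. Later hunks in this commit retire the old EC_STRIPE_MAX constant (same value, 16) in favour of it, and resize the erasure-coding arrays and struct bch_devs_list to match.
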
libbcachefs/btree_io.c
index 9b19432ae7a590f81072121d23a6d90c3a85979f..4dde972d353a3f917bf084df250ded7471562fb3 100644
@@ -635,21 +635,26 @@ enum btree_validate_ret {
 ({                                                                     \
        __label__ out;                                                  \
        char _buf[300];                                                 \
+       char *buf2 = _buf;                                              \
        struct printbuf out = PBUF(_buf);                               \
                                                                        \
+       buf2 = kmalloc(4096, GFP_ATOMIC);                               \
+       if (buf2)                                                       \
+               out = _PBUF(buf2, 4096);                                \
+                                                                       \
        btree_err_msg(&out, c, b, i, b->written, write);                \
        pr_buf(&out, ": " msg, ##__VA_ARGS__);                          \
                                                                        \
        if (type == BTREE_ERR_FIXABLE &&                                \
            write == READ &&                                            \
            !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {             \
-               mustfix_fsck_err(c, "%s", _buf);                        \
+               mustfix_fsck_err(c, "%s", buf2);                        \
                goto out;                                               \
        }                                                               \
                                                                        \
        switch (write) {                                                \
        case READ:                                                      \
-               bch_err(c, "%s", _buf);                                 \
+               bch_err(c, "%s", buf2);                                 \
                                                                        \
                switch (type) {                                         \
                case BTREE_ERR_FIXABLE:                                 \
@@ -670,7 +675,7 @@ enum btree_validate_ret {
                }                                                       \
                break;                                                  \
        case WRITE:                                                     \
-               bch_err(c, "corrupt metadata before write: %s", _buf);  \
+               bch_err(c, "corrupt metadata before write: %s", buf2);  \
                                                                        \
                if (bch2_fs_inconsistent(c)) {                          \
                        ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
@@ -679,6 +684,8 @@ enum btree_validate_ret {
                break;                                                  \
        }                                                               \
 out:                                                                   \
+       if (buf2 != _buf)                                               \
+               kfree(buf2);                                            \
        true;                                                           \
 })
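
The macro above now formats into a 4k heap buffer when one can be had without sleeping, falling back to the 300-byte stack buffer otherwise, and frees only what came from the heap. A stand-alone user-space sketch of that pattern (plain malloc/snprintf standing in for the kernel printbuf helpers); note the sketch resets the pointer to the stack buffer on allocation failure, so the string that gets printed is never NULL:

    #include <stdio.h>
    #include <stdlib.h>

    static void report_error(const char *msg)
    {
            char stackbuf[300];
            char *buf = malloc(4096);   /* kmalloc(4096, GFP_ATOMIC) in the kernel */
            size_t size = buf ? 4096 : sizeof(stackbuf);

            if (!buf)
                    buf = stackbuf;     /* fall back to the stack; buf is never NULL */

            snprintf(buf, size, "btree error: %s", msg);
            fprintf(stderr, "%s\n", buf);

            if (buf != stackbuf)
                    free(buf);          /* only free what came from the heap */
    }
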
 
@@ -844,7 +851,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 
                        bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
-                                 "invalid bkey:\n%s\n%s", invalid, buf);
+                                 "invalid bkey: %s\n%s", invalid, buf);
 
                        i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
                        memmove_u64s_down(k, bkey_next(k),
libbcachefs/btree_iter.c
index 7a95fcc0b244f750eefacbf4df032687725d998d..8c35e39ea97fce2a33bc750cc3aa8015ece7c9ea 100644
@@ -875,9 +875,19 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
                char buf[100];
                struct bkey uk = bkey_unpack_key(b, k);
 
+               bch2_dump_btree_node(iter->trans->c, l->b);
                bch2_bkey_to_text(&PBUF(buf), &uk);
-               panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n",
-                     buf, b->key.k.p.inode, b->key.k.p.offset);
+               panic("parent iter doesn't point to new node:\n"
+                     "iter pos %s %llu:%llu\n"
+                     "iter key %s\n"
+                     "new node %llu:%llu-%llu:%llu\n",
+                     bch2_btree_ids[iter->btree_id],
+                     iter->pos.inode,
+                     iter->pos.offset,
+                     buf,
+                     b->data->min_key.inode,
+                     b->data->min_key.offset,
+                     b->key.k.p.inode, b->key.k.p.offset);
        }
 
        if (!parent_locked)
@@ -892,6 +902,13 @@ static inline void __btree_iter_init(struct btree_iter *iter,
 
        bch2_btree_node_iter_init(&l->iter, l->b, &pos);
 
+       /*
+        * Iterators to interior nodes should always be pointed at the
+        * first non-whiteout:
+        */
+       if (level)
+               bch2_btree_node_iter_peek(&l->iter, l->b);
+
        btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
 
@@ -2007,9 +2024,10 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
 {
 
        struct btree_iter *iter;
+       struct btree_insert_entry *i;
 
        trans_for_each_iter(trans, iter)
-               pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+               printk(KERN_ERR "iter: btree %s pos %llu:%llu%s%s%s %ps\n",
                       bch2_btree_ids[iter->btree_id],
                       iter->pos.inode,
                       iter->pos.offset,
@@ -2017,6 +2035,14 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
                       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
                       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
                       (void *) iter->ip_allocated);
+
+       trans_for_each_update(trans, i) {
+               char buf[300];
+
+               bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k));
+               printk(KERN_ERR "update: btree %s %s\n",
+                      bch2_btree_ids[i->iter->btree_id], buf);
+       }
        panic("trans iter oveflow\n");
 }
 
libbcachefs/btree_iter.h
index ee8c4346aadbfd638e93b42bf40d8cdd07ec97c5..9a7f8d0197eca7c9e2d9fb1d3838e3c3dfbb2d72 100644
@@ -51,11 +51,17 @@ static inline int btree_iter_err(const struct btree_iter *iter)
 static inline struct btree_iter *
 __trans_next_iter(struct btree_trans *trans, unsigned idx)
 {
-       u64 l = trans->iters_linked >> idx;
+       u64 l;
+
+       if (idx == BTREE_ITER_MAX)
+               return NULL;
+
+       l = trans->iters_linked >> idx;
        if (!l)
                return NULL;
 
        idx += __ffs64(l);
+       EBUG_ON(idx >= BTREE_ITER_MAX);
        EBUG_ON(trans->iters[idx].idx != idx);
        return &trans->iters[idx];
 }
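
The new bounds check matters because shifting a 64-bit value by 64 or more positions is undefined behaviour in C: on x86 the shift count is masked to six bits, so iters_linked >> 64 would silently act like >> 0 and the caller's loop could never terminate. A stand-alone sketch of the same guard, with __builtin_ctzll standing in for the kernel's __ffs64():

    #include <stdint.h>

    /* Index of the next set bit at or after `idx`, or -1 if none.
     * The early bounds check avoids `mask >> idx` with idx >= 64,
     * which is undefined behaviour for a 64-bit operand. */
    static int next_set_bit(uint64_t mask, unsigned idx, unsigned max)
    {
            if (idx >= max)
                    return -1;
            mask >>= idx;
            if (!mask)
                    return -1;
            return idx + __builtin_ctzll(mask);
    }
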
libbcachefs/btree_key_cache.c
index 244c5dbcd3e9098db5b6a7c0374efe8b19c56d59..1a557b753bc1ed8a03d6fa01cadf0052adb8a050 100644
@@ -580,6 +580,8 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        list_splice(&bc->dirty, &bc->clean);
 
        list_for_each_entry_safe(ck, n, &bc->clean, list) {
+               cond_resched();
+
                bch2_journal_pin_drop(&c->journal, &ck->journal);
                bch2_journal_preres_put(&c->journal, &ck->res);
 
@@ -593,6 +595,8 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        BUG_ON(bc->nr_keys);
 
        list_for_each_entry_safe(ck, n, &bc->freed, list) {
+               cond_resched();
+
                list_del(&ck->list);
                kmem_cache_free(bch2_key_cache, ck);
        }
libbcachefs/btree_key_cache.h
index d7d31a0662c366dafb431d8706c916f17098d51a..2f8b5521718aa44613b764d7cb83cc3701b8e81f 100644
@@ -4,8 +4,8 @@
 static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 {
        size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty);
-       size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_dirty);
-       size_t max_dirty = 4096 + nr_keys  / 2;
+       size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys);
+       size_t max_dirty = 1024 + nr_keys  / 2;
 
        return max_t(ssize_t, 0, nr_dirty - max_dirty);
 }
@@ -13,10 +13,11 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
 {
        size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty);
-       size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_dirty);
+       size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys);
        size_t max_dirty = 4096 + (nr_keys * 3) / 4;
 
-       return nr_dirty > max_dirty;
+       return nr_dirty > max_dirty &&
+               test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
 }
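
Worked through with a million cached keys: reclaim starts flushing once more than 1024 + 500,000 of them are dirty, while writers are only made to wait past 4096 + 750,000 dirty keys, and, with the new test_bit() check, only once journal reclaim has actually been started, so the throttle cannot engage before reclaim exists to make progress. (The nr_keys lines also fix a copy-paste bug: both functions were reading nr_dirty twice.)
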
 
 struct bkey_cached *
libbcachefs/btree_update_interior.c
index 4a169d36653832204a09ac5cd86b1ff48e06d783..8f96756ba648f4718d9e7badefd54c0c0b07a4c4 100644
@@ -519,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
        trans->journal_pin = &as->journal;
 
        for_each_keylist_key(&as->new_keys, k) {
-               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(trans,
+                                         bkey_s_c_null,
+                                         bkey_i_to_s_c(k),
                                          0, 0, BTREE_TRIGGER_INSERT);
                if (ret)
                        return ret;
        }
 
        for_each_keylist_key(&as->old_keys, k) {
-               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(trans,
+                                         bkey_i_to_s_c(k),
+                                         bkey_s_c_null,
                                          0, 0, BTREE_TRIGGER_OVERWRITE);
                if (ret)
                        return ret;
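
These two call sites illustrate the new trigger convention established by this commit: bch2_trans_mark_key() now receives both versions of the key, with bkey_s_c_null standing in for the missing side. A pure insert passes (null, new) with BTREE_TRIGGER_INSERT; a pure overwrite passes (old, null) with BTREE_TRIGGER_OVERWRITE.
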
libbcachefs/btree_update_leaf.c
index e7816afe4a08b0aa4573aff152b073f22cd90d1d..64734f9158c391ea9b84487cd7a573ba4188d44a 100644
@@ -508,6 +508,10 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
        /*
         * Can't be holding any read locks when we go to take write locks:
+        * another thread could be holding an intent lock on the same node we
+        * have a read lock on, and it'll block trying to take a write lock
+        * (because we hold a read lock) and it could be blocking us by holding
+        * its own read lock (while we're trying to take write locks).
         *
         * note - this must be done after bch2_trans_journal_preres_get_cold()
         * or anything else that might call bch2_trans_relock(), since that
@@ -515,9 +519,15 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
         */
        trans_for_each_iter(trans, iter) {
                if (iter->nodes_locked != iter->nodes_intent_locked) {
-                       EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
-                       EBUG_ON(trans->iters_live & (1ULL << iter->idx));
-                       bch2_btree_iter_unlock_noinline(iter);
+                       if ((iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
+                           (trans->iters_live & (1ULL << iter->idx))) {
+                               if (!bch2_btree_iter_upgrade(iter, 1)) {
+                                       trace_trans_restart_upgrade(trans->ip);
+                                       return -EINTR;
+                               }
+                       } else {
+                               bch2_btree_iter_unlock_noinline(iter);
+                       }
                }
        }
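
The behavioural change: iterators that must remain valid past the commit (flagged KEEP_UNTIL_COMMIT, or still live in the transaction) can no longer simply have their read locks dropped, so they are upgraded to intent locks instead; if the upgrade fails, the transaction restarts with -EINTR rather than risking the read-lock deadlock described in the comment above.
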
 
libbcachefs/buckets.c
index 0000fc76d2d9c31c72576de743851d2888b03cfc..1934b845ea15096ee0a5bfa6dd35ec0e70126378 100644
@@ -1334,10 +1334,8 @@ static int bch2_mark_key_locked(struct bch_fs *c,
                ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_inode:
-               if (!(flags & BTREE_TRIGGER_OVERWRITE))
-                       fs_usage->nr_inodes++;
-               else
-                       fs_usage->nr_inodes--;
+               fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
+               fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
                break;
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
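
A small self-contained sketch of the boolean-arithmetic idiom used for nr_inodes above: a C comparison evaluates to exactly 0 or 1, so subtracting the two predicates gives the net change in one branch-free expression and handles create, delete, and inode-overwriting-inode uniformly:

    #include <stdio.h>

    enum key_type { KEY_TYPE_deleted, KEY_TYPE_inode };

    /* +1 (create), -1 (delete), or 0 (inode overwriting inode). */
    static int nr_inodes_delta(enum key_type old, enum key_type new)
    {
            return (new == KEY_TYPE_inode) - (old == KEY_TYPE_inode);
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   nr_inodes_delta(KEY_TYPE_deleted, KEY_TYPE_inode),  /*  1 */
                   nr_inodes_delta(KEY_TYPE_inode, KEY_TYPE_deleted),  /* -1 */
                   nr_inodes_delta(KEY_TYPE_inode, KEY_TYPE_inode));   /*  0 */
            return 0;
    }
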
@@ -1401,10 +1399,10 @@ int bch2_mark_update(struct btree_trans *trans,
        old = (struct bkey_s_c) { &unpacked, NULL };
 
        if (!btree_node_type_is_extents(iter->btree_id)) {
+               /* iterators should be uptodate, shouldn't get errors here: */
                if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
-                       _old = bch2_btree_node_iter_peek(&node_iter, b);
-                       if (_old)
-                               old = bkey_disassemble(b, _old, &unpacked);
+                       old = bch2_btree_iter_peek_slot(iter);
+                       BUG_ON(bkey_err(old));
                } else {
                        struct bkey_cached *ck = (void *) iter->l[0].b;
 
@@ -1749,59 +1747,92 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
        return 0;
 }
 
+static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
+                                           const struct bch_extent_ptr *ptr,
+                                           s64 sectors, bool parity)
+{
+       struct bkey_i_alloc *a;
+       struct btree_iter *iter;
+       struct bkey_alloc_unpacked u;
+       int ret;
+
+       ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
+       if (ret)
+               return ret;
+
+       if (parity) {
+               u.dirty_sectors += sectors;
+               u.data_type = u.dirty_sectors
+                       ? BCH_DATA_parity
+                       : 0;
+       }
+
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto err;
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, u);
+       bch2_trans_update(trans, iter, &a->k_i, 0);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
 static int bch2_trans_mark_stripe(struct btree_trans *trans,
-                                 struct bkey_s_c k,
+                                 struct bkey_s_c old, struct bkey_s_c new,
                                  unsigned flags)
 {
-       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-       unsigned nr_data = s->nr_blocks - s->nr_redundant;
+       const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(old).v : NULL;
+       const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(new).v : NULL;
        struct bch_replicas_padded r;
-       struct bkey_alloc_unpacked u;
-       struct bkey_i_alloc *a;
-       struct btree_iter *iter;
-       bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
-       s64 sectors = le16_to_cpu(s->sectors);
        unsigned i;
        int ret = 0;
 
-       if (deleting)
-               sectors = -sectors;
-
-       bch2_bkey_to_replicas(&r.e, k);
-       update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
-
        /*
-        * The allocator code doesn't necessarily update bucket gens in the
-        * btree when incrementing them, right before handing out new buckets -
-        * we just need to persist those updates here along with the new stripe:
+        * If the pointers aren't changing, we don't need to do anything:
         */
+       if (new_s && old_s &&
+           !memcmp(old_s->ptrs, new_s->ptrs,
+                   new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+               return 0;
 
-       for (i = 0; i < s->nr_blocks && !ret; i++) {
-               bool parity = i >= nr_data;
+       if (new_s) {
+               unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant;
+               s64 sectors = le16_to_cpu(new_s->sectors);
 
-               ret = bch2_trans_start_alloc_update(trans, &iter,
-                                                   &s->ptrs[i], &u);
-               if (ret)
-                       break;
+               bch2_bkey_to_replicas(&r.e, new);
+               update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
 
-               if (parity) {
-                       u.dirty_sectors += sectors;
-                       u.data_type = u.dirty_sectors
-                               ? BCH_DATA_parity
-                               : 0;
+               for (i = 0; i < new_s->nr_blocks; i++) {
+                       bool parity = i >= nr_data;
+
+                       ret = bch2_trans_mark_stripe_alloc_ref(trans,
+                                       &new_s->ptrs[i], sectors, parity);
+                       if (ret)
+                               return ret;
                }
+       }
 
-               a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
-               ret = PTR_ERR_OR_ZERO(a);
-               if (ret)
-                       goto put_iter;
-
-               bkey_alloc_init(&a->k_i);
-               a->k.p = iter->pos;
-               bch2_alloc_pack(a, u);
-               bch2_trans_update(trans, iter, &a->k_i, 0);
-put_iter:
-               bch2_trans_iter_put(trans, iter);
+       if (old_s) {
+               unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant;
+               s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+
+               bch2_bkey_to_replicas(&r.e, old);
+               update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+
+               for (i = 0; i < old_s->nr_blocks; i++) {
+                       bool parity = i >= nr_data;
+
+                       ret = bch2_trans_mark_stripe_alloc_ref(trans,
+                                       &old_s->ptrs[i], sectors, parity);
+                       if (ret)
+                               return ret;
+               }
        }
 
        return ret;
@@ -1900,11 +1931,16 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+int bch2_trans_mark_key(struct btree_trans *trans,
+                       struct bkey_s_c old,
+                       struct bkey_s_c new,
                        unsigned offset, s64 sectors, unsigned flags)
 {
-       struct replicas_delta_list *d;
        struct bch_fs *c = trans->c;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
+       struct replicas_delta_list *d;
+
+       BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
@@ -1920,15 +1956,18 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
                return bch2_trans_mark_extent(trans, k, offset, sectors,
                                              flags, BCH_DATA_user);
        case KEY_TYPE_stripe:
-               return bch2_trans_mark_stripe(trans, k, flags);
-       case KEY_TYPE_inode:
-               d = replicas_deltas_realloc(trans, 0);
+               return bch2_trans_mark_stripe(trans, old, new, flags);
+       case KEY_TYPE_inode: {
+               int nr = (new.k->type == KEY_TYPE_inode) -
+                        (old.k->type == KEY_TYPE_inode);
+
+               if (nr) {
+                       d = replicas_deltas_realloc(trans, 0);
+                       d->nr_inodes += nr;
+               }
 
-               if (!(flags & BTREE_TRIGGER_OVERWRITE))
-                       d->nr_inodes++;
-               else
-                       d->nr_inodes--;
                return 0;
+       }
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
@@ -1952,12 +1991,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
 int bch2_trans_mark_update(struct btree_trans *trans,
                           struct btree_iter *iter,
-                          struct bkey_i *insert,
+                          struct bkey_i *new,
                           unsigned flags)
 {
-       struct btree            *b = iter_l(iter)->b;
-       struct btree_node_iter  node_iter = iter_l(iter)->iter;
-       struct bkey_packed      *_k;
+       struct bkey_s_c old;
        int ret;
 
        if (unlikely(flags & BTREE_TRIGGER_NORUN))
@@ -1966,68 +2003,93 @@ int bch2_trans_mark_update(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(iter->btree_id))
                return 0;
 
-       ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
-                       0, insert->k.size, BTREE_TRIGGER_INSERT);
-       if (ret)
-               return ret;
-
-       if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
-               struct bkey_cached *ck = (void *) iter->l[0].b;
+       if (!btree_node_type_is_extents(iter->btree_id)) {
+               /* iterators should be uptodate, shouldn't get errors here: */
+               if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+                       old = bch2_btree_iter_peek_slot(iter);
+                       BUG_ON(bkey_err(old));
+               } else {
+                       struct bkey_cached *ck = (void *) iter->l[0].b;
 
-               return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
-                                          0, 0, BTREE_TRIGGER_OVERWRITE);
-       }
+                       BUG_ON(!ck->valid);
+                       old = bkey_i_to_s_c(ck->k);
+               }
 
-       while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
+               if (old.k->type == new->k.type) {
+                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+               } else {
+                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_INSERT|flags) ?:
+                               bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_OVERWRITE|flags);
+               }
+       } else {
+               struct btree            *b = iter_l(iter)->b;
+               struct btree_node_iter  node_iter = iter_l(iter)->iter;
+               struct bkey_packed      *_old;
                struct bkey             unpacked;
-               struct bkey_s_c         k;
-               unsigned                offset = 0;
-               s64                     sectors = 0;
-               unsigned                flags = BTREE_TRIGGER_OVERWRITE;
 
-               k = bkey_disassemble(b, _k, &unpacked);
+               EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
 
-               if (btree_node_is_extents(b)
-                   ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
-                   : bkey_cmp(insert->k.p, k.k->p))
-                       break;
+               bkey_init(&unpacked);
+               old = (struct bkey_s_c) { &unpacked, NULL };
+
+               ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+                                         0, new->k.size,
+                                         BTREE_TRIGGER_INSERT);
+               if (ret)
+                       return ret;
+
+               while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
+                       unsigned flags = BTREE_TRIGGER_OVERWRITE;
+                       unsigned offset = 0;
+                       s64 sectors;
+
+                       old = bkey_disassemble(b, _old, &unpacked);
+                       sectors = -((s64) old.k->size);
+
+                       flags |= BTREE_TRIGGER_OVERWRITE;
+
+                       if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
+                               return 0;
 
-               if (btree_node_is_extents(b)) {
-                       switch (bch2_extent_overlap(&insert->k, k.k)) {
+                       switch (bch2_extent_overlap(&new->k, old.k)) {
                        case BCH_EXTENT_OVERLAP_ALL:
                                offset = 0;
-                               sectors = -((s64) k.k->size);
+                               sectors = -((s64) old.k->size);
                                break;
                        case BCH_EXTENT_OVERLAP_BACK:
-                               offset = bkey_start_offset(&insert->k) -
-                                       bkey_start_offset(k.k);
-                               sectors = bkey_start_offset(&insert->k) -
-                                       k.k->p.offset;
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = bkey_start_offset(&new->k) -
+                                       old.k->p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_FRONT:
                                offset = 0;
-                               sectors = bkey_start_offset(k.k) -
-                                       insert->k.p.offset;
+                               sectors = bkey_start_offset(old.k) -
+                                       new->k.p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_MIDDLE:
-                               offset = bkey_start_offset(&insert->k) -
-                                       bkey_start_offset(k.k);
-                               sectors = -((s64) insert->k.size);
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = -((s64) new->k.size);
                                flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
                                break;
                        }
 
                        BUG_ON(sectors >= 0);
-               }
 
-               ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
-               if (ret)
-                       return ret;
+                       ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+                                       offset, sectors, flags);
+                       if (ret)
+                               return ret;
 
-               bch2_btree_node_iter_advance(&node_iter, b);
+                       bch2_btree_node_iter_advance(&node_iter, b);
+               }
        }
 
-       return 0;
+       return ret;
 }
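
To make the overlap arithmetic above concrete (sector numbers invented for the example): with an old extent covering sectors [10,20) and a new one covering [15,25), the case is BCH_EXTENT_OVERLAP_BACK, giving offset = 15 - 10 = 5 and sectors = 15 - 20 = -5, i.e. five sectors of the old extent, starting five sectors in, are overwritten. For new = [5,15) over old = [10,20) (OVERLAP_FRONT): offset = 0, sectors = 10 - 15 = -5. For new = [12,18) inside old = [10,20) (OVERLAP_MIDDLE): offset = 2, sectors = -6, and OVERWRITE_SPLIT is set because the old extent survives on both sides.
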
 
 /* Disk reservations: */
libbcachefs/buckets.h
index a3873becbb70111b173b6c42e369e2bc5012027f..3a5ed1fcaf78455ee2ab922dfdc403d0f9cedaa9 100644
@@ -264,7 +264,7 @@ int bch2_mark_update(struct btree_trans *, struct btree_iter *,
 int bch2_replicas_delta_list_apply(struct bch_fs *,
                                   struct bch_fs_usage *,
                                   struct replicas_delta_list *);
-int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
                        unsigned, s64, unsigned);
 int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
                           struct bkey_i *insert, unsigned);
libbcachefs/ec.c
index c409a4260f11ceac8e4ee2aa9dc4c6de73160e61..8f39c4de6672a49702f1eef235e24d952dc629a5 100644
@@ -300,7 +300,7 @@ static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
 static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
 {
        struct bch_stripe *v = &buf->key.v;
-       unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0;
+       unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0;
        unsigned nr_data = v->nr_blocks - v->nr_redundant;
        unsigned bytes = buf->size << 9;
 
@@ -874,7 +874,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        for_each_keylist_key(&s->keys, k) {
                ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
                if (ret) {
-                       bch_err(c, "error creating stripe: error updating pointers");
+                       bch_err(c, "error creating stripe: error %i updating pointers", ret);
                        break;
                }
        }
@@ -1101,7 +1101,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
        s->c            = c;
        s->h            = h;
        s->nr_data      = min_t(unsigned, h->nr_active_devs,
-                               EC_STRIPE_MAX) - h->redundancy;
+                               BCH_BKEY_PTRS_MAX) - h->redundancy;
        s->nr_parity    = h->redundancy;
 
        bch2_keylist_init(&s->keys, s->inline_keys);
@@ -1211,13 +1211,13 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
        struct open_bucket *ob;
        unsigned i, nr_have, nr_data =
                min_t(unsigned, h->nr_active_devs,
-                     EC_STRIPE_MAX) - h->redundancy;
+                     BCH_BKEY_PTRS_MAX) - h->redundancy;
        bool have_cache = true;
        int ret = 0;
 
        devs = h->devs;
 
-       for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
+       for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
                __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
                --nr_data;
        }
@@ -1341,16 +1341,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
        if (!h)
                return NULL;
 
-       if (!h->s && ec_new_stripe_alloc(c, h)) {
-               bch2_ec_stripe_head_put(c, h);
-               return NULL;
-       }
-
-       if (!h->s->allocated) {
-               if (!h->s->existing_stripe &&
-                   (idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) {
-                       //pr_info("got existing stripe %llu", idx);
+       if (!h->s) {
+               if (ec_new_stripe_alloc(c, h)) {
+                       bch2_ec_stripe_head_put(c, h);
+                       return NULL;
+               }
 
+               idx = get_existing_stripe(c, target, algo, redundancy);
+               if (idx >= 0) {
                        h->s->existing_stripe = true;
                        h->s->existing_stripe_idx = idx;
                        if (get_stripe_key(c, idx, &h->s->stripe)) {
@@ -1364,7 +1362,9 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                        ec_block_io(c, &h->s->stripe, READ, i, &cl);
                                }
                }
+       }
 
+       if (!h->s->allocated) {
                if (!h->s->existing_stripe &&
                    !h->s->res.sectors) {
                        ret = bch2_disk_reservation_get(c, &h->s->res,
libbcachefs/ec.h
index 15f751fc2a35d32bea03efde0cf5cbbd332bb41a..450bb1a113a30c200db219525076a7522d34b275 100644
@@ -71,9 +71,9 @@ struct ec_stripe_buf {
        /* might not be buffering the entire stripe: */
        unsigned                offset;
        unsigned                size;
-       unsigned long           valid[BITS_TO_LONGS(EC_STRIPE_MAX)];
+       unsigned long           valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
 
-       void                    *data[EC_STRIPE_MAX];
+       void                    *data[BCH_BKEY_PTRS_MAX];
 
        union {
                struct bkey_i_stripe    key;
@@ -101,10 +101,10 @@ struct ec_stripe_new {
        bool                    existing_stripe;
        u64                     existing_stripe_idx;
 
-       unsigned long           blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
+       unsigned long           blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
 
        struct open_buckets     blocks;
-       u8                      data_block_idx[EC_STRIPE_MAX];
+       u8                      data_block_idx[BCH_BKEY_PTRS_MAX];
        struct open_buckets     parity;
        struct disk_reservation res;
 
libbcachefs/ec_types.h
index e4d633fca5bf913a78a4d78168141dc1458fdaf1..5b688b4394f788a8ec2accbce2365d1c7d45f76f 100644
@@ -4,11 +4,9 @@
 
 #include <linux/llist.h>
 
-#define EC_STRIPE_MAX  16
-
 struct bch_replicas_padded {
        struct bch_replicas_entry       e;
-       u8                              pad[EC_STRIPE_MAX];
+       u8                              pad[BCH_BKEY_PTRS_MAX];
 };
 
 struct stripe {
@@ -24,7 +22,7 @@ struct stripe {
        unsigned                dirty:1;
        unsigned                on_heap:1;
        u8                      blocks_nonempty;
-       u16                     block_sectors[EC_STRIPE_MAX];
+       u16                     block_sectors[BCH_BKEY_PTRS_MAX];
 
        struct bch_replicas_padded r;
 };
libbcachefs/extents.c
index 7fae6a4ba26f0e1e0209f2d86c8cf74cc1537121..828ccf07da610eb6faa9d46dbd7ca331dc14ea8b 100644
@@ -1046,11 +1046,13 @@ static const char *extent_ptr_invalid(const struct bch_fs *c,
 const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       struct bch_devs_list devs;
        const union bch_extent_entry *entry;
        struct bch_extent_crc_unpacked crc;
        unsigned size_ondisk = k.k->size;
        const char *reason;
        unsigned nonce = UINT_MAX;
+       unsigned i;
 
        if (k.k->type == KEY_TYPE_btree_ptr)
                size_ondisk = c->opts.btree_node_size;
@@ -1101,6 +1103,12 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
                }
        }
 
+       devs = bch2_bkey_devs(k);
+       bubble_sort(devs.devs, devs.nr, u8_cmp);
+       for (i = 0; i + 1 < devs.nr; i++)
+               if (devs.devs[i] == devs.devs[i + 1])
+                       return "multiple ptrs to same device";
+
        return NULL;
 }
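
The new check sorts the key's device IDs (at most BCH_BKEY_PTRS_MAX of them) and then compares neighbours, since after sorting any duplicates are adjacent. A stand-alone sketch of the same idea, with qsort() standing in for the kernel bubble_sort() used above (bubble sort being a reasonable choice there for a tiny fixed-size array):

    #include <stdbool.h>
    #include <stdlib.h>

    static int u8_cmp(const void *l, const void *r)
    {
            return (int) *(const unsigned char *) l -
                   (int) *(const unsigned char *) r;
    }

    /* True if any device ID appears more than once: sort, then scan
     * adjacent pairs. */
    static bool has_duplicate_devs(unsigned char *devs, unsigned nr)
    {
            unsigned i;

            qsort(devs, nr, 1, u8_cmp);
            for (i = 0; i + 1 < nr; i++)
                    if (devs[i] == devs[i + 1])
                            return true;
            return false;
    }
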
 
libbcachefs/fs-io.c
index 8170e93ca4d487497aeb65ed7f5fa947824405f1..53c6660e07f8092822373549c40e2bc64c5f9de0 100644
@@ -3019,8 +3019,8 @@ static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
        int pg_offset;
        loff_t ret = -1;
 
-       page = find_lock_entry(mapping, index);
-       if (!page || xa_is_value(page))
+       page = find_lock_page(mapping, index);
+       if (!page)
                return offset;
 
        pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
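
find_lock_page(), unlike find_lock_entry(), filters out shadow (value) entries itself and returns either a locked page or NULL, which is why the xa_is_value() test can be dropped together with the call change.
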
libbcachefs/io.c
index 3489605e0127907d12471e41aa133565175361ec..abf204ef21cac13cca8041890930210ed6614ee8 100644
@@ -186,34 +186,33 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
 static int sum_sector_overwrites(struct btree_trans *trans,
                                 struct btree_iter *extent_iter,
                                 struct bkey_i *new,
-                                bool may_allocate,
                                 bool *maybe_extending,
-                                s64 *delta)
+                                s64 *i_sectors_delta,
+                                s64 *disk_sectors_delta)
 {
        struct btree_iter *iter;
        struct bkey_s_c old;
        int ret = 0;
 
-       *maybe_extending = true;
-       *delta = 0;
+       *maybe_extending        = true;
+       *i_sectors_delta        = 0;
+       *disk_sectors_delta     = 0;
 
        iter = bch2_trans_copy_iter(trans, extent_iter);
 
        for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
-               if (!may_allocate &&
-                   bch2_bkey_nr_ptrs_fully_allocated(old) <
-                   bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) {
-                       ret = -ENOSPC;
-                       break;
-               }
+               s64 sectors = min(new->k.p.offset, old.k->p.offset) -
+                       max(bkey_start_offset(&new->k),
+                           bkey_start_offset(old.k));
 
-               *delta += (min(new->k.p.offset,
-                             old.k->p.offset) -
-                         max(bkey_start_offset(&new->k),
-                             bkey_start_offset(old.k))) *
+               *i_sectors_delta += sectors *
                        (bkey_extent_is_allocation(&new->k) -
                         bkey_extent_is_allocation(old.k));
 
+               *disk_sectors_delta += sectors *
+                       (int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) -
+                              bch2_bkey_nr_ptrs_fully_allocated(old));
+
                if (bkey_cmp(old.k->p, new->k.p) >= 0) {
                        /*
                         * Check if there's already data above where we're
@@ -247,12 +246,12 @@ int bch2_extent_update(struct btree_trans *trans,
                       struct disk_reservation *disk_res,
                       u64 *journal_seq,
                       u64 new_i_size,
-                      s64 *i_sectors_delta)
+                      s64 *i_sectors_delta_total)
 {
        /* this must live until after bch2_trans_commit(): */
        struct bkey_inode_buf inode_p;
        bool extending = false;
-       s64 delta = 0;
+       s64 i_sectors_delta = 0, disk_sectors_delta = 0;
        int ret;
 
        ret = bch2_extent_trim_atomic(k, iter);
@@ -260,16 +259,26 @@ int bch2_extent_update(struct btree_trans *trans,
                return ret;
 
        ret = sum_sector_overwrites(trans, iter, k,
-                       disk_res && disk_res->sectors != 0,
-                       &extending, &delta);
+                       &extending,
+                       &i_sectors_delta,
+                       &disk_sectors_delta);
        if (ret)
                return ret;
 
+       if (disk_res &&
+           disk_sectors_delta > (s64) disk_res->sectors) {
+               ret = bch2_disk_reservation_add(trans->c, disk_res,
+                                       disk_sectors_delta - disk_res->sectors,
+                                       0);
+               if (ret)
+                       return ret;
+       }
+
        new_i_size = extending
                ? min(k->k.p.offset << 9, new_i_size)
                : 0;
 
-       if (delta || new_i_size) {
+       if (i_sectors_delta || new_i_size) {
                struct btree_iter *inode_iter;
                struct bch_inode_unpacked inode_u;
 
@@ -296,9 +305,9 @@ int bch2_extent_update(struct btree_trans *trans,
                else
                        new_i_size = 0;
 
-               inode_u.bi_sectors += delta;
+               inode_u.bi_sectors += i_sectors_delta;
 
-               if (delta || new_i_size) {
+               if (i_sectors_delta || new_i_size) {
                        bch2_inode_pack(trans->c, &inode_p, &inode_u);
                        bch2_trans_update(trans, inode_iter,
                                          &inode_p.inode.k_i, 0);
@@ -313,10 +322,12 @@ int bch2_extent_update(struct btree_trans *trans,
                                BTREE_INSERT_NOCHECK_RW|
                                BTREE_INSERT_NOFAIL|
                                BTREE_INSERT_USE_RESERVE);
-       if (!ret && i_sectors_delta)
-               *i_sectors_delta += delta;
+       if (ret)
+               return ret;
 
-       return ret;
+       if (i_sectors_delta_total)
+               *i_sectors_delta_total += i_sectors_delta;
+       return 0;
 }
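
The flow change in bch2_extent_update(): rather than sum_sector_overwrites() failing with -ENOSPC whenever the write was not allowed to allocate, it now reports how many on-disk sectors the update actually needs (disk_sectors_delta, computed from the allocated-pointer counts of old and new keys), and the caller tops up the disk reservation before committing. The logical size change, i_sectors_delta, is tracked separately and applied to the inode.
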
 
 int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
libbcachefs/journal.c
index 701521030c3d46bd92cab91e913795c7aa316bbf..d544248293789acb4cecb00c0ea79f87b0f99bf7 100644
@@ -443,20 +443,6 @@ unlock:
        if (!ret)
                goto retry;
 
-       if (WARN_ONCE(ret == cur_entry_journal_full &&
-                     !can_discard &&
-                     (flags & JOURNAL_RES_GET_RESERVED),
-                     "JOURNAL_RES_GET_RESERVED set but journal full")) {
-               char *buf;
-
-               buf = kmalloc(4096, GFP_NOFS);
-               if (buf) {
-                       bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
-                       pr_err("\n%s", buf);
-                       kfree(buf);
-               }
-       }
-
        /*
         * Journal is full - can't rely on reclaim from work item due to
         * freezing:
@@ -1137,7 +1123,7 @@ out:
 
 /* debug: */
 
-void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        union journal_res_state s;
@@ -1145,7 +1131,6 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        unsigned i;
 
        rcu_read_lock();
-       spin_lock(&j->lock);
        s = READ_ONCE(j->reservations);
 
        pr_buf(out,
@@ -1245,10 +1230,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
                       ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
        }
 
-       spin_unlock(&j->lock);
        rcu_read_unlock();
 }
 
+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+       spin_lock(&j->lock);
+       __bch2_journal_debug_to_text(out, j);
+       spin_unlock(&j->lock);
+}
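
This is the usual kernel locked/unlocked split: the double-underscored __bch2_journal_debug_to_text() assumes j->lock is held, and the un-prefixed wrapper takes the lock around it. That lets the journal-stuck report added to journal_reclaim.c below, which already runs under j->lock, dump the journal state without self-deadlocking. The generic shape of the pattern, with hypothetical names:

    #include <linux/spinlock.h>

    struct obj {
            spinlock_t lock;
            /* ... state protected by lock ... */
    };

    /* Caller must hold o->lock: */
    static void __obj_dump(struct obj *o)
    {
            /* ... read and print the locked state ... */
    }

    /* Convenience wrapper for callers that don't hold the lock: */
    void obj_dump(struct obj *o)
    {
            spin_lock(&o->lock);
            __obj_dump(o);
            spin_unlock(&o->lock);
    }
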
+
 void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
 {
        struct journal_entry_pin_list *pin_list;
libbcachefs/journal.h
index a6ce03a724cba1e3ddb74cfe6f380bc19a9a5105..1db1f190a168fd2fe5881fa687bcf7e29d8f7ee8 100644
@@ -384,7 +384,7 @@ out:
 static inline bool journal_check_may_get_unreserved(struct journal *j)
 {
        union journal_preres_state s = READ_ONCE(j->prereserved);
-       bool ret = s.reserved <= s.remaining &&
+       bool ret = s.reserved < s.remaining &&
                fifo_free(&j->pin) > 8;
 
        lockdep_assert_held(&j->lock);
@@ -508,6 +508,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 void bch2_journal_unblock(struct journal *);
 void bch2_journal_block(struct journal *);
 
+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
 
libbcachefs/journal_io.c
index bb9a1936c24cdd4014c8e39a134d4535213b1731..0e6fbe2f6a7542ac24495a8a02864af35638fa48 100644
@@ -1099,7 +1099,6 @@ static void journal_write_done(struct closure *cl)
        if (!w->noflush) {
                j->flushed_seq_ondisk = seq;
                j->last_seq_ondisk = last_seq;
-               bch2_journal_space_available(j);
        }
 
        /*
@@ -1123,6 +1122,8 @@ static void journal_write_done(struct closure *cl)
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
+       bch2_journal_space_available(j);
+
        closure_wake_up(&w->wait);
        journal_wake(j);
 
libbcachefs/journal_reclaim.c
index 9d778306efc515a3caa11e561dada6fce09bed1c..4e3cf219fb911e1f1cbc70d2fe415d98b0d162e1 100644
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "error.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
@@ -159,7 +160,7 @@ void bch2_journal_space_available(struct journal *j)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        unsigned clean, clean_ondisk, total;
-       unsigned overhead, u64s_remaining = 0;
+       s64 u64s_remaining = 0;
        unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
                                       j->buf[1].buf_size >> 9);
        unsigned i, nr_online = 0, nr_devs_want;
@@ -208,22 +209,38 @@ void bch2_journal_space_available(struct journal *j)
        clean           = j->space[journal_space_clean].total;
        total           = j->space[journal_space_total].total;
 
-       if (!j->space[journal_space_discarded].next_entry)
+       if (!clean_ondisk &&
+           j->reservations.idx ==
+           j->reservations.unwritten_idx) {
+               char *buf = kmalloc(4096, GFP_ATOMIC);
+
+               bch_err(c, "journal stuck");
+               if (buf) {
+                       __bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
+                       pr_err("\n%s", buf);
+                       kfree(buf);
+               }
+
+               bch2_fatal_error(c);
+               ret = cur_entry_journal_stuck;
+       } else if (!j->space[journal_space_discarded].next_entry)
                ret = cur_entry_journal_full;
        else if (!fifo_free(&j->pin))
                ret = cur_entry_journal_pin_full;
 
-       if ((clean - clean_ondisk <= total / 8) &&
+       if ((j->space[journal_space_clean_ondisk].next_entry <
+            j->space[journal_space_clean_ondisk].total) &&
+           (clean - clean_ondisk <= total / 8) &&
            (clean_ondisk * 2 > clean ))
                set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-       overhead = DIV_ROUND_UP(clean, max_entry_size) *
-               journal_entry_overhead(j);
-       u64s_remaining = clean << 6;
-       u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
-       u64s_remaining /= 4;
+       u64s_remaining  = (u64) clean << 6;
+       u64s_remaining -= (u64) total << 3;
+       u64s_remaining = max(0LL, u64s_remaining);
+       u64s_remaining /= 2;
+       u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
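
Unpacking the new u64s_remaining arithmetic: clean is in 512-byte sectors and each sector holds 64 u64s, so clean << 6 converts the clean space into journal-entry units; total << 3 then withholds an eighth of the journal's total capacity as a reserve, and the result is halved and clamped to [0, U32_MAX]. The old formula instead estimated a per-entry overhead and divided by four.
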
@@ -367,12 +384,22 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
        struct journal_entry_pin_list *pin_list;
 
        spin_lock(&j->lock);
+
+       if (seq < journal_last_seq(j)) {
+               /*
+                * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
+                * the src pin - with the pin dropped, the entry to pin might no
+                * longer to exist, but that means there's no longer anything to
+                * copy and we can bail out here:
+                */
+               spin_unlock(&j->lock);
+               return;
+       }
+
        pin_list = journal_seq_pin(j, seq);
 
        __journal_pin_drop(j, pin);
 
-       BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
-
        atomic_inc(&pin_list->count);
        pin->seq        = seq;
        pin->flush      = flush_fn;
@@ -572,6 +599,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
                    c->btree_cache.used  * 3)
                        min_nr = 1;
 
+               if (fifo_free(&j->pin) <= 32)
+                       min_nr = 1;
+
                min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
 
                trace_journal_reclaim_start(c,
@@ -590,7 +620,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
                else
                        j->nr_background_reclaim += nr_flushed;
                trace_journal_reclaim_finish(c, nr_flushed);
-       } while (min_nr);
+       } while (min_nr && nr_flushed);
 
        memalloc_noreclaim_restore(flags);
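
This loop condition is the fix the commit subject refers to: previously the reclaim loop kept retrying for as long as min_nr was set, even when journal_flush_pins() was making no progress, i.e. it spun in journal reclaim. Also requiring nr_flushed to be nonzero makes the loop bail out as soon as an iteration flushes nothing.
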
 
libbcachefs/journal_reclaim.h
index f02caa3d49ea74daf97d1054cc3ddbfba250d254..adf1f5c981cdfa80e227516d7c203306a543eba3 100644
@@ -53,8 +53,11 @@ static inline void bch2_journal_pin_copy(struct journal *j,
                                         struct journal_entry_pin *src,
                                         journal_pin_flush_fn flush_fn)
 {
-       if (journal_pin_active(src))
-               bch2_journal_pin_add(j, src->seq, dst, flush_fn);
+       /* Guard against racing with journal_pin_drop(src): */
+       u64 seq = READ_ONCE(src->seq);
+
+       if (seq)
+               bch2_journal_pin_add(j, seq, dst, flush_fn);
 }
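
The race being closed here: the old code read src->seq twice, once inside journal_pin_active() and again when passing it to bch2_journal_pin_add(), so a concurrent bch2_journal_pin_drop() could clear the pin between the two reads and a stale seq would be pinned. Taking a single READ_ONCE() snapshot and acting only on that local copy is the standard repair; the matching seq < journal_last_seq(j) check added to bch2_journal_pin_set() in journal_reclaim.c handles the case where the snapshotted entry has since been reclaimed.
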
 
 static inline void bch2_journal_pin_update(struct journal *j, u64 seq,
libbcachefs/journal_types.h
index 308b899b42145e99c0ce8c2309943e740a7efc80..67ee47eb17a79c4b1cf5e091f84a0ff5fd587633 100644
@@ -172,6 +172,7 @@ struct journal {
                cur_entry_blocked,
                cur_entry_journal_full,
                cur_entry_journal_pin_full,
+               cur_entry_journal_stuck,
                cur_entry_insufficient_devices,
        }                       cur_entry_error;
 
libbcachefs/recovery.c
index ecd51d45743a3705da4f1a6261b2469876f73add..1883a1faf380c9d69bfd0bd1d720a1c127a94bb3 100644
@@ -458,7 +458,9 @@ retry:
                bch2_btree_iter_set_pos(iter, split->k.p);
 
                if (remark) {
-                       ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
+                       ret = bch2_trans_mark_key(&trans,
+                                                 bkey_s_c_null,
+                                                 bkey_i_to_s_c(split),
                                                  0, split->k.size,
                                                  BTREE_TRIGGER_INSERT);
                        if (ret)
@@ -467,7 +469,9 @@ retry:
        } while (bkey_cmp(iter->pos, k->k.p) < 0);
 
        if (remark) {
-               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(&trans,
+                                         bkey_i_to_s_c(k),
+                                         bkey_s_c_null,
                                          0, -((s64) k->k.size),
                                          BTREE_TRIGGER_OVERWRITE);
                if (ret)
libbcachefs/replicas.c
index 00a197b65e0b1412a76f28facd6df4cdf01b89a9..d37d173f3ba6db328fa3cb6ab231f6c1a1620ad8 100644
@@ -11,11 +11,6 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
 
 /* Replicas tracking - in memory: */
 
-static inline int u8_cmp(u8 l, u8 r)
-{
-       return cmp_int(l, r);
-}
-
 static void verify_replicas_entry(struct bch_replicas_entry *e)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
libbcachefs/super-io.c
index abe46c539c2e20d0991ae023cbed7c46e5301db5..78835bd2d6bc4e0c95c96f68d9d318eef47652e6 100644
@@ -614,9 +614,6 @@ got_super:
            bdev_logical_block_size(sb->bdev))
                goto err;
 
-       if (sb->mode & FMODE_WRITE)
-               bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
-                       |= BDI_CAP_STABLE_WRITES;
        ret = 0;
        sb->have_layout = true;
 out:
libbcachefs/super_types.h
index 20406ebd6f5bad7cd89252a293366262155e9cef..069973a38f12d7b1398c1b12a858d5a7e263be7c 100644
@@ -20,7 +20,7 @@ struct bch_devs_mask {
 
 struct bch_devs_list {
        u8                      nr;
-       u8                      devs[BCH_REPLICAS_MAX + 1];
+       u8                      devs[BCH_BKEY_PTRS_MAX];
 };
 
 struct bch_member_cpu {
libbcachefs/tests.c
index 5f40b048dd0d9bd6764499261f85c435d9f7e845..f1d09e3ada0937a1e72faeeff29a3ff9ab21d2dc 100644
@@ -563,15 +563,14 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 
        iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos,
                                   BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(iter);
-       if (ret)
-               goto err;
-
        k = bch2_btree_iter_peek(iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
+       if (!k.k)
+               goto err;
+
        bkey_init(&delete.k);
        delete.k.p = k.k->p;
 
libbcachefs/util.h
index 6e5335440b4b5696b1764bba740225bd46ef039f..c69b05deec41dc69ba387803305d1f364551849d 100644
@@ -747,4 +747,9 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
 
 #define cmp_int(l, r)          ((l > r) - (l < r))
 
+static inline int u8_cmp(u8 l, u8 r)
+{
+       return cmp_int(l, r);
+}
+
 #endif /* _BCACHEFS_UTIL_H */
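
u8_cmp() moves here from replicas.c so that extents.c can use it too. cmp_int() expands to ((l > r) - (l < r)), which yields exactly -1, 0, or 1 for operands of any comparable type; the tempting return l - r; happens to work for u8 (promoted to int) but gives the wrong sign once the values are wider than int, as this small sketch shows:

    #include <stdint.h>
    #include <stdio.h>

    #define cmp_int(l, r)   ((l > r) - (l < r))

    int main(void)
    {
            uint64_t a = 0, b = UINT64_MAX;

            /* (int)(a - b) truncates to 1: wrong sign. cmp_int() is safe: */
            printf("%d %d %d\n",
                   cmp_int(a, b),       /* -1 */
                   cmp_int(b, a),       /*  1 */
                   cmp_int(a, a));      /*  0 */
            return 0;
    }
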