git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 7958ebe324 bcachefs: Fix alloc_v4_backpointers()
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 19 Dec 2022 19:47:35 +0000 (14:47 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 19 Dec 2022 19:47:35 +0000 (14:47 -0500)
23 files changed:
.bcachefs_revision
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/bcachefs.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_leaf.c
libbcachefs/data_update.c
libbcachefs/errcode.h
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_io.h
libbcachefs/journal_types.h
libbcachefs/nocow_locking.c
libbcachefs/nocow_locking.h
libbcachefs/nocow_locking_types.h [new file with mode: 0644]
libbcachefs/recovery.c
libbcachefs/super.c
libbcachefs/sysfs.c

index eacbdc34d58126d62d7d8d1750bc44c5202c4e0a..b1aa968db29ee2f2bdd6fd484922b08ba2e71df5 100644 (file)
@@ -1 +1 @@
-84505cfd37957accbff6fa7e4477bfd9c4c23ba6
+7958ebe32438f58a0e59f240aa288b14efcc0964
index d67fab0f5dc15cb83d6011e167d391179239c328..a0c3c47b49b5970a26ededf4269ecba87e30ddea 100644 (file)
@@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
                pos.offset < ca->mi.nbuckets;
 }
 
+static inline u64 bucket_to_u64(struct bpos bucket)
+{
+       return (bucket.inode << 48) | bucket.offset;
+}
+
+static inline struct bpos u64_to_bucket(u64 bucket)
+{
+       return POS(bucket >> 48, bucket & ~(~0ULL << 48));
+}
+
 static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
 {
        return a.gen - a.oldest_gen;
@@ -190,7 +200,9 @@ void bch2_do_invalidates(struct bch_fs *);
 
 static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
 {
-       return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+       return (void *) ((u64 *) &a->v +
+                        (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+                         BCH_ALLOC_V4_U64s_V0));
 }
 
 static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
index 2010a9af0eb2de8a36eb74633c79dea18f17ac25..f1cfb90b6d54c9cd6f5088cdbf9b11471ba1b06b 100644 (file)
@@ -28,6 +28,7 @@
 #include "io.h"
 #include "journal.h"
 #include "movinggc.h"
+#include "nocow_locking.h"
 
 #include <linux/math64.h>
 #include <linux/rculist.h>
index 71f51459872d28c16c95c4e233eda5a870fcb983..febef9ac254ba26b432f394c40ef50f060637559 100644 (file)
 #include "bcachefs_format.h"
 #include "errcode.h"
 #include "fifo.h"
-#include "nocow_locking.h"
+#include "nocow_locking_types.h"
 #include "opts.h"
 #include "util.h"
 
index fd4358eb013006dc142dbac9ee296d4482ebf1b2..9c139a7b43061436982bd23bc579f5fc80a2ce5c 100644 (file)
@@ -2781,6 +2781,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
+static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_path *path;
+
+       trans_for_each_path(trans, path)
+               if (path->cached && !btree_node_locked(path, 0))
+                       path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+       srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+       trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+       trans->srcu_lock_time   = jiffies;
+}
+
 /**
  * bch2_trans_begin() - reset a transaction after a interrupted attempt
  * @trans: transaction to reset
@@ -2836,6 +2850,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
                bch2_trans_relock(trans);
        }
 
+       if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ)))
+               bch2_trans_reset_srcu_lock(trans);
+
        trans->last_restarted_ip = _RET_IP_;
        if (trans->restarted)
                bch2_btree_path_traverse_all(trans);
@@ -2925,6 +2942,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
                trans->nr_max_paths = s->nr_max_paths;
 
        trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+       trans->srcu_lock_time   = jiffies;
 
        mutex_lock(&c->btree_trans_lock);
        list_for_each_entry(pos, &c->btree_trans_list, list) {
index 2f13be60b245b12128612fe766e03f2a77a4f463..07c415d572262d2f18ef92ddb1f0f5862bc7904f 100644 (file)
@@ -459,7 +459,7 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
 
 #define bch2_bkey_alloc(_trans, _iter, _type)                          \
 ({                                                                     \
-       struct bkey_i_##_type *_k = bch2_trans_kmalloc(_trans, sizeof(*_k));\
+       struct bkey_i_##_type *_k = bch2_trans_kmalloc_nomemzero(_trans, sizeof(*_k));\
        if (!IS_ERR(_k)) {                                              \
                bkey_##_type##_init(&_k->k_i);                          \
                _k->k.p = (_iter)->pos;                                 \
index 13a930251ab3d767ea9f9536e0b8daf1d3e22dc1..af86ba12e3a4dbc236c7b6fdea9e74c9ec6e90c3 100644 (file)
@@ -421,6 +421,7 @@ struct btree_trans {
        enum bch_errcode        restarted:16;
        u32                     restart_count;
        unsigned long           last_restarted_ip;
+       unsigned long           srcu_lock_time;
 
        /*
         * For when bch2_trans_update notices we'll be splitting a compressed
@@ -442,7 +443,7 @@ struct btree_trans {
 
        /* update path: */
        struct btree_trans_commit_hook *hooks;
-       DARRAY(u64)             extra_journal_entries;
+       darray_u64              extra_journal_entries;
        struct journal_entry_pin *journal_pin;
 
        struct journal_res      journal_res;
index 1c2e7b2b4ed5b01b00b9e61d6bdaf0802bc69c48..7e9f1f170d5f6bdcedd0585f7263cc053e69b78b 100644 (file)
@@ -82,7 +82,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
                            struct btree_trans_commit_hook *);
 int __bch2_trans_commit(struct btree_trans *);
 
-int bch2_trans_log_msg(struct btree_trans *, const char *);
+int bch2_trans_log_msg(struct btree_trans *, const char *, ...);
+int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
 
 /**
  * bch2_trans_commit - insert keys at given iterator positions
index 75d8a55352127482a42eccfcf604768da7f10f62..a2b37dd45cf8f6d5132a9a9d224c2f5fc1a3c679 100644 (file)
@@ -1727,18 +1727,25 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
        return ret;
 }
 
-int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
+static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
 {
-       unsigned len = strlen(msg);
-       unsigned u64s = DIV_ROUND_UP(len, sizeof(u64));
+       struct printbuf buf = PRINTBUF;
        struct jset_entry_log *l;
+       unsigned u64s;
        int ret;
 
-       ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s));
+       prt_vprintf(&buf, fmt, args);
+       ret = buf.allocation_failure ? -ENOMEM : 0;
        if (ret)
-               return ret;
+               goto err;
+
+       u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
+
+       ret = darray_make_room(entries, jset_u64s(u64s));
+       if (ret)
+               goto err;
 
-       l = (void *) &darray_top(trans->extra_journal_entries);
+       l = (void *) &darray_top(*entries);
        l->entry.u64s           = cpu_to_le16(u64s);
        l->entry.btree_id       = 0;
        l->entry.level          = 1;
@@ -1746,10 +1753,44 @@ int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
        l->entry.pad[0]         = 0;
        l->entry.pad[1]         = 0;
        l->entry.pad[2]         = 0;
-       memcpy(l->d, msg, len);
-       while (len & 7)
-               l->d[len++] = '\0';
+       memcpy(l->d, buf.buf, buf.pos);
+       while (buf.pos & 7)
+               l->d[buf.pos++] = '\0';
+
+       entries->nr += jset_u64s(u64s);
+err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
+int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args);
+       va_end(args);
+
+       return ret;
+}
+
+int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+
+       if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
+               ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
+       } else {
+               ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
+                       __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
+       }
+
+       va_end(args);
+
+       return ret;
 
-       trans->extra_journal_entries.nr += jset_u64s(u64s);
-       return 0;
 }
index d4cdfb48ab8fd19f8bd120f4e10b311ea1e19314..7ef7bb613347ea85bda4fafd1ae6ebcad94f0075 100644 (file)
@@ -11,6 +11,7 @@
 #include "io.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "subvolume.h"
 
 #include <trace/events/bcachefs.h>
index 543cdb553188ab5a003d5f3157b10927405afba2..6217096494e6b085b65c7440363dd16774681ec1 100644 (file)
@@ -52,6 +52,7 @@
        x(BCH_ERR_no_btree_node,        no_btree_node_down)                     \
        x(BCH_ERR_no_btree_node,        no_btree_node_init)                     \
        x(BCH_ERR_no_btree_node,        no_btree_node_cached)                   \
+       x(BCH_ERR_no_btree_node,        no_btree_node_srcu_reset)               \
        x(0,                            btree_insert_fail)                      \
        x(BCH_ERR_btree_insert_fail,    btree_insert_btree_node_full)           \
        x(BCH_ERR_btree_insert_fail,    btree_insert_need_mark_replicas)        \
index 706f18bc4238373a758998b30064ad7d702a478e..d215973ae73b3d9cdf5353509b1c534de0db9232 100644 (file)
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "rebalance.h"
 #include "subvolume.h"
 #include "super.h"
@@ -753,15 +754,17 @@ static void __bch2_write_index(struct bch_write_op *op)
 
                op->written += sectors_start - keylist_sectors(keys);
 
-               if (ret) {
+               if (ret && !bch2_err_matches(ret, EROFS)) {
                        struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
 
                        bch_err_inum_offset_ratelimited(c,
                                k->k.p.inode, k->k.p.offset << 9,
                                "write error while doing btree update: %s",
                                bch2_err_str(ret));
-                       goto err;
                }
+
+               if (ret)
+                       goto err;
        }
 out:
        /* If some a bucket wasn't written, we can't erasure code it: */
@@ -1362,13 +1365,16 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
                        bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size);
                }));
 
-               if (ret) {
+               if (ret && !bch2_err_matches(ret, EROFS)) {
                        struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
 
                        bch_err_inum_offset_ratelimited(c,
                                k->k.p.inode, k->k.p.offset << 9,
                                "write error while doing btree update: %s",
                                bch2_err_str(ret));
+               }
+
+               if (ret) {
                        op->error = ret;
                        break;
                }
@@ -1406,7 +1412,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
        struct {
                struct bpos     b;
                unsigned        gen;
-               two_state_lock_t *l;
+               struct nocow_lock_bucket *l;
        } buckets[BCH_REPLICAS_MAX];
        unsigned nr_buckets = 0;
        u32 snapshot;
@@ -1453,7 +1459,8 @@ retry:
                        buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
                        buckets[nr_buckets].gen = ptr->gen;
                        buckets[nr_buckets].l =
-                               bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b);
+                               bucket_nocow_lock(&c->nocow_locks,
+                                                 bucket_to_u64(buckets[nr_buckets].b));
 
                        prefetch(buckets[nr_buckets].l);
                        nr_buckets++;
@@ -1475,11 +1482,12 @@ retry:
 
                for (i = 0; i < nr_buckets; i++) {
                        struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
-                       two_state_lock_t *l = buckets[i].l;
+                       struct nocow_lock_bucket *l = buckets[i].l;
                        bool stale;
 
-                       if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE))
-                               __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE);
+                       __bch2_bucket_nocow_lock(&c->nocow_locks, l,
+                                                bucket_to_u64(buckets[i].b),
+                                                BUCKET_NOCOW_LOCK_UPDATE);
 
                        rcu_read_lock();
                        stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
@@ -2905,11 +2913,6 @@ void bch2_fs_io_exit(struct bch_fs *c)
 
 int bch2_fs_io_init(struct bch_fs *c)
 {
-       unsigned i;
-
-       for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
-               two_state_lock_init(&c->nocow_locks.l[i]);
-
        if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
                        BIOSET_NEED_BVECS) ||
            bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
index e35b685a6770df57484f8aceeb7f9883793735cd..1cbca187cb15d2829cb1c8e09c694c4131f86c0d 100644 (file)
@@ -244,7 +244,7 @@ static int journal_entry_open(struct journal *j)
                journal_entry_overhead(j);
        u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
-       if (u64s <= 0)
+       if (u64s <= (ssize_t) j->early_journal_entries.nr)
                return JOURNAL_ERR_journal_full;
 
        if (fifo_empty(&j->pin) && j->reclaim_thread)
@@ -269,6 +269,12 @@ static int journal_entry_open(struct journal *j)
        buf->data->seq  = cpu_to_le64(journal_cur_seq(j));
        buf->data->u64s = 0;
 
+       if (j->early_journal_entries.nr) {
+               memcpy(buf->data->_data, j->early_journal_entries.data,
+                      j->early_journal_entries.nr * sizeof(u64));
+               le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr);
+       }
+
        /*
         * Must be set before marking the journal entry as open:
         */
@@ -285,7 +291,9 @@ static int journal_entry_open(struct journal *j)
                BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
 
                journal_state_inc(&new);
-               new.cur_entry_offset = 0;
+
+               /* Handle any already added entries */
+               new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
@@ -298,6 +306,9 @@ static int journal_entry_open(struct journal *j)
                         &j->write_work,
                         msecs_to_jiffies(c->opts.journal_flush_delay));
        journal_wake(j);
+
+       if (j->early_journal_entries.nr)
+               darray_exit(&j->early_journal_entries);
        return 0;
 }
 
@@ -717,39 +728,6 @@ int bch2_journal_meta(struct journal *j)
        return bch2_journal_flush_seq(j, res.seq);
 }
 
-int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
-{
-       struct jset_entry_log *entry;
-       struct journal_res res = { 0 };
-       unsigned msglen, u64s;
-       va_list args;
-       int ret;
-
-       va_start(args, fmt);
-       msglen = vsnprintf(NULL, 0, fmt, args) + 1;
-       va_end(args);
-
-       u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
-
-       ret = bch2_journal_res_get(j, &res, u64s, 0);
-       if (ret)
-               return ret;
-
-       entry = container_of(journal_res_entry(j, &res),
-                            struct jset_entry_log, entry);
-       memset(entry, 0, u64s * sizeof(u64));
-       entry->entry.type = BCH_JSET_ENTRY_log;
-       entry->entry.u64s = u64s - 1;
-
-       va_start(args, fmt);
-       vsnprintf(entry->d, INT_MAX, fmt, args);
-       va_end(args);
-
-       bch2_journal_res_put(j, &res);
-
-       return bch2_journal_flush_seq(j, res.seq);
-}
-
 /* block/unlock the journal: */
 
 void bch2_journal_unblock(struct journal *j)
@@ -1192,6 +1170,8 @@ void bch2_fs_journal_exit(struct journal *j)
 {
        unsigned i;
 
+       darray_exit(&j->early_journal_entries);
+
        for (i = 0; i < ARRAY_SIZE(j->buf); i++)
                kvpfree(j->buf[i].data, j->buf[i].buf_size);
        free_fifo(&j->pin);
index 896a2d7dca3615eab5512c56f023b3b267206673..ee37f90aa6b369097170d85ea2b3fe35c69d56ea 100644 (file)
@@ -497,7 +497,6 @@ int bch2_journal_flush_seq(struct journal *, u64);
 int bch2_journal_flush(struct journal *);
 bool bch2_journal_noflush_seq(struct journal *, u64);
 int bch2_journal_meta(struct journal *);
-int bch2_journal_log_msg(struct journal *, const char *, ...);
 
 void bch2_journal_halt(struct journal *);
 
index 2b1974a9f360e943e59a408ccbbcb9cc2c262f4b..d6f259348b3dbb67eec2740a19b93b333765d038 100644 (file)
@@ -1080,7 +1080,10 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
        }
 }
 
-int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c,
+                     u64 *last_seq,
+                     u64 *blacklist_seq,
+                     u64 *start_seq)
 {
        struct journal_list jlist;
        struct journal_replay *i, **_i, *prev = NULL;
@@ -1089,7 +1092,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
        unsigned iter;
        struct printbuf buf = PRINTBUF;
        bool degraded = false, last_write_torn = false;
-       u64 seq, last_seq = 0;
+       u64 seq;
        int ret = 0;
 
        closure_init_stack(&jlist.cl);
@@ -1118,15 +1121,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
        if (jlist.ret)
                return jlist.ret;
 
+       *last_seq       = 0;
        *start_seq      = 0;
        *blacklist_seq  = 0;
 
        /*
         * Find most recent flush entry, and ignore newer non flush entries -
         * those entries will be blacklisted:
-        *
-        *
-        * XXX check for torn write on last journal entry
         */
        genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
                int write = READ;
@@ -1140,13 +1141,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                        *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
 
                if (JSET_NO_FLUSH(&i->j)) {
-                       journal_replay_free(c, i);
+                       i->ignore = true;
                        continue;
                }
 
                if (!last_write_torn && !i->csum_good) {
                        last_write_torn = true;
-                       journal_replay_free(c, i);
+                       i->ignore = true;
                        continue;
                }
 
@@ -1157,7 +1158,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                                         le64_to_cpu(i->j.seq)))
                        i->j.last_seq = i->j.seq;
 
-               last_seq        = le64_to_cpu(i->j.last_seq);
+               *last_seq       = le64_to_cpu(i->j.last_seq);
                *blacklist_seq  = le64_to_cpu(i->j.seq) + 1;
                break;
        }
@@ -1167,13 +1168,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                return 0;
        }
 
-       if (!last_seq) {
+       if (!*last_seq) {
                fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
                return 0;
        }
 
        bch_info(c, "journal read done, replaying entries %llu-%llu",
-                last_seq, *blacklist_seq - 1);
+                *last_seq, *blacklist_seq - 1);
 
        if (*start_seq != *blacklist_seq)
                bch_info(c, "dropped unflushed entries %llu-%llu",
@@ -1187,7 +1188,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                        continue;
 
                seq = le64_to_cpu(i->j.seq);
-               if (seq < last_seq) {
+               if (seq < *last_seq) {
                        journal_replay_free(c, i);
                        continue;
                }
@@ -1195,13 +1196,12 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                if (bch2_journal_seq_is_blacklisted(c, seq, true)) {
                        fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
                                    "found blacklisted journal entry %llu", seq);
-
-                       journal_replay_free(c, i);
+                       i->ignore = true;
                }
        }
 
        /* Check for missing entries: */
-       seq = last_seq;
+       seq = *last_seq;
        genradix_for_each(&c->journal_entries, radix_iter, _i) {
                i = *_i;
 
@@ -1239,7 +1239,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
                                 "  prev at %s\n"
                                 "  next at %s",
                                 missing_start, missing_end,
-                                last_seq, *blacklist_seq - 1,
+                                *last_seq, *blacklist_seq - 1,
                                 buf1.buf, buf2.buf);
 
                        printbuf_exit(&buf1);
index 2f8bbf06b28951301d8c0469eed1e0f934adf18b..a32c2876f2a65075e5803ce01c18aa9f29793aec 100644 (file)
@@ -52,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
 void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
                               struct journal_replay *);
 
-int bch2_journal_read(struct bch_fs *, u64 *, u64 *);
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 
 void bch2_journal_write(struct closure *);
 
index 045ee95a92a874a09668e75a5cabae92f9ce27e4..c8729cb37f06808ecec9588ad9a159db124281f2 100644 (file)
@@ -177,6 +177,8 @@ enum journal_errors {
 #undef x
 };
 
+typedef DARRAY(u64)            darray_u64;
+
 /* Embedded in struct bch_fs */
 struct journal {
        /* Fastpath stuff up front: */
@@ -203,6 +205,12 @@ struct journal {
 
        unsigned                buf_size_want;
 
+       /*
+        * We may queue up some things to be journalled (log messages) before
+        * the journal has actually started - stash them here:
+        */
+       darray_u64              early_journal_entries;
+
        /*
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
index b325fb105322a8393587813d9852bcbd652ebb4f..bff6267158cc3f6d30bbb3b1ac141d28b8379294 100644 (file)
 #include "nocow_locking.h"
 #include "util.h"
 
+#include <linux/closure.h>
+
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
+{
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket && atomic_read(&l->l[i]))
+                       return true;
+       return false;
+}
+
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
+{
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+       int lock_val = flags ? 1 : -1;
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket) {
+                       if (!atomic_sub_return(lock_val, &l->l[i]))
+                               closure_wake_up(&l->wait);
+                       return;
+               }
+
+       BUG();
+}
+
+static bool bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
+                                     u64 dev_bucket, int flags)
+{
+       int v, lock_val = flags ? 1 : -1;
+       unsigned i;
+
+       spin_lock(&l->lock);
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket)
+                       goto got_entry;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (!atomic_read(&l->l[i])) {
+                       l->b[i] = dev_bucket;
+                       goto take_lock;
+               }
+fail:
+       spin_unlock(&l->lock);
+       return false;
+got_entry:
+       v = atomic_read(&l->l[i]);
+       if (lock_val > 0 ? v < 0 : v > 0)
+               goto fail;
+take_lock:
+       atomic_add(lock_val, &l->l[i]);
+       spin_unlock(&l->lock);
+       return true;
+}
+
 void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-                             two_state_lock_t *l, int flags)
+                             struct nocow_lock_bucket *l,
+                             u64 dev_bucket, int flags)
+{
+       if (!bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
+               struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
+               u64 start_time = local_clock();
+
+               __closure_wait_event(&l->wait, bch2_bucket_nocow_trylock(l, dev_bucket, flags));
+               bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+       }
+}
+
+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
 {
-       struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
-       u64 start_time = local_clock();
+       unsigned i, nr_zero = 0;
+       struct nocow_lock_bucket *l;
+
+       for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
+               unsigned v = 0;
+
+               for (i = 0; i < ARRAY_SIZE(l->l); i++)
+                       v |= atomic_read(&l->l[i]);
+
+               if (!v) {
+                       nr_zero++;
+                       continue;
+               }
+
+               if (nr_zero)
+                       prt_printf(out, "(%u empty entries)\n", nr_zero);
+               nr_zero = 0;
+
+               for (i = 0; i < ARRAY_SIZE(l->l); i++)
+                       if (atomic_read(&l->l[i]))
+                               prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i]));
+               prt_newline(out);
+       }
+
+       if (nr_zero)
+               prt_printf(out, "(%u empty entries)\n", nr_zero);
+}
+
+int bch2_fs_nocow_locking_init(struct bch_fs *c)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
+               spin_lock_init(&c->nocow_locks.l[i].lock);
 
-       __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
-       bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+       return 0;
 }
index 2a7a9f44e88e9eaf5fcf6dbacbcf97a680df5bbe..45258cc34614af8c86e047d3f910489c2484d9fa 100644 (file)
@@ -2,54 +2,38 @@
 #ifndef _BCACHEFS_NOCOW_LOCKING_H
 #define _BCACHEFS_NOCOW_LOCKING_H
 
-#include "bcachefs_format.h"
-#include "two_state_shared_lock.h"
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "nocow_locking_types.h"
 
 #include <linux/hash.h>
 
-#define BUCKET_NOCOW_LOCKS_BITS                10
-#define BUCKET_NOCOW_LOCKS             (1U << BUCKET_NOCOW_LOCKS_BITS)
-
-struct bucket_nocow_lock_table {
-       two_state_lock_t                l[BUCKET_NOCOW_LOCKS];
-};
-
-#define BUCKET_NOCOW_LOCK_UPDATE       (1 << 0)
-
-static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-                                                 struct bpos bucket)
+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+                                                         u64 dev_bucket)
 {
-       u64 dev_bucket = bucket.inode << 56 | bucket.offset;
        unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
 
        return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
 }
 
-static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t,
-                                              struct bpos bucket)
-{
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
-
-       return atomic_long_read(&l->v) != 0;
-}
-
-static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t,
-                                           struct bpos bucket, int flags)
-{
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
-
-       bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
-}
+#define BUCKET_NOCOW_LOCK_UPDATE       (1 << 0)
 
-void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int);
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
+                             struct nocow_lock_bucket *, u64, int);
 
 static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
                                          struct bpos bucket, int flags)
 {
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
 
-       if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE))
-               __bch2_bucket_nocow_lock(t, l, flags);
+       __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
 }
 
+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
+
+int bch2_fs_nocow_locking_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_NOCOW_LOCKING_H */
diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/nocow_locking_types.h
new file mode 100644 (file)
index 0000000..bd12bf6
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
+
+#define BUCKET_NOCOW_LOCKS_BITS                10
+#define BUCKET_NOCOW_LOCKS             (1U << BUCKET_NOCOW_LOCKS_BITS)
+
+struct nocow_lock_bucket {
+       struct closure_waitlist         wait;
+       spinlock_t                      lock;
+       u64                             b[4];
+       atomic_t                        l[4];
+} __aligned(SMP_CACHE_BYTES);
+
+struct bucket_nocow_lock_table {
+       struct nocow_lock_bucket        l[BUCKET_NOCOW_LOCKS];
+};
+
+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
+
index 0ad2bb2aa039bdec4426b1d98b92672159bb08b7..8c1aa6d2ce45f02f8cea4c782a45be8aa2237b6e 100644 (file)
@@ -588,7 +588,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
        return cmp_int(l->journal_seq, r->journal_seq);
 }
 
-static int bch2_journal_replay(struct bch_fs *c)
+static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq)
 {
        struct journal_keys *keys = &c->journal_keys;
        struct journal_key **keys_sorted, *k;
@@ -610,6 +610,13 @@ static int bch2_journal_replay(struct bch_fs *c)
             sizeof(keys_sorted[0]),
             journal_sort_seq_cmp, NULL);
 
+       if (keys->nr) {
+               ret = bch2_fs_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
+                                     keys->nr, start_seq, end_seq);
+               if (ret)
+                       goto err;
+       }
+
        for (i = 0; i < keys->nr; i++) {
                k = keys_sorted[i];
 
@@ -639,7 +646,7 @@ static int bch2_journal_replay(struct bch_fs *c)
        ret = bch2_journal_error(j);
 
        if (keys->nr && !ret)
-               bch2_journal_log_msg(&c->journal, "journal replay finished");
+               bch2_fs_log_msg(c, "journal replay finished");
 err:
        kvfree(keys_sorted);
        return ret;
@@ -1045,7 +1052,7 @@ int bch2_fs_recovery(struct bch_fs *c)
        const char *err = "cannot allocate memory";
        struct bch_sb_field_clean *clean = NULL;
        struct jset *last_journal_entry = NULL;
-       u64 blacklist_seq, journal_seq;
+       u64 last_seq, blacklist_seq, journal_seq;
        bool write_sb = false;
        int ret = 0;
 
@@ -1115,7 +1122,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                struct journal_replay **i;
 
                bch_verbose(c, "starting journal read");
-               ret = bch2_journal_read(c, &blacklist_seq, &journal_seq);
+               ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
                if (ret)
                        goto err;
 
@@ -1143,7 +1150,15 @@ int bch2_fs_recovery(struct bch_fs *c)
 
                if (!last_journal_entry) {
                        fsck_err_on(!c->sb.clean, c, "no journal entries found");
-                       goto use_clean;
+                       if (clean)
+                               goto use_clean;
+
+                       genradix_for_each_reverse(&c->journal_entries, iter, i)
+                               if (*i) {
+                                       last_journal_entry = &(*i)->j;
+                                       (*i)->ignore = false;
+                                       break;
+                               }
                }
 
                ret = journal_keys_sort(c);
@@ -1189,7 +1204,9 @@ use_clean:
                journal_seq += 8;
 
        if (blacklist_seq != journal_seq) {
-               ret = bch2_journal_seq_blacklist_add(c,
+               ret =   bch2_fs_log_msg(c, "blacklisting entries %llu-%llu",
+                                       blacklist_seq, journal_seq) ?:
+                       bch2_journal_seq_blacklist_add(c,
                                        blacklist_seq, journal_seq);
                if (ret) {
                        bch_err(c, "error creating new journal seq blacklist entry");
@@ -1197,7 +1214,9 @@ use_clean:
                }
        }
 
-       ret = bch2_fs_journal_start(&c->journal, journal_seq);
+       ret =   bch2_fs_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
+                               journal_seq, last_seq, blacklist_seq - 1) ?:
+               bch2_fs_journal_start(&c->journal, journal_seq);
        if (ret)
                goto err;
 
@@ -1264,7 +1283,7 @@ use_clean:
 
                bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr);
                err = "journal replay failed";
-               ret = bch2_journal_replay(c);
+               ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
                if (ret)
                        goto err;
                if (c->opts.verbose || !c->sb.clean)
@@ -1345,7 +1364,7 @@ use_clean:
 
                bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
                err = "journal replay failed";
-               ret = bch2_journal_replay(c);
+               ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
                if (ret)
                        goto err;
                if (c->opts.verbose || !c->sb.clean)
index 7cac0567037d3ea9613d4bae2ff793b94e2fe0c3..95c16f70512f1c06bb849ce65b3dac7144eb5000 100644 (file)
@@ -37,6 +37,7 @@
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
+#include "nocow_locking.h"
 #include "quota.h"
 #include "rebalance.h"
 #include "recovery.h"
@@ -803,6 +804,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            bch2_fs_buckets_waiting_for_journal_init(c) ?:
            bch2_fs_subvolumes_init(c) ?:
            bch2_fs_io_init(c) ?:
+           bch2_fs_nocow_locking_init(c) ?:
            bch2_fs_encryption_init(c) ?:
            bch2_fs_compress_init(c) ?:
            bch2_fs_ec_init(c) ?:
index bad3eafd32d28d185d2bed9858624562167f7589..6e49cf98f60d5da80b7626e95079f0121a5d0aca 100644 (file)
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "opts.h"
 #include "rebalance.h"
 #include "replicas.h"
@@ -446,22 +447,9 @@ SHOW(bch2_fs)
        if (attr == &sysfs_data_jobs)
                data_progress_to_text(out, c);
 
-       if (attr == &sysfs_nocow_lock_table) {
-               int i, count = 1;
-               long last, curr = 0;
-
-               last = atomic_long_read(&c->nocow_locks.l[0].v);
-               for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) {
-                       curr = atomic_long_read(&c->nocow_locks.l[i].v);
-                       if (last != curr) {
-                               prt_printf(out, "%li: %d\n", last, count);
-                               count = 1;
-                               last = curr;
-                       } else
-                               count++;
-               }
-               prt_printf(out, "%li: %d\n", last, count);
-}
+       if (attr == &sysfs_nocow_lock_table)
+               bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
        return 0;
 }