From 71111771690f244d13650c73d52ff601ad914d95 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Dec 2022 14:47:35 -0500 Subject: [PATCH] Update bcachefs sources to 7958ebe324 bcachefs: Fix alloc_v4_backpointers() --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.h | 14 +++- libbcachefs/alloc_foreground.c | 1 + libbcachefs/bcachefs.h | 2 +- libbcachefs/btree_iter.c | 18 +++++ libbcachefs/btree_iter.h | 2 +- libbcachefs/btree_types.h | 3 +- libbcachefs/btree_update.h | 3 +- libbcachefs/btree_update_leaf.c | 63 ++++++++++++++--- libbcachefs/data_update.c | 1 + libbcachefs/errcode.h | 1 + libbcachefs/io.c | 29 ++++---- libbcachefs/journal.c | 50 ++++--------- libbcachefs/journal.h | 1 - libbcachefs/journal_io.c | 30 ++++---- libbcachefs/journal_io.h | 2 +- libbcachefs/journal_types.h | 8 +++ libbcachefs/nocow_locking.c | 114 ++++++++++++++++++++++++++++-- libbcachefs/nocow_locking.h | 50 +++++-------- libbcachefs/nocow_locking_types.h | 20 ++++++ libbcachefs/recovery.c | 37 +++++++--- libbcachefs/super.c | 2 + libbcachefs/sysfs.c | 20 ++---- 23 files changed, 328 insertions(+), 145 deletions(-) create mode 100644 libbcachefs/nocow_locking_types.h diff --git a/.bcachefs_revision b/.bcachefs_revision index eacbdc3..b1aa968 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -84505cfd37957accbff6fa7e4477bfd9c4c23ba6 +7958ebe32438f58a0e59f240aa288b14efcc0964 diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index d67fab0..a0c3c47 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos) pos.offset < ca->mi.nbuckets; } +static inline u64 bucket_to_u64(struct bpos bucket) +{ + return (bucket.inode << 48) | bucket.offset; +} + +static inline struct bpos u64_to_bucket(u64 bucket) +{ + return POS(bucket >> 48, bucket & ~(~0ULL << 48)); +} + static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) { return a.gen - a.oldest_gen; @@ -190,7 +200,9 @@ void bch2_do_invalidates(struct bch_fs *); static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) { - return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); + return (void *) ((u64 *) &a->v + + (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + BCH_ALLOC_V4_U64s_V0)); } static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a) diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 2010a9a..f1cfb90 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -28,6 +28,7 @@ #include "io.h" #include "journal.h" #include "movinggc.h" +#include "nocow_locking.h" #include #include diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 71f5145..febef9a 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -206,7 +206,7 @@ #include "bcachefs_format.h" #include "errcode.h" #include "fifo.h" -#include "nocow_locking.h" +#include "nocow_locking_types.h" #include "opts.h" #include "util.h" diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index fd4358e..9c139a7 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2781,6 +2781,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } +static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) +{ + struct bch_fs *c = trans->c; + struct btree_path *path; + + trans_for_each_path(trans, path) + if (path->cached && !btree_node_locked(path, 0)) + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; +} + /** * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset @@ -2836,6 +2850,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_relock(trans); } + if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ))) + bch2_trans_reset_srcu_lock(trans); + trans->last_restarted_ip = _RET_IP_; if (trans->restarted) bch2_btree_path_traverse_all(trans); @@ -2925,6 +2942,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_ trans->nr_max_paths = s->nr_max_paths; trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; mutex_lock(&c->btree_trans_lock); list_for_each_entry(pos, &c->btree_trans_list, list) { diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 2f13be6..07c415d 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -459,7 +459,7 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, #define bch2_bkey_alloc(_trans, _iter, _type) \ ({ \ - struct bkey_i_##_type *_k = bch2_trans_kmalloc(_trans, sizeof(*_k));\ + struct bkey_i_##_type *_k = bch2_trans_kmalloc_nomemzero(_trans, sizeof(*_k));\ if (!IS_ERR(_k)) { \ bkey_##_type##_init(&_k->k_i); \ _k->k.p = (_iter)->pos; \ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 13a9302..af86ba1 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -421,6 +421,7 @@ struct btree_trans { enum bch_errcode restarted:16; u32 restart_count; unsigned long last_restarted_ip; + unsigned long srcu_lock_time; /* * For when bch2_trans_update notices we'll be splitting a compressed @@ -442,7 +443,7 @@ struct btree_trans { /* update path: */ struct btree_trans_commit_hook *hooks; - DARRAY(u64) extra_journal_entries; + darray_u64 extra_journal_entries; struct journal_entry_pin *journal_pin; struct journal_res journal_res; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 1c2e7b2..7e9f1f1 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -82,7 +82,8 @@ void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *); -int bch2_trans_log_msg(struct btree_trans *, const char *); +int bch2_trans_log_msg(struct btree_trans *, const char *, ...); +int bch2_fs_log_msg(struct bch_fs *, const char *, ...); /** * bch2_trans_commit - insert keys at given iterator positions diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 75d8a55..a2b37dd 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -1727,18 +1727,25 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, return ret; } -int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) +static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) { - unsigned len = strlen(msg); - unsigned u64s = DIV_ROUND_UP(len, sizeof(u64)); + struct printbuf buf = PRINTBUF; struct jset_entry_log *l; + unsigned u64s; int ret; - ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s)); + prt_vprintf(&buf, fmt, args); + ret = buf.allocation_failure ? -ENOMEM : 0; if (ret) - return ret; + goto err; + + u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); + + ret = darray_make_room(entries, jset_u64s(u64s)); + if (ret) + goto err; - l = (void *) &darray_top(trans->extra_journal_entries); + l = (void *) &darray_top(*entries); l->entry.u64s = cpu_to_le16(u64s); l->entry.btree_id = 0; l->entry.level = 1; @@ -1746,10 +1753,44 @@ int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) l->entry.pad[0] = 0; l->entry.pad[1] = 0; l->entry.pad[2] = 0; - memcpy(l->d, msg, len); - while (len & 7) - l->d[len++] = '\0'; + memcpy(l->d, buf.buf, buf.pos); + while (buf.pos & 7) + l->d[buf.pos++] = '\0'; + + entries->nr += jset_u64s(u64s); +err: + printbuf_exit(&buf); + return ret; +} + +int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args); + va_end(args); + + return ret; +} + +int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + + if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) { + ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args); + } else { + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, + __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args)); + } + + va_end(args); + + return ret; - trans->extra_journal_entries.nr += jset_u64s(u64s); - return 0; } diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index d4cdfb4..7ef7bb6 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -11,6 +11,7 @@ #include "io.h" #include "keylist.h" #include "move.h" +#include "nocow_locking.h" #include "subvolume.h" #include diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 543cdb5..6217096 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -52,6 +52,7 @@ x(BCH_ERR_no_btree_node, no_btree_node_down) \ x(BCH_ERR_no_btree_node, no_btree_node_init) \ x(BCH_ERR_no_btree_node, no_btree_node_cached) \ + x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \ x(0, btree_insert_fail) \ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \ diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 706f18b..d215973 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -27,6 +27,7 @@ #include "journal.h" #include "keylist.h" #include "move.h" +#include "nocow_locking.h" #include "rebalance.h" #include "subvolume.h" #include "super.h" @@ -753,15 +754,17 @@ static void __bch2_write_index(struct bch_write_op *op) op->written += sectors_start - keylist_sectors(keys); - if (ret) { + if (ret && !bch2_err_matches(ret, EROFS)) { struct bkey_i *k = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, k->k.p.inode, k->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); - goto err; } + + if (ret) + goto err; } out: /* If some a bucket wasn't written, we can't erasure code it: */ @@ -1362,13 +1365,16 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size); })); - if (ret) { + if (ret && !bch2_err_matches(ret, EROFS)) { struct bkey_i *k = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, k->k.p.inode, k->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); + } + + if (ret) { op->error = ret; break; } @@ -1406,7 +1412,7 @@ static void bch2_nocow_write(struct bch_write_op *op) struct { struct bpos b; unsigned gen; - two_state_lock_t *l; + struct nocow_lock_bucket *l; } buckets[BCH_REPLICAS_MAX]; unsigned nr_buckets = 0; u32 snapshot; @@ -1453,7 +1459,8 @@ retry: buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); buckets[nr_buckets].gen = ptr->gen; buckets[nr_buckets].l = - bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b); + bucket_nocow_lock(&c->nocow_locks, + bucket_to_u64(buckets[nr_buckets].b)); prefetch(buckets[nr_buckets].l); nr_buckets++; @@ -1475,11 +1482,12 @@ retry: for (i = 0; i < nr_buckets; i++) { struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); - two_state_lock_t *l = buckets[i].l; + struct nocow_lock_bucket *l = buckets[i].l; bool stale; - if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE)) - __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE); + __bch2_bucket_nocow_lock(&c->nocow_locks, l, + bucket_to_u64(buckets[i].b), + BUCKET_NOCOW_LOCK_UPDATE); rcu_read_lock(); stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); @@ -2905,11 +2913,6 @@ void bch2_fs_io_exit(struct bch_fs *c) int bch2_fs_io_init(struct bch_fs *c) { - unsigned i; - - for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++) - two_state_lock_init(&c->nocow_locks.l[i]); - if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), BIOSET_NEED_BVECS) || bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index e35b685..1cbca18 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -244,7 +244,7 @@ static int journal_entry_open(struct journal *j) journal_entry_overhead(j); u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); - if (u64s <= 0) + if (u64s <= (ssize_t) j->early_journal_entries.nr) return JOURNAL_ERR_journal_full; if (fifo_empty(&j->pin) && j->reclaim_thread) @@ -269,6 +269,12 @@ static int journal_entry_open(struct journal *j) buf->data->seq = cpu_to_le64(journal_cur_seq(j)); buf->data->u64s = 0; + if (j->early_journal_entries.nr) { + memcpy(buf->data->_data, j->early_journal_entries.data, + j->early_journal_entries.nr * sizeof(u64)); + le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr); + } + /* * Must be set before marking the journal entry as open: */ @@ -285,7 +291,9 @@ static int journal_entry_open(struct journal *j) BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK)); journal_state_inc(&new); - new.cur_entry_offset = 0; + + /* Handle any already added entries */ + new.cur_entry_offset = le32_to_cpu(buf->data->u64s); } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v); @@ -298,6 +306,9 @@ static int journal_entry_open(struct journal *j) &j->write_work, msecs_to_jiffies(c->opts.journal_flush_delay)); journal_wake(j); + + if (j->early_journal_entries.nr) + darray_exit(&j->early_journal_entries); return 0; } @@ -717,39 +728,6 @@ int bch2_journal_meta(struct journal *j) return bch2_journal_flush_seq(j, res.seq); } -int bch2_journal_log_msg(struct journal *j, const char *fmt, ...) -{ - struct jset_entry_log *entry; - struct journal_res res = { 0 }; - unsigned msglen, u64s; - va_list args; - int ret; - - va_start(args, fmt); - msglen = vsnprintf(NULL, 0, fmt, args) + 1; - va_end(args); - - u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64))); - - ret = bch2_journal_res_get(j, &res, u64s, 0); - if (ret) - return ret; - - entry = container_of(journal_res_entry(j, &res), - struct jset_entry_log, entry); - memset(entry, 0, u64s * sizeof(u64)); - entry->entry.type = BCH_JSET_ENTRY_log; - entry->entry.u64s = u64s - 1; - - va_start(args, fmt); - vsnprintf(entry->d, INT_MAX, fmt, args); - va_end(args); - - bch2_journal_res_put(j, &res); - - return bch2_journal_flush_seq(j, res.seq); -} - /* block/unlock the journal: */ void bch2_journal_unblock(struct journal *j) @@ -1192,6 +1170,8 @@ void bch2_fs_journal_exit(struct journal *j) { unsigned i; + darray_exit(&j->early_journal_entries); + for (i = 0; i < ARRAY_SIZE(j->buf); i++) kvpfree(j->buf[i].data, j->buf[i].buf_size); free_fifo(&j->pin); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 896a2d7..ee37f90 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -497,7 +497,6 @@ int bch2_journal_flush_seq(struct journal *, u64); int bch2_journal_flush(struct journal *); bool bch2_journal_noflush_seq(struct journal *, u64); int bch2_journal_meta(struct journal *); -int bch2_journal_log_msg(struct journal *, const char *, ...); void bch2_journal_halt(struct journal *); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 2b1974a..d6f2593 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1080,7 +1080,10 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } -int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) +int bch2_journal_read(struct bch_fs *c, + u64 *last_seq, + u64 *blacklist_seq, + u64 *start_seq) { struct journal_list jlist; struct journal_replay *i, **_i, *prev = NULL; @@ -1089,7 +1092,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) unsigned iter; struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; - u64 seq, last_seq = 0; + u64 seq; int ret = 0; closure_init_stack(&jlist.cl); @@ -1118,15 +1121,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) if (jlist.ret) return jlist.ret; + *last_seq = 0; *start_seq = 0; *blacklist_seq = 0; /* * Find most recent flush entry, and ignore newer non flush entries - * those entries will be blacklisted: - * - * - * XXX check for torn write on last journal entry */ genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { int write = READ; @@ -1140,13 +1141,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1; if (JSET_NO_FLUSH(&i->j)) { - journal_replay_free(c, i); + i->ignore = true; continue; } if (!last_write_torn && !i->csum_good) { last_write_torn = true; - journal_replay_free(c, i); + i->ignore = true; continue; } @@ -1157,7 +1158,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) le64_to_cpu(i->j.seq))) i->j.last_seq = i->j.seq; - last_seq = le64_to_cpu(i->j.last_seq); + *last_seq = le64_to_cpu(i->j.last_seq); *blacklist_seq = le64_to_cpu(i->j.seq) + 1; break; } @@ -1167,13 +1168,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) return 0; } - if (!last_seq) { + if (!*last_seq) { fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); return 0; } bch_info(c, "journal read done, replaying entries %llu-%llu", - last_seq, *blacklist_seq - 1); + *last_seq, *blacklist_seq - 1); if (*start_seq != *blacklist_seq) bch_info(c, "dropped unflushed entries %llu-%llu", @@ -1187,7 +1188,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) continue; seq = le64_to_cpu(i->j.seq); - if (seq < last_seq) { + if (seq < *last_seq) { journal_replay_free(c, i); continue; } @@ -1195,13 +1196,12 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) if (bch2_journal_seq_is_blacklisted(c, seq, true)) { fsck_err_on(!JSET_NO_FLUSH(&i->j), c, "found blacklisted journal entry %llu", seq); - - journal_replay_free(c, i); + i->ignore = true; } } /* Check for missing entries: */ - seq = last_seq; + seq = *last_seq; genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1239,7 +1239,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq) " prev at %s\n" " next at %s", missing_start, missing_end, - last_seq, *blacklist_seq - 1, + *last_seq, *blacklist_seq - 1, buf1.buf, buf2.buf); printbuf_exit(&buf1); diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h index 2f8bbf0..a32c287 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal_io.h @@ -52,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, struct journal_replay *); -int bch2_journal_read(struct bch_fs *, u64 *, u64 *); +int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); void bch2_journal_write(struct closure *); diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 045ee95..c8729cb 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -177,6 +177,8 @@ enum journal_errors { #undef x }; +typedef DARRAY(u64) darray_u64; + /* Embedded in struct bch_fs */ struct journal { /* Fastpath stuff up front: */ @@ -203,6 +205,12 @@ struct journal { unsigned buf_size_want; + /* + * We may queue up some things to be journalled (log messages) before + * the journal has actually started - stash them here: + */ + darray_u64 early_journal_entries; + /* * Two journal entries -- one is currently open for new entries, the * other is possibly being written out. diff --git a/libbcachefs/nocow_locking.c b/libbcachefs/nocow_locking.c index b325fb1..bff6267 100644 --- a/libbcachefs/nocow_locking.c +++ b/libbcachefs/nocow_locking.c @@ -4,12 +4,116 @@ #include "nocow_locking.h" #include "util.h" +#include + +bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket) +{ + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); + unsigned i; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket && atomic_read(&l->l[i])) + return true; + return false; +} + +void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) +{ + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); + int lock_val = flags ? 1 : -1; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket) { + if (!atomic_sub_return(lock_val, &l->l[i])) + closure_wake_up(&l->wait); + return; + } + + BUG(); +} + +static bool bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, + u64 dev_bucket, int flags) +{ + int v, lock_val = flags ? 1 : -1; + unsigned i; + + spin_lock(&l->lock); + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket) + goto got_entry; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (!atomic_read(&l->l[i])) { + l->b[i] = dev_bucket; + goto take_lock; + } +fail: + spin_unlock(&l->lock); + return false; +got_entry: + v = atomic_read(&l->l[i]); + if (lock_val > 0 ? v < 0 : v > 0) + goto fail; +take_lock: + atomic_add(lock_val, &l->l[i]); + spin_unlock(&l->lock); + return true; +} + void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, - two_state_lock_t *l, int flags) + struct nocow_lock_bucket *l, + u64 dev_bucket, int flags) +{ + if (!bch2_bucket_nocow_trylock(l, dev_bucket, flags)) { + struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); + u64 start_time = local_clock(); + + __closure_wait_event(&l->wait, bch2_bucket_nocow_trylock(l, dev_bucket, flags)); + bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + } +} + +void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t) { - struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); - u64 start_time = local_clock(); + unsigned i, nr_zero = 0; + struct nocow_lock_bucket *l; + + for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) { + unsigned v = 0; + + for (i = 0; i < ARRAY_SIZE(l->l); i++) + v |= atomic_read(&l->l[i]); + + if (!v) { + nr_zero++; + continue; + } + + if (nr_zero) + prt_printf(out, "(%u empty entries)\n", nr_zero); + nr_zero = 0; + + for (i = 0; i < ARRAY_SIZE(l->l); i++) + if (atomic_read(&l->l[i])) + prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i])); + prt_newline(out); + } + + if (nr_zero) + prt_printf(out, "(%u empty entries)\n", nr_zero); +} + +int bch2_fs_nocow_locking_init(struct bch_fs *c) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++) + spin_lock_init(&c->nocow_locks.l[i].lock); - __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); - bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + return 0; } diff --git a/libbcachefs/nocow_locking.h b/libbcachefs/nocow_locking.h index 2a7a9f4..45258cc 100644 --- a/libbcachefs/nocow_locking.h +++ b/libbcachefs/nocow_locking.h @@ -2,54 +2,38 @@ #ifndef _BCACHEFS_NOCOW_LOCKING_H #define _BCACHEFS_NOCOW_LOCKING_H -#include "bcachefs_format.h" -#include "two_state_shared_lock.h" +#include "bcachefs.h" +#include "alloc_background.h" +#include "nocow_locking_types.h" #include -#define BUCKET_NOCOW_LOCKS_BITS 10 -#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) - -struct bucket_nocow_lock_table { - two_state_lock_t l[BUCKET_NOCOW_LOCKS]; -}; - -#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) - -static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t, - struct bpos bucket) +static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t, + u64 dev_bucket) { - u64 dev_bucket = bucket.inode << 56 | bucket.offset; unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS); return t->l + (h & (BUCKET_NOCOW_LOCKS - 1)); } -static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, - struct bpos bucket) -{ - two_state_lock_t *l = bucket_nocow_lock(t, bucket); - - return atomic_long_read(&l->v) != 0; -} - -static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, - struct bpos bucket, int flags) -{ - two_state_lock_t *l = bucket_nocow_lock(t, bucket); - - bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); -} +#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) -void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int); +bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos); +void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int); +void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, + struct nocow_lock_bucket *, u64, int); static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) { - two_state_lock_t *l = bucket_nocow_lock(t, bucket); + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); - if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE)) - __bch2_bucket_nocow_lock(t, l, flags); + __bch2_bucket_nocow_lock(t, l, dev_bucket, flags); } +void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); + +int bch2_fs_nocow_locking_init(struct bch_fs *); + #endif /* _BCACHEFS_NOCOW_LOCKING_H */ diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/nocow_locking_types.h new file mode 100644 index 0000000..bd12bf6 --- /dev/null +++ b/libbcachefs/nocow_locking_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H +#define _BCACHEFS_NOCOW_LOCKING_TYPES_H + +#define BUCKET_NOCOW_LOCKS_BITS 10 +#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) + +struct nocow_lock_bucket { + struct closure_waitlist wait; + spinlock_t lock; + u64 b[4]; + atomic_t l[4]; +} __aligned(SMP_CACHE_BYTES); + +struct bucket_nocow_lock_table { + struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS]; +}; + +#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */ + diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 0ad2bb2..8c1aa6d 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -588,7 +588,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_journal_replay(struct bch_fs *c) +static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq) { struct journal_keys *keys = &c->journal_keys; struct journal_key **keys_sorted, *k; @@ -610,6 +610,13 @@ static int bch2_journal_replay(struct bch_fs *c) sizeof(keys_sorted[0]), journal_sort_seq_cmp, NULL); + if (keys->nr) { + ret = bch2_fs_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", + keys->nr, start_seq, end_seq); + if (ret) + goto err; + } + for (i = 0; i < keys->nr; i++) { k = keys_sorted[i]; @@ -639,7 +646,7 @@ static int bch2_journal_replay(struct bch_fs *c) ret = bch2_journal_error(j); if (keys->nr && !ret) - bch2_journal_log_msg(&c->journal, "journal replay finished"); + bch2_fs_log_msg(c, "journal replay finished"); err: kvfree(keys_sorted); return ret; @@ -1045,7 +1052,7 @@ int bch2_fs_recovery(struct bch_fs *c) const char *err = "cannot allocate memory"; struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; - u64 blacklist_seq, journal_seq; + u64 last_seq, blacklist_seq, journal_seq; bool write_sb = false; int ret = 0; @@ -1115,7 +1122,7 @@ int bch2_fs_recovery(struct bch_fs *c) struct journal_replay **i; bch_verbose(c, "starting journal read"); - ret = bch2_journal_read(c, &blacklist_seq, &journal_seq); + ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq); if (ret) goto err; @@ -1143,7 +1150,15 @@ int bch2_fs_recovery(struct bch_fs *c) if (!last_journal_entry) { fsck_err_on(!c->sb.clean, c, "no journal entries found"); - goto use_clean; + if (clean) + goto use_clean; + + genradix_for_each_reverse(&c->journal_entries, iter, i) + if (*i) { + last_journal_entry = &(*i)->j; + (*i)->ignore = false; + break; + } } ret = journal_keys_sort(c); @@ -1189,7 +1204,9 @@ use_clean: journal_seq += 8; if (blacklist_seq != journal_seq) { - ret = bch2_journal_seq_blacklist_add(c, + ret = bch2_fs_log_msg(c, "blacklisting entries %llu-%llu", + blacklist_seq, journal_seq) ?: + bch2_journal_seq_blacklist_add(c, blacklist_seq, journal_seq); if (ret) { bch_err(c, "error creating new journal seq blacklist entry"); @@ -1197,7 +1214,9 @@ use_clean: } } - ret = bch2_fs_journal_start(&c->journal, journal_seq); + ret = bch2_fs_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", + journal_seq, last_seq, blacklist_seq - 1) ?: + bch2_fs_journal_start(&c->journal, journal_seq); if (ret) goto err; @@ -1264,7 +1283,7 @@ use_clean: bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr); err = "journal replay failed"; - ret = bch2_journal_replay(c); + ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1); if (ret) goto err; if (c->opts.verbose || !c->sb.clean) @@ -1345,7 +1364,7 @@ use_clean: bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); err = "journal replay failed"; - ret = bch2_journal_replay(c); + ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1); if (ret) goto err; if (c->opts.verbose || !c->sb.clean) diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 7cac056..95c16f7 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -37,6 +37,7 @@ #include "move.h" #include "migrate.h" #include "movinggc.h" +#include "nocow_locking.h" #include "quota.h" #include "rebalance.h" #include "recovery.h" @@ -803,6 +804,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_buckets_waiting_for_journal_init(c) ?: bch2_fs_subvolumes_init(c) ?: bch2_fs_io_init(c) ?: + bch2_fs_nocow_locking_init(c) ?: bch2_fs_encryption_init(c) ?: bch2_fs_compress_init(c) ?: bch2_fs_ec_init(c) ?: diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index bad3eaf..6e49cf9 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -27,6 +27,7 @@ #include "journal.h" #include "keylist.h" #include "move.h" +#include "nocow_locking.h" #include "opts.h" #include "rebalance.h" #include "replicas.h" @@ -446,22 +447,9 @@ SHOW(bch2_fs) if (attr == &sysfs_data_jobs) data_progress_to_text(out, c); - if (attr == &sysfs_nocow_lock_table) { - int i, count = 1; - long last, curr = 0; - - last = atomic_long_read(&c->nocow_locks.l[0].v); - for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) { - curr = atomic_long_read(&c->nocow_locks.l[i].v); - if (last != curr) { - prt_printf(out, "%li: %d\n", last, count); - count = 1; - last = curr; - } else - count++; - } - prt_printf(out, "%li: %d\n", last, count); -} + if (attr == &sysfs_nocow_lock_table) + bch2_nocow_locks_to_text(out, &c->nocow_locks); + return 0; } -- 2.39.2