From 74148a8ee52526c44752f1773365963f18734ac9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Mar 2022 02:10:28 -0400 Subject: [PATCH] Update bcachefs sources to 0e705f5944 fixup! bcachefs: Refactor bch2_btree_node_mem_alloc() --- .bcachefs_revision | 2 +- include/trace/events/bcachefs.h | 82 +++++--- libbcachefs.c | 10 +- libbcachefs/alloc_foreground.c | 22 ++- libbcachefs/alloc_foreground.h | 2 + libbcachefs/alloc_types.h | 14 +- libbcachefs/bcachefs_format.h | 30 +-- libbcachefs/btree_cache.c | 4 +- libbcachefs/btree_gc.c | 2 +- libbcachefs/btree_io.c | 2 +- libbcachefs/btree_iter.c | 16 +- libbcachefs/btree_key_cache.c | 2 +- libbcachefs/btree_types.h | 2 +- libbcachefs/btree_update.h | 7 +- libbcachefs/btree_update_interior.c | 12 +- libbcachefs/btree_update_leaf.c | 25 ++- libbcachefs/buckets.h | 8 +- libbcachefs/darray.h | 291 ++++++++++++++++++++++++++++ libbcachefs/ec.c | 21 +- libbcachefs/fs-io.c | 6 +- libbcachefs/io.h | 4 +- libbcachefs/journal.c | 70 ++++--- libbcachefs/journal.h | 53 +++-- libbcachefs/journal_io.c | 25 ++- libbcachefs/journal_reclaim.c | 8 +- libbcachefs/journal_types.h | 41 +++- libbcachefs/move.c | 3 +- libbcachefs/movinggc.c | 23 +-- libbcachefs/opts.c | 55 +++--- libbcachefs/opts.h | 8 +- libbcachefs/recovery.c | 5 +- libbcachefs/super-io.c | 24 ++- libbcachefs/super.c | 2 +- libbcachefs/sysfs.c | 4 +- libbcachefs/xattr.c | 2 +- 35 files changed, 647 insertions(+), 240 deletions(-) create mode 100644 libbcachefs/darray.h diff --git a/.bcachefs_revision b/.bcachefs_revision index be0ed05..3072ae8 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -f05b3c1af906802e46f9caca13fb6260d8293fdf +0e705f5944069d3ded1d9238f7805dd210e79a25 diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 832e9f1..08de7e6 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -468,58 +468,62 @@ TRACE_EVENT(invalidate, ); DECLARE_EVENT_CLASS(bucket_alloc, - 
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), - TP_ARGS(ca, reserve), + TP_PROTO(struct bch_dev *ca, const char *alloc_reserve), + TP_ARGS(ca, alloc_reserve), TP_STRUCT__entry( __field(dev_t, dev ) - __field(enum alloc_reserve, reserve ) + __array(char, reserve, 16 ) ), TP_fast_assign( __entry->dev = ca->dev; - __entry->reserve = reserve; + strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve)); ), - TP_printk("%d,%d reserve %d", + TP_printk("%d,%d reserve %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->reserve) ); DEFINE_EVENT(bucket_alloc, bucket_alloc, - TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), - TP_ARGS(ca, reserve) + TP_PROTO(struct bch_dev *ca, const char *alloc_reserve), + TP_ARGS(ca, alloc_reserve) ); TRACE_EVENT(bucket_alloc_fail, - TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve, - u64 avail, u64 need_journal_commit), - TP_ARGS(ca, reserve, avail, need_journal_commit), + TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, + u64 avail, u64 need_journal_commit, + bool nonblocking), + TP_ARGS(ca, alloc_reserve, avail, need_journal_commit, nonblocking), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(enum alloc_reserve, reserve ) - __field(u64, avail ) - __field(u64, need_journal_commit ) + __field(dev_t, dev ) + __array(char, reserve, 16 ) + __field(u64, avail ) + __field(u64, need_journal_commit ) + __field(bool, nonblocking ) ), TP_fast_assign( __entry->dev = ca->dev; - __entry->reserve = reserve; + strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve)); __entry->avail = avail; __entry->need_journal_commit = need_journal_commit; + __entry->nonblocking = nonblocking; ), - TP_printk("%d,%d reserve %d avail %llu need_journal_commit %llu", + TP_printk("%d,%d reserve %s avail %llu need_journal_commit %llu nonblocking %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->reserve, __entry->avail, - __entry->need_journal_commit) + __entry->need_journal_commit, + __entry->nonblocking) ); 
DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail, - TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), - TP_ARGS(ca, reserve) + TP_PROTO(struct bch_dev *ca, const char *alloc_reserve), + TP_ARGS(ca, alloc_reserve) ); /* Moving IO */ @@ -939,12 +943,46 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_key_cache_key_realloced, +TRACE_EVENT(trans_restart_key_cache_key_realloced, TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct bpos *pos, + unsigned old_u64s, + unsigned new_u64s), + TP_ARGS(trans_fn, caller_ip, btree_id, pos, old_u64s, new_u64s), + + TP_STRUCT__entry( + __array(char, trans_fn, 24 ) + __field(unsigned long, caller_ip ) + __field(enum btree_id, btree_id ) + __field(u64, inode ) + __field(u64, offset ) + __field(u32, snapshot ) + __field(u32, old_u64s ) + __field(u32, new_u64s ) + ), + + TP_fast_assign( + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->btree_id = btree_id; + __entry->inode = pos->inode; + __entry->offset = pos->offset; + __entry->snapshot = pos->snapshot; + __entry->old_u64s = old_u64s; + __entry->new_u64s = new_u64s; + ), + + TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", + __entry->trans_fn, + (void *) __entry->caller_ip, + bch2_btree_ids[__entry->btree_id], + __entry->inode, + __entry->offset, + __entry->snapshot, + __entry->old_u64s, + __entry->new_u64s) ); #endif /* _TRACE_BCACHE_H */ diff --git a/libbcachefs.c b/libbcachefs.c index 1c780be..ceca428 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -597,6 +597,7 @@ next: struct bch_opts bch2_parse_opts(struct bch_opt_strs strs) { struct bch_opts opts = bch2_opts_empty(); + struct printbuf err = PRINTBUF; unsigned i; int ret; u64 v; @@ -606,17 +607,16 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs) 
bch2_opt_table[i].type == BCH_OPT_FN) continue; - ret = bch2_opt_parse(NULL, "option", + ret = bch2_opt_parse(NULL, &bch2_opt_table[i], - strs.by_id[i], &v); + strs.by_id[i], &v, &err); if (ret < 0) - die("Invalid %s: %s", - bch2_opt_table[i].attr.name, - strerror(-ret)); + die("Invalid option %s", err.buf); bch2_opt_set_by_id(&opts, i, v); } + printbuf_exit(&err); return opts; } diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 178d7c0..5b11493 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -32,6 +32,13 @@ #include #include +const char * const bch2_alloc_reserves[] = { +#define x(t) #t, + BCH_ALLOC_RESERVES() +#undef x + NULL +}; + /* * Open buckets represent a bucket that's currently being allocated from. They * serve two purposes: @@ -172,10 +179,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca) static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) { switch (reserve) { - case RESERVE_BTREE: - case RESERVE_BTREE_MOVINGGC: + case RESERVE_btree: + case RESERVE_btree_movinggc: return 0; - case RESERVE_MOVINGGC: + case RESERVE_movinggc: return OPEN_BUCKETS_COUNT / 4; default: return OPEN_BUCKETS_COUNT / 2; @@ -213,7 +220,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * spin_unlock(&c->freelist_lock); - trace_open_bucket_alloc_fail(ca, reserve); + trace_open_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve]); return ERR_PTR(-OPEN_BUCKETS_EMPTY); } @@ -254,7 +261,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * spin_unlock(&c->freelist_lock); - trace_bucket_alloc(ca, reserve); + trace_bucket_alloc(ca, bch2_alloc_reserves[reserve]); return ob; } @@ -487,7 +494,8 @@ err: ob = ERR_PTR(ret ?: -FREELIST_EMPTY); if (ob == ERR_PTR(-FREELIST_EMPTY)) { - trace_bucket_alloc_fail(ca, reserve, avail, need_journal_commit); + trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], avail, + need_journal_commit, cl 
== NULL); atomic_long_inc(&c->bucket_alloc_fail); } @@ -521,7 +529,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca, struct dev_stripe_state *stripe) { u64 *v = stripe->next_alloc + ca->dev_idx; - u64 free_space = dev_buckets_available(ca, RESERVE_NONE); + u64 free_space = dev_buckets_available(ca, RESERVE_none); u64 free_space_inv = free_space ? div64_u64(1ULL << 48, free_space) : 1ULL << 48; diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index f51cec5..8bc7887 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -12,6 +12,8 @@ struct bch_dev; struct bch_fs; struct bch_devs_List; +extern const char * const bch2_alloc_reserves[]; + struct dev_alloc_list { unsigned nr; u8 devs[BCH_SB_MEMBERS_MAX]; diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index 22e1fbd..21b5645 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -10,12 +10,16 @@ struct ec_bucket_buf; +#define BCH_ALLOC_RESERVES() \ + x(btree_movinggc) \ + x(btree) \ + x(movinggc) \ + x(none) + enum alloc_reserve { - RESERVE_BTREE_MOVINGGC = -2, - RESERVE_BTREE = -1, - RESERVE_MOVINGGC = 0, - RESERVE_NONE = 1, - RESERVE_NR = 2, +#define x(name) RESERVE_##name, + BCH_ALLOC_RESERVES() +#undef x }; #define OPEN_BUCKETS_COUNT 1024 diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index bb54ac1..3382355 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1312,20 +1312,24 @@ struct bch_sb_field_journal_seq_blacklist { #define BCH_JSET_VERSION_OLD 2 #define BCH_BSET_VERSION_OLD 3 +#define BCH_METADATA_VERSIONS() \ + x(bkey_renumber, 10) \ + x(inode_btree_change, 11) \ + x(snapshot, 12) \ + x(inode_backpointers, 13) \ + x(btree_ptr_sectors_written, 14) \ + x(snapshot_2, 15) \ + x(reflink_p_fix, 16) \ + x(subvol_dirent, 17) \ + x(inode_v2, 18) \ + x(freespace, 19) + enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, - 
bcachefs_metadata_version_new_versioning = 10, - bcachefs_metadata_version_bkey_renumber = 10, - bcachefs_metadata_version_inode_btree_change = 11, - bcachefs_metadata_version_snapshot = 12, - bcachefs_metadata_version_inode_backpointers = 13, - bcachefs_metadata_version_btree_ptr_sectors_written = 14, - bcachefs_metadata_version_snapshot_2 = 15, - bcachefs_metadata_version_reflink_p_fix = 16, - bcachefs_metadata_version_subvol_dirent = 17, - bcachefs_metadata_version_inode_v2 = 18, - bcachefs_metadata_version_freespace = 19, - bcachefs_metadata_version_max = 20, + bcachefs_metadata_version_min = 9, +#define x(t, n) bcachefs_metadata_version_##t = n, + BCH_METADATA_VERSIONS() +#undef x + bcachefs_metadata_version_max }; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index d91408c..0dcdc30 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -652,6 +652,8 @@ err_locked: /* Try to cannibalize another cached btree node: */ if (bc->alloc_lock == current) { b2 = btree_node_cannibalize(c); + bch2_btree_node_hash_remove(bc, b2); + if (b) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); @@ -665,8 +667,6 @@ err_locked: mutex_unlock(&bc->lock); - bch2_btree_node_hash_remove(bc, b); - trace_btree_node_cannibalize(c); goto out; } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 5c54a0c..ba81043 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1367,7 +1367,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (IS_ERR(a)) return PTR_ERR(a); - ret = bch2_trans_update(trans, iter, &a->k, 0); + ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); fsck_err: return ret; } diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 1df454f..a801400 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1891,7 +1891,7 @@ do_write: BUG_ON(BSET_BIG_ENDIAN(i) != 
CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); - i->version = c->sb.version < bcachefs_metadata_version_new_versioning + i->version = c->sb.version < bcachefs_metadata_version_bkey_renumber ? cpu_to_le16(BCH_BSET_VERSION_OLD) : cpu_to_le16(c->sb.version); SET_BSET_OFFSET(i, b->written); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 7fd0379..56c493c 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1816,21 +1816,29 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) { struct btree_insert_entry *i; - pr_buf(buf, "transaction updates for %s journal seq %llu\n", + pr_buf(buf, "transaction updates for %s journal seq %llu", trans->fn, trans->journal_res.seq); + pr_newline(buf); + pr_indent_push(buf, 2); trans_for_each_update(trans, i) { struct bkey_s_c old = { &i->old_k, i->old_v }; - pr_buf(buf, "update: btree %s %pS\n old ", + pr_buf(buf, "update: btree %s %pS", bch2_btree_ids[i->btree_id], (void *) i->ip_allocated); + pr_newline(buf); + pr_buf(buf, " old "); bch2_bkey_val_to_text(buf, trans->c, old); - pr_buf(buf, "\n new "); + pr_newline(buf); + + pr_buf(buf, " new "); bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k)); - pr_buf(buf, "\n"); + pr_newline(buf); } + + pr_indent_pop(buf, 2); } noinline __cold diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index b1b7a30..f5a942b 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -421,7 +421,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| (ck->journal.seq == journal_last_seq(j) - ? BTREE_INSERT_JOURNAL_RESERVED + ? 
JOURNAL_WATERMARK_reserved : 0)| commit_flags); if (ret) { diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 788b981..993f04f 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -326,7 +326,7 @@ struct bkey_cached { struct btree_bkey_cached_common c; unsigned long flags; - u8 u64s; + u16 u64s; bool valid; u32 btree_trans_barrier_seq; struct bkey_cached_key key; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index d9a406a..ca142f9 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -16,12 +16,12 @@ bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *, void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); enum btree_insert_flags { - __BTREE_INSERT_NOFAIL, + /* First two bits for journal watermark: */ + __BTREE_INSERT_NOFAIL = 2, __BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_USE_RESERVE, __BTREE_INSERT_JOURNAL_REPLAY, - __BTREE_INSERT_JOURNAL_RESERVED, __BTREE_INSERT_JOURNAL_RECLAIM, __BTREE_INSERT_NOWAIT, __BTREE_INSERT_GC_LOCK_HELD, @@ -41,9 +41,6 @@ enum btree_insert_flags { /* Insert is for journal replay - don't get journal reservations: */ #define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) -/* Indicates that we have pre-reserved space in the journal: */ -#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED) - /* Insert is being called from journal reclaim path: */ #define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM) diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 5834190..c2232f8 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -194,10 +194,10 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, if (flags & BTREE_INSERT_USE_RESERVE) { nr_reserve = 0; - alloc_reserve = RESERVE_BTREE_MOVINGGC; + alloc_reserve = RESERVE_btree_movinggc; } else { 
nr_reserve = BTREE_NODE_RESERVE; - alloc_reserve = RESERVE_BTREE; + alloc_reserve = RESERVE_btree; } mutex_lock(&c->btree_reserve_cache_lock); @@ -606,7 +606,7 @@ static void btree_update_nodes_written(struct btree_update *as) BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED, + JOURNAL_WATERMARK_reserved, btree_update_nodes_written_trans(&trans, as)); bch2_trans_exit(&trans); @@ -970,13 +970,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ? BCH_DISK_RESERVATION_NOFAIL : 0; unsigned nr_nodes[2] = { 0, 0 }; unsigned update_level = level; - int journal_flags = 0; + int journal_flags = flags & JOURNAL_WATERMARK_MASK; int ret = 0; BUG_ON(!path->should_be_locked); - if (flags & BTREE_INSERT_JOURNAL_RESERVED) - journal_flags |= JOURNAL_RES_GET_RESERVED; if (flags & BTREE_INSERT_JOURNAL_RECLAIM) journal_flags |= JOURNAL_RES_GET_NONBLOCK; @@ -1958,7 +1956,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_USE_RESERVE| BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED); + JOURNAL_WATERMARK_reserved); if (ret) goto err; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 47623f3..8d185c7 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -295,11 +295,10 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret; - if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) - flags |= JOURNAL_RES_GET_RESERVED; - ret = bch2_journal_res_get(&c->journal, &trans->journal_res, - trans->journal_u64s, flags); + trans->journal_u64s, + flags| + (trans->flags & JOURNAL_WATERMARK_MASK)); return ret == -EAGAIN ? 
BTREE_INSERT_NEED_JOURNAL_RES : ret; } @@ -350,7 +349,7 @@ btree_key_can_insert_cached(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; - unsigned new_u64s; + unsigned old_u64s = ck->u64s, new_u64s; struct bkey_i *new_k; EBUG_ON(path->level); @@ -384,7 +383,8 @@ btree_key_can_insert_cached(struct btree_trans *trans, * transaction restart: */ trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_, - path->btree_id, &path->pos); + path->btree_id, &path->pos, + old_u64s, new_u64s); /* * Not using btree_trans_restart() because we can't unlock here, we have * write locks held: @@ -459,7 +459,13 @@ static int run_one_mem_trigger(struct btree_trans *trans, static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, bool overwrite) { - struct bkey_s_c old = { &i->old_k, i->old_v }; + /* + * Transactional triggers create new btree_insert_entries, so we can't + * pass them a pointer to a btree_insert_entry, that memory is going to + * move: + */ + struct bkey old_k = i->old_k; + struct bkey_s_c old = { &old_k, i->old_v }; int ret = 0; if ((i->flags & BTREE_TRIGGER_NORUN) || @@ -900,8 +906,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ret = bch2_journal_preres_get(&c->journal, &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK| - ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED) - ? 
JOURNAL_RES_GET_RESERVED : 0)); + (trans->flags & JOURNAL_WATERMARK_MASK)); if (unlikely(ret == -EAGAIN)) ret = bch2_trans_journal_preres_get_cold(trans, trans->journal_preres_u64s, trace_ip); @@ -986,7 +991,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && - !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) { + !(trans->flags & JOURNAL_WATERMARK_reserved)) { trans->restarted = true; ret = -EAGAIN; break; diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 4a3d6bf..25baca3 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -122,16 +122,16 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca, s64 reserved = 0; switch (reserve) { - case RESERVE_NONE: + case RESERVE_none: reserved += ca->mi.nbuckets >> 6; fallthrough; - case RESERVE_MOVINGGC: + case RESERVE_movinggc: reserved += ca->nr_btree_reserve; fallthrough; - case RESERVE_BTREE: + case RESERVE_btree: reserved += ca->nr_btree_reserve; fallthrough; - case RESERVE_BTREE_MOVINGGC: + case RESERVE_btree_movinggc: break; default: BUG(); diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h new file mode 100644 index 0000000..daf872f --- /dev/null +++ b/libbcachefs/darray.h @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2011 Joseph Adams + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef CCAN_DARRAY_H +#define CCAN_DARRAY_H + +#include +#include +#include "config.h" + +/* + * SYNOPSIS + * + * Life cycle of a darray (dynamically-allocated array): + * + * darray(int) a = darray_new(); + * darray_free(a); + * + * struct {darray(int) a;} foo; + * darray_init(foo.a); + * darray_free(foo.a); + * + * Typedefs for darrays of common types: + * + * darray_char, darray_schar, darray_uchar + * darray_short, darray_int, darray_long + * darray_ushort, darray_uint, darray_ulong + * + * Access: + * + * T darray_item(darray(T) arr, size_t index); + * size_t darray_size(darray(T) arr); + * size_t darray_alloc(darray(T) arr); + * bool darray_empty(darray(T) arr); + * + * Insertion (single item): + * + * void darray_append(darray(T) arr, T item); + * void darray_prepend(darray(T) arr, T item); + * void darray_push(darray(T) arr, T item); // same as darray_append + * + * Insertion (multiple items): + * + * void darray_append_items(darray(T) arr, T *items, size_t count); + * void darray_prepend_items(darray(T) arr, T *items, size_t count); + * + * void darray_appends(darray(T) arr, [T item, [...]]); + * void darray_prepends(darray(T) arr, [T item, [...]]); + * + * // Same functionality as above, but does not require typeof. 
+ * void darray_appends_t(darray(T) arr, #T, [T item, [...]]); + * void darray_prepends_t(darray(T) arr, #T, [T item, [...]]); + * + * Removal: + * + * T darray_pop(darray(T) arr | darray_size(arr) != 0); + * T* darray_pop_check(darray(T*) arr); + * void darray_remove(darray(T) arr, size_t index); + * + * Replacement: + * + * void darray_from_items(darray(T) arr, T *items, size_t count); + * void darray_from_c(darray(T) arr, T c_array[N]); + * + * String buffer: + * + * void darray_append_string(darray(char) arr, const char *str); + * void darray_append_lit(darray(char) arr, char stringLiteral[N+1]); + * + * void darray_prepend_string(darray(char) arr, const char *str); + * void darray_prepend_lit(darray(char) arr, char stringLiteral[N+1]); + * + * void darray_from_string(darray(T) arr, const char *str); + * void darray_from_lit(darray(char) arr, char stringLiteral[N+1]); + * + * Size management: + * + * void darray_resize(darray(T) arr, size_t newSize); + * void darray_resize0(darray(T) arr, size_t newSize); + * + * void darray_realloc(darray(T) arr, size_t newAlloc); + * void darray_growalloc(darray(T) arr, size_t newAlloc); + * + * void darray_make_room(darray(T) arr, size_t room); + * + * Traversal: + * + * darray_foreach(T *&i, darray(T) arr) {...} + * darray_foreach_reverse(T *&i, darray(T) arr) {...} + * + * Except for darray_foreach, darray_foreach_reverse, and darray_remove, + * all macros evaluate their non-darray arguments only once. 
+ */ + +/*** Life cycle ***/ + +#define darray(type) struct {type *item; size_t size; size_t alloc;} + +#define darray_new() {0,0,0} +#define darray_init(arr) do {(arr).item=0; (arr).size=0; (arr).alloc=0;} while(0) +#define darray_free(arr) do {kfree((arr).item);} while(0) + + + +/*** Access ***/ + +#define darray_item(arr, i) ((arr).item[i]) +#define darray_size(arr) ((arr).size) +#define darray_alloc(arr) ((arr).alloc) +#define darray_empty(arr) ((arr).size == 0) + + +/*** Insertion (single item) ***/ + +#define darray_append(arr, ...) do { \ + darray_resize(arr, (arr).size+1); \ + (arr).item[(arr).size-1] = (__VA_ARGS__); \ + } while(0) +#define darray_prepend(arr, ...) do { \ + darray_resize(arr, (arr).size+1); \ + memmove((arr).item+1, (arr).item, ((arr).size-1)*sizeof(*(arr).item)); \ + (arr).item[0] = (__VA_ARGS__); \ + } while(0) +#define darray_push(arr, ...) darray_append(arr, __VA_ARGS__) + + +/*** Insertion (multiple items) ***/ + +#define darray_append_items(arr, items, count) do { \ + size_t __count = (count), __oldSize = (arr).size; \ + darray_resize(arr, __oldSize + __count); \ + memcpy((arr).item + __oldSize, items, __count * sizeof(*(arr).item)); \ + } while(0) + +#define darray_prepend_items(arr, items, count) do { \ + size_t __count = (count), __oldSize = (arr).size; \ + darray_resize(arr, __count + __oldSize); \ + memmove((arr).item + __count, (arr).item, __oldSize * sizeof(*(arr).item)); \ + memcpy((arr).item, items, __count * sizeof(*(arr).item)); \ + } while(0) + +#if HAVE_TYPEOF +#define darray_appends(arr, ...) darray_appends_t(arr, typeof((*(arr).item)), __VA_ARGS__) +#define darray_prepends(arr, ...) darray_prepends_t(arr, typeof((*(arr).item)), __VA_ARGS__) +#endif + +#define darray_appends_t(arr, type, ...) do { \ + type __src[] = {__VA_ARGS__}; \ + darray_append_items(arr, __src, sizeof(__src)/sizeof(*__src)); \ + } while(0) +#define darray_prepends_t(arr, type, ...) 
do { \ + type __src[] = {__VA_ARGS__}; \ + darray_prepend_items(arr, __src, sizeof(__src)/sizeof(*__src)); \ + } while(0) + + +/*** Removal ***/ + +/* Warning: Do not call darray_pop on an empty darray. */ +#define darray_pop(arr) ((arr).item[--(arr).size]) +#define darray_pop_check(arr) ((arr).size ? darray_pop(arr) : NULL) +/* Warning, slow: Requires copying all elements after removed item. */ +#define darray_remove(arr, index) do { \ + if (index < arr.size-1) \ + memmove(&(arr).item[index], &(arr).item[index+1], ((arr).size-1-i)*sizeof(*(arr).item)); \ + (arr).size--; \ + } while(0) + + +/*** Replacement ***/ + +#define darray_from_items(arr, items, count) do {size_t __count = (count); darray_resize(arr, __count); memcpy((arr).item, items, __count*sizeof(*(arr).item));} while(0) +#define darray_from_c(arr, c_array) darray_from_items(arr, c_array, sizeof(c_array)/sizeof(*(c_array))) + + +/*** Size management ***/ + +#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize)) +#define darray_resize0(arr, newSize) do { \ + size_t __oldSize = (arr).size, __newSize = (newSize); \ + (arr).size = __newSize; \ + if (__newSize > __oldSize) { \ + darray_growalloc(arr, __newSize); \ + memset(&(arr).item[__oldSize], 0, (__newSize - __oldSize) * sizeof(*(arr).item)); \ + } \ + } while(0) + +#define darray_realloc(arr, newAlloc) do { \ + (arr).item = realloc((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \ + } while(0) +#define darray_growalloc(arr, need) do { \ + size_t __need = (need); \ + if (__need > (arr).alloc) \ + darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \ + } while(0) + +#if HAVE_STATEMENT_EXPR==1 +#define darray_make_room(arr, room) ({size_t newAlloc = (arr).size+(room); if ((arr).alloc &(arr).item[0]; ) + + +#endif /* CCAN_DARRAY_H */ + +/* + +darray_growalloc(arr, newAlloc) sees if the darray can currently hold newAlloc items; + if not, it increases the alloc to satisfy this requirement, allocating slack + 
space to avoid having to reallocate for every size increment. + +darray_from_string(arr, str) copies a string to an darray_char. + +darray_push(arr, item) pushes an item to the end of the darray. +darray_pop(arr) pops it back out. Be sure there is at least one item in the darray before calling. +darray_pop_check(arr) does the same as darray_pop, but returns NULL if there are no more items left in the darray. + +darray_make_room(arr, room) ensures there's 'room' elements of space after the end of the darray, and it returns a pointer to this space. +Currently requires HAVE_STATEMENT_EXPR, but I plan to remove this dependency by creating an inline function. + +The following require HAVE_TYPEOF==1 : + +darray_appends(arr, item0, item1...) appends a collection of comma-delimited items to the darray. +darray_prepends(arr, item0, item1...) prepends a collection of comma-delimited items to the darray.\ + + +Examples: + + darray(int) arr; + int *i; + + darray_appends(arr, 0,1,2,3,4); + darray_appends(arr, -5,-4,-3,-2,-1); + darray_foreach(i, arr) + printf("%d ", *i); + printf("\n"); + + darray_free(arr); + + + typedef struct {int n,d;} Fraction; + darray(Fraction) fractions; + Fraction *i; + + darray_appends(fractions, {3,4}, {3,5}, {2,1}); + darray_foreach(i, fractions) + printf("%d/%d\n", i->n, i->d); + + darray_free(fractions); +*/ diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 6027a7d..616a551 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1295,9 +1295,6 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, BUG_ON(nr_have_data > h->s->nr_data); BUG_ON(nr_have_parity > h->s->nr_parity); - percpu_down_read(&c->mark_lock); - rcu_read_lock(); - buckets.nr = 0; if (nr_have_parity < h->s->nr_parity) { ret = bch2_bucket_alloc_set(c, &buckets, @@ -1307,8 +1304,8 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, &nr_have_parity, &have_cache, h->copygc - ? RESERVE_MOVINGGC - : RESERVE_NONE, + ? 
RESERVE_movinggc + : RESERVE_none, 0, cl); @@ -1324,7 +1321,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, } if (ret) - goto err; + return ret; } buckets.nr = 0; @@ -1336,8 +1333,8 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, &nr_have_data, &have_cache, h->copygc - ? RESERVE_MOVINGGC - : RESERVE_NONE, + ? RESERVE_movinggc + : RESERVE_none, 0, cl); @@ -1352,12 +1349,10 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, } if (ret) - goto err; + return ret; } -err: - rcu_read_unlock(); - percpu_up_read(&c->mark_lock); - return ret; + + return 0; } /* XXX: doesn't obey target: */ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index b05d6e8..051372b 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1287,7 +1287,7 @@ static void bch2_writepage_io_done(struct closure *cl) * racing with fallocate can cause us to add fewer sectors than * expected - but we shouldn't add more sectors than expected: */ - WARN_ON(io->op.i_sectors_delta > 0); + WARN_ON_ONCE(io->op.i_sectors_delta > 0); /* * (error (due to going RO) halfway through a page can screw that up @@ -1473,8 +1473,8 @@ do_io: sectors << 9, offset << 9)); /* Check for writing past i_size: */ - WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) > - round_up(i_size, block_bytes(c))); + WARN_ON_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > + round_up(i_size, block_bytes(c))); w->io->op.res.sectors += reserved_sectors; w->io->op.i_sectors_delta -= dirty_sectors; diff --git a/libbcachefs/io.h b/libbcachefs/io.h index 1aa422d..fb51145 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -50,7 +50,7 @@ static inline u64 *op_journal_seq(struct bch_write_op *op) static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) { - return op->alloc_reserve == RESERVE_MOVINGGC + return op->alloc_reserve == RESERVE_movinggc ? 
op->c->copygc_wq : op->c->btree_update_wq; } @@ -79,7 +79,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, op->compression_type = bch2_compression_opt_to_type[opts.compression]; op->nr_replicas = 0; op->nr_replicas_required = c->opts.data_replicas_required; - op->alloc_reserve = RESERVE_NONE; + op->alloc_reserve = RESERVE_none; op->incompressible = 0; op->open_buckets.nr = 0; op->devs_have.nr = 0; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 340f0be..6d91a2c 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -20,6 +20,18 @@ #include +#define x(n) #n, +static const char * const bch2_journal_watermarks[] = { + JOURNAL_WATERMARKS() + NULL +}; + +static const char * const bch2_journal_errors[] = { + JOURNAL_ERRORS() + NULL +}; +#undef x + static inline bool journal_seq_unwritten(struct journal *j, u64 seq) { return seq > j->seq_ondisk; @@ -208,19 +220,19 @@ static int journal_entry_open(struct journal *j) BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); if (j->blocked) - return cur_entry_blocked; + return JOURNAL_ERR_blocked; if (j->cur_entry_error) return j->cur_entry_error; if (bch2_journal_error(j)) - return cur_entry_insufficient_devices; /* -EROFS */ + return JOURNAL_ERR_insufficient_devices; /* -EROFS */ if (!fifo_free(&j->pin)) - return cur_entry_journal_pin_full; + return JOURNAL_ERR_journal_pin_full; if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) - 1) - return cur_entry_max_in_flight; + return JOURNAL_ERR_max_in_flight; BUG_ON(!j->cur_entry_sectors); @@ -239,7 +251,7 @@ static int journal_entry_open(struct journal *j) u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); if (u64s <= 0) - return cur_entry_journal_full; + return JOURNAL_ERR_journal_full; if (fifo_empty(&j->pin) && j->reclaim_thread) wake_up_process(j->reclaim_thread); @@ -355,13 +367,12 @@ retry: return 0; } - if (!(flags & JOURNAL_RES_GET_RESERVED) && - !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { + if ((flags & 
JOURNAL_WATERMARK_MASK) < j->watermark) { /* * Don't want to close current journal entry, just need to * invoke reclaim: */ - ret = cur_entry_journal_full; + ret = JOURNAL_ERR_journal_full; goto unlock; } @@ -379,10 +390,10 @@ retry: __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); ret = journal_entry_open(j); - if (ret == cur_entry_max_in_flight) + if (ret == JOURNAL_ERR_max_in_flight) trace_journal_entry_full(c); unlock: - if ((ret && ret != cur_entry_insufficient_devices) && + if ((ret && ret != JOURNAL_ERR_insufficient_devices) && !j->res_get_blocked_start) { j->res_get_blocked_start = local_clock() ?: 1; trace_journal_full(c); @@ -394,14 +405,15 @@ unlock: if (!ret) goto retry; - if ((ret == cur_entry_journal_full || - ret == cur_entry_journal_pin_full) && + if ((ret == JOURNAL_ERR_journal_full || + ret == JOURNAL_ERR_journal_pin_full) && !can_discard && !nr_unwritten_journal_entries(j) && - (flags & JOURNAL_RES_GET_RESERVED)) { + (flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) { struct printbuf buf = PRINTBUF; - bch_err(c, "Journal stuck! Hava a pre-reservation but journal full"); + bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)", + bch2_journal_errors[ret]); bch2_journal_debug_to_text(&buf, j); bch_err(c, "%s", buf.buf); @@ -419,8 +431,8 @@ unlock: * Journal is full - can't rely on reclaim from work item due to * freezing: */ - if ((ret == cur_entry_journal_full || - ret == cur_entry_journal_pin_full) && + if ((ret == JOURNAL_ERR_journal_full || + ret == JOURNAL_ERR_journal_pin_full) && !(flags & JOURNAL_RES_GET_NONBLOCK)) { if (can_discard) { bch2_journal_do_discards(j); @@ -433,7 +445,7 @@ unlock: } } - return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN; + return ret == JOURNAL_ERR_insufficient_devices ? 
-EROFS : -EAGAIN; } /* @@ -767,7 +779,6 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, bool new_fs, struct closure *cl) { struct bch_fs *c = ca->fs; - struct journal *j = &c->journal; struct journal_device *ja = &ca->journal; u64 *new_bucket_seq = NULL, *new_buckets = NULL; struct open_bucket **ob = NULL; @@ -780,8 +791,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, unsigned old_cur_idx = ja->cur_idx; int ret = 0; - bch2_journal_block(j); - bch2_journal_flush_all_pins(j); + if (c) { + bch2_journal_block(&c->journal); + bch2_journal_flush_all_pins(&c->journal); + } bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL); @@ -800,7 +813,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, break; } } else { - ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_NONE, + ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, false, cl); if (IS_ERR(ob[nr_got])) { ret = cl ? -EAGAIN : -ENOSPC; @@ -819,7 +832,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, * actually been added to the running filesystem: */ if (!new_fs) - spin_lock(&j->lock); + spin_lock(&c->journal.lock); memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); @@ -860,9 +873,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, } if (!new_fs) - spin_unlock(&j->lock); + spin_unlock(&c->journal.lock); - bch2_journal_unblock(j); + if (c) + bch2_journal_unblock(&c->journal); if (ret) goto err; @@ -891,7 +905,8 @@ err: return ret; err_unblock: - bch2_journal_unblock(j); + if (c) + bch2_journal_unblock(&c->journal); goto err; } @@ -1224,13 +1239,14 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) rcu_read_lock(); s = READ_ONCE(j->reservations); - pr_buf(out, "dirty journal entries:\t%llu\n", fifo_used(&j->pin)); + pr_buf(out, "dirty journal 
entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size); pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); pr_buf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining); + pr_buf(out, "watermark:\t\t%s\n", bch2_journal_watermarks[j->watermark]); pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); @@ -1240,7 +1256,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0); pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); - pr_buf(out, "current entry error:\t%u\n", j->cur_entry_error); + pr_buf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]); pr_buf(out, "current entry:\t\t"); switch (s.cur_entry_offset) { diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 989c331..e7321c3 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -295,9 +295,9 @@ static inline void bch2_journal_res_put(struct journal *j, int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, unsigned); -#define JOURNAL_RES_GET_NONBLOCK (1 << 0) -#define JOURNAL_RES_GET_CHECK (1 << 1) -#define JOURNAL_RES_GET_RESERVED (1 << 2) +/* First two bits for JOURNAL_WATERMARK: */ +#define JOURNAL_RES_GET_NONBLOCK (1 << 2) +#define JOURNAL_RES_GET_CHECK (1 << 3) static inline int journal_res_get_fast(struct journal *j, struct journal_res *res, @@ -318,8 +318,7 @@ static inline int journal_res_get_fast(struct journal *j, EBUG_ON(!journal_state_count(new, new.idx)); - if (!(flags & 
JOURNAL_RES_GET_RESERVED) && - !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) + if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) return 0; new.cur_entry_offset += res->u64s; @@ -372,23 +371,27 @@ out: /* journal_preres: */ -static inline bool journal_check_may_get_unreserved(struct journal *j) +static inline void journal_set_watermark(struct journal *j) { union journal_preres_state s = READ_ONCE(j->prereserved); - bool ret = s.reserved < s.remaining && - fifo_free(&j->pin) > j->pin.size / 4; - - lockdep_assert_held(&j->lock); - - if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { - if (ret) { - set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); - journal_wake(j); - } else { - clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); - } - } - return ret; + unsigned watermark = JOURNAL_WATERMARK_any; + + if (fifo_free(&j->pin) < j->pin.size / 4) + watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc); + if (fifo_free(&j->pin) < j->pin.size / 8) + watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved); + + if (s.reserved > s.remaining) + watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc); + if (!s.remaining) + watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved); + + if (watermark == j->watermark) + return; + + swap(watermark, j->watermark); + if (watermark > j->watermark) + journal_wake(j); } static inline void bch2_journal_preres_put(struct journal *j, @@ -408,12 +411,8 @@ static inline void bch2_journal_preres_put(struct journal *j, closure_wake_up(&j->preres_wait); } - if (s.reserved <= s.remaining && - !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { - spin_lock(&j->lock); - journal_check_may_get_unreserved(j); - spin_unlock(&j->lock); - } + if (s.reserved <= s.remaining && j->watermark) + journal_set_watermark(j); } int __bch2_journal_preres_get(struct journal *, @@ -434,7 +433,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j, old.v = new.v = v; ret = 0; - if ((flags & 
JOURNAL_RES_GET_RESERVED) || + if ((flags & JOURNAL_WATERMARK_reserved) || new.reserved + d < new.remaining) { new.reserved += d; ret = 1; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index bacb805..fca9bc4 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -909,6 +909,7 @@ static void bch2_journal_read_device(struct closure *cl) struct bch_fs *c = ca->fs; struct journal_list *jlist = container_of(cl->parent, struct journal_list, cl); + struct journal_replay *r; struct journal_read_buf buf = { NULL, 0 }; u64 min_seq = U64_MAX; unsigned i; @@ -944,11 +945,29 @@ static void bch2_journal_read_device(struct closure *cl) * allocate */ while (ja->bucket_seq[ja->cur_idx] > min_seq && - ja->bucket_seq[ja->cur_idx] > + ja->bucket_seq[ja->cur_idx] == ja->bucket_seq[(ja->cur_idx + 1) % ja->nr]) ja->cur_idx = (ja->cur_idx + 1) % ja->nr; - ja->sectors_free = 0; + ja->sectors_free = ca->mi.bucket_size; + + mutex_lock(&jlist->lock); + list_for_each_entry(r, jlist->head, list) { + for (i = 0; i < r->nr_ptrs; i++) { + if (r->ptrs[i].dev == ca->dev_idx && + sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) { + unsigned wrote = (r->ptrs[i].sector % ca->mi.bucket_size) + + vstruct_sectors(&r->j, c->block_bits); + + ja->sectors_free = min(ja->sectors_free, + ca->mi.bucket_size - wrote); + } + } + } + mutex_unlock(&jlist->lock); + + BUG_ON(ja->bucket_seq[ja->cur_idx] && + ja->sectors_free == ca->mi.bucket_size); /* * Set dirty_idx to indicate the entire journal is full and needs to be @@ -1562,7 +1581,7 @@ void bch2_journal_write(struct closure *cl) BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors); jset->magic = cpu_to_le64(jset_magic(c)); - jset->version = c->sb.version < bcachefs_metadata_version_new_versioning + jset->version = c->sb.version < bcachefs_metadata_version_bkey_renumber ? 
cpu_to_le32(BCH_JSET_VERSION_OLD) : cpu_to_le32(c->sb.version); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index a920a11..6f1bad5 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -195,7 +195,7 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < c->opts.metadata_replicas_required) { - ret = cur_entry_insufficient_devices; + ret = JOURNAL_ERR_insufficient_devices; goto out; } @@ -217,9 +217,9 @@ void bch2_journal_space_available(struct journal *j) printbuf_exit(&buf); bch2_fatal_error(c); - ret = cur_entry_journal_stuck; + ret = JOURNAL_ERR_journal_stuck; } else if (!j->space[journal_space_discarded].next_entry) - ret = cur_entry_journal_full; + ret = JOURNAL_ERR_journal_full; if ((j->space[journal_space_clean_ondisk].next_entry < j->space[journal_space_clean_ondisk].total) && @@ -238,7 +238,7 @@ out: j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; j->cur_entry_error = ret; journal_set_remaining(j, u64s_remaining); - journal_check_may_get_unreserved(j); + journal_set_watermark(j); if (!ret) journal_wake(j); diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 071fcb4..a6cdb88 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -144,10 +144,38 @@ enum journal_space_from { enum { JOURNAL_REPLAY_DONE, JOURNAL_STARTED, - JOURNAL_MAY_GET_UNRESERVED, JOURNAL_MAY_SKIP_FLUSH, }; +#define JOURNAL_WATERMARKS() \ + x(any) \ + x(copygc) \ + x(reserved) + +enum journal_watermark { +#define x(n) JOURNAL_WATERMARK_##n, + JOURNAL_WATERMARKS() +#undef x +}; + +#define JOURNAL_WATERMARK_MASK 3 + +/* Reasons we may fail to get a journal reservation: */ +#define JOURNAL_ERRORS() \ + x(ok) \ + x(blocked) \ + x(max_in_flight) \ + x(journal_full) \ + x(journal_pin_full) \ + x(journal_stuck) \ + x(insufficient_devices) + +enum journal_errors { +#define x(n) JOURNAL_ERR_##n, + JOURNAL_ERRORS() 
+#undef x +}; + /* Embedded in struct bch_fs */ struct journal { /* Fastpath stuff up front: */ @@ -155,6 +183,7 @@ struct journal { unsigned long flags; union journal_res_state reservations; + enum journal_watermark watermark; /* Max size of current journal entry */ unsigned cur_entry_u64s; @@ -164,15 +193,7 @@ struct journal { * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if * insufficient devices: */ - enum { - cur_entry_ok, - cur_entry_blocked, - cur_entry_max_in_flight, - cur_entry_journal_full, - cur_entry_journal_pin_full, - cur_entry_journal_stuck, - cur_entry_insufficient_devices, - } cur_entry_error; + enum journal_errors cur_entry_error; union journal_preres_state prereserved; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 16bca14..8eb4938 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -351,8 +351,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, } if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { - m->op.alloc_reserve = RESERVE_MOVINGGC; - m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; + m->op.alloc_reserve = RESERVE_movinggc; } else { /* XXX: this should probably be passed in */ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 466975a..4f32d38 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -30,21 +30,6 @@ #include #include -/* - * We can't use the entire copygc reserve in one iteration of copygc: we may - * need the buckets we're freeing up to go back into the copygc reserve to make - * forward progress, but if the copygc reserve is full they'll be available for - * any allocation - and it's possible that in a given iteration, we free up most - * of the buckets we're going to free before we allocate most of the buckets - * we're going to allocate. 
- * - * If we only use half of the reserve per iteration, then in steady state we'll - * always have room in the reserve for the buckets we're going to need in the - * next iteration: - */ -#define COPYGC_BUCKETS_PER_ITER(ca) \ - ((ca)->free[RESERVE_MOVINGGC].size / 2) - static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) { const struct copygc_heap_entry *l = _l; @@ -106,7 +91,7 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, data_opts->target = io_opts->background_target; data_opts->nr_replicas = 1; data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_JOURNAL_RESERVED; + JOURNAL_WATERMARK_copygc; data_opts->rewrite_dev = p.ptr.dev; if (p.has_ec) @@ -250,7 +235,7 @@ static int bch2_copygc(struct bch_fs *c) } for_each_rw_member(ca, c, dev_idx) { - s64 avail = min(dev_buckets_available(ca, RESERVE_MOVINGGC), + s64 avail = min(dev_buckets_available(ca, RESERVE_movinggc), ca->mi.nbuckets >> 6); sectors_reserved += avail * ca->mi.bucket_size; @@ -268,7 +253,7 @@ static int bch2_copygc(struct bch_fs *c) } /* - * Our btree node allocations also come out of RESERVE_MOVINGGC: + * Our btree node allocations also come out of RESERVE_movingc: */ sectors_reserved = (sectors_reserved * 3) / 4; if (!sectors_reserved) { @@ -354,7 +339,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) for_each_rw_member(ca, c, dev_idx) { struct bch_dev_usage usage = bch2_dev_usage_read(ca); - fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_NONE) * + fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) * ca->mi.bucket_size) >> 1); fragmented = usage.d[BCH_DATA_user].fragmented; diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index e78d3b7..a71054c 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -11,6 +11,11 @@ #define x(t, n) #t, +const char * const bch2_metadata_versions[] = { + BCH_METADATA_VERSIONS() + NULL +}; + const char * const bch2_error_actions[] = { BCH_ERROR_ACTIONS()
NULL @@ -219,42 +224,43 @@ static int bch2_mount_opt_lookup(const char *name) return bch2_opt_lookup(name); } -static int bch2_opt_validate(const struct bch_option *opt, const char *msg, u64 v) +int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) { if (v < opt->min) { - if (msg) - pr_err("invalid %s%s: too small (min %llu)", - msg, opt->attr.name, opt->min); + if (err) + pr_buf(err, "%s: too small (min %llu)", + opt->attr.name, opt->min); return -ERANGE; } if (opt->max && v >= opt->max) { - if (msg) - pr_err("invalid %s%s: too big (max %llu)", - msg, opt->attr.name, opt->max); + if (err) + pr_buf(err, "%s: too big (max %llu)", + opt->attr.name, opt->max); return -ERANGE; } if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { - if (msg) - pr_err("invalid %s %s: not a multiple of 512", - msg, opt->attr.name); + if (err) + pr_buf(err, "%s: not a multiple of 512", + opt->attr.name); return -EINVAL; } if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) { - if (msg) - pr_err("invalid %s%s: must be a power of two", - msg, opt->attr.name); + if (err) + pr_buf(err, "%s: must be a power of two", + opt->attr.name); return -EINVAL; } return 0; } -int bch2_opt_parse(struct bch_fs *c, const char *msg, +int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, - const char *val, u64 *res) + const char *val, u64 *res, + struct printbuf *err) { ssize_t ret; @@ -287,7 +293,7 @@ int bch2_opt_parse(struct bch_fs *c, const char *msg, return ret; } - return bch2_opt_validate(opt, msg, *res); + return bch2_opt_validate(opt, *res, err); } void bch2_opt_to_text(struct printbuf *out, @@ -367,6 +373,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, char *copied_opts, *copied_opts_start; char *opt, *name, *val; int ret, id; + struct printbuf err = PRINTBUF; u64 v; if (!options) @@ -386,8 +393,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, if (id < 0) goto bad_opt; - ret = bch2_opt_parse(c, "mount 
option ", - &bch2_opt_table[id], val, &v); + ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); if (ret < 0) goto bad_val; } else { @@ -430,7 +436,7 @@ bad_opt: ret = -1; goto out; bad_val: - pr_err("Invalid value %s for mount option %s", val, name); + pr_err("Invalid mount option %s", err.buf); ret = -1; goto out; no_val: @@ -439,6 +445,7 @@ no_val: goto out; out: kfree(copied_opts_start); + printbuf_exit(&err); return ret; } @@ -465,22 +472,14 @@ u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id) int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) { unsigned id; - int ret; for (id = 0; id < bch2_opts_nr; id++) { const struct bch_option *opt = bch2_opt_table + id; - u64 v; if (opt->get_sb == BCH2_NO_SB_OPT) continue; - v = bch2_opt_from_sb(sb, id); - - ret = bch2_opt_validate(opt, "superblock option ", v); - if (ret) - return ret; - - bch2_opt_set_by_id(opts, id, v); + bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id)); } return 0; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 70b507f..8bc67d0 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -8,6 +8,7 @@ #include #include "bcachefs_format.h" +extern const char * const bch2_metadata_versions[]; extern const char * const bch2_error_actions[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; @@ -274,7 +275,7 @@ enum opt_type { NULL, "Extra debugging information during mount/recovery")\ x(journal_flush_delay, u32, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ - OPT_UINT(0, U32_MAX), \ + OPT_UINT(1, U32_MAX), \ BCH_SB_JOURNAL_FLUSH_DELAY, 1000, \ NULL, "Delay in milliseconds before automatic journal commits")\ x(journal_flush_disabled, u8, \ @@ -482,8 +483,9 @@ void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64); void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64); int bch2_opt_lookup(const char *); -int bch2_opt_parse(struct bch_fs *, const char *, const struct bch_option *, - const char *, u64 
*); +int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *); +int bch2_opt_parse(struct bch_fs *, const struct bch_option *, + const char *, u64 *, struct printbuf *); #define OPT_SHOW_FULL_LIST (1 << 0) #define OPT_SHOW_MOUNT_STYLE (1 << 1) diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index fe2c5cb..66492dd 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -563,8 +563,9 @@ static int bch2_journal_replay(struct bch_fs *c) ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_RESERVED| - (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0), + (!k->allocated + ? BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved + : 0), bch2_journal_replay_key(&trans, k)); if (ret) { bch_err(c, "journal replay: error %d while replaying key at btree %s level %u", diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 95af515..c917bdd 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -253,12 +253,13 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out) struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; struct bch_sb_field_members *mi; + enum bch_opt_id opt_id; u32 version, version_min; u16 block_size; int ret; version = le16_to_cpu(sb->version); - version_min = version >= bcachefs_metadata_version_new_versioning + version_min = version >= bcachefs_metadata_version_bkey_renumber ? 
le16_to_cpu(sb->version_min) : version; @@ -324,6 +325,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out) return -EINVAL; } + for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { + const struct bch_option *opt = bch2_opt_table + opt_id; + + if (opt->get_sb != BCH2_NO_SB_OPT) { + u64 v = bch2_opt_from_sb(sb, opt_id); + + pr_buf(out, "Invalid option "); + ret = bch2_opt_validate(opt, v, out); + if (ret) + return ret; + + printbuf_reset(out); + } + } + /* validate layout */ ret = validate_sb_layout(&sb->layout, out); if (ret) @@ -514,7 +530,7 @@ reread: } version = le16_to_cpu(sb->sb->version); - version_min = version >= bcachefs_metadata_version_new_versioning + version_min = version >= bcachefs_metadata_version_bkey_renumber ? le16_to_cpu(sb->sb->version_min) : version; @@ -1476,12 +1492,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, pr_buf(out, "Version:"); pr_tab(out); - pr_buf(out, "%u", le16_to_cpu(sb->version)); + pr_buf(out, "%s", bch2_metadata_versions[le16_to_cpu(sb->version)]); pr_newline(out); pr_buf(out, "Oldest version on disk:"); pr_tab(out); - pr_buf(out, "%u", le16_to_cpu(sb->version_min)); + pr_buf(out, "%s", bch2_metadata_versions[le16_to_cpu(sb->version_min)]); pr_newline(out); pr_buf(out, "Created:"); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 6464e8c..4a07171 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -862,7 +862,7 @@ static void print_mount_opts(struct bch_fs *c) if (!p.pos) pr_buf(&p, "(null)"); - bch_info(c, "mounted with opts: %s", p.buf); + bch_info(c, "mounted version=%s opts=%s", bch2_metadata_versions[c->sb.version], p.buf); printbuf_exit(&p); } diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index bed48af..2594fec 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -626,7 +626,7 @@ STORE(bch2_fs_opts_dir) goto err; } - ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v); + ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL); 
kfree(tmp); if (ret < 0) @@ -734,7 +734,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) "open_buckets_user\t%u\n" "btree reserve cache\t%u\n", stats.buckets_ec, - __dev_buckets_available(ca, stats, RESERVE_NONE), + __dev_buckets_available(ca, stats, RESERVE_none), c->freelist_wait.list.first ? "waiting" : "empty", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free, ca->nr_open_buckets, diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 1c680b1..8d23b4c 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -525,7 +525,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, memcpy(buf, value, size); buf[size] = '\0'; - ret = bch2_opt_parse(c, NULL, opt, buf, &v); + ret = bch2_opt_parse(c, opt, buf, &v, NULL); kfree(buf); if (ret < 0) -- 2.39.5