From 3c0484687d00f3f2818d13c0c6f65123abcf4517 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Dec 2022 18:27:31 -0500 Subject: [PATCH] Update bcachefs sources to 84505cfd37 bcachefs: Go RW before check_alloc_info() --- .bcachefs_revision | 2 +- include/trace/events/bcachefs.h | 105 ++++++++++------------------ libbcachefs/alloc_background.c | 32 +++++---- libbcachefs/alloc_foreground.c | 46 +++++++----- libbcachefs/backpointers.c | 9 +-- libbcachefs/bcachefs.h | 1 + libbcachefs/btree_cache.c | 4 +- libbcachefs/btree_gc.c | 29 ++++++-- libbcachefs/btree_locking.c | 12 +--- libbcachefs/btree_update_interior.c | 2 +- libbcachefs/btree_update_leaf.c | 40 +++++------ libbcachefs/data_update.c | 12 +++- libbcachefs/ec.c | 2 +- libbcachefs/errcode.h | 11 ++- libbcachefs/io.c | 6 +- libbcachefs/journal.c | 24 +++---- libbcachefs/journal.h | 2 +- libbcachefs/move.c | 21 +++++- libbcachefs/movinggc.c | 2 +- libbcachefs/recovery.c | 21 +++--- libbcachefs/reflink.c | 2 +- libbcachefs/super-io.c | 4 +- libbcachefs/sysfs.c | 18 +++++ 23 files changed, 221 insertions(+), 186 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index d55973e..eacbdc3 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -ed2a5f4260b65f3d613dcd76a97ac091bc88a126 +84505cfd37957accbff6fa7e4477bfd9c4c23ba6 diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index e055d37..f699146 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -564,6 +564,7 @@ TRACE_EVENT(bucket_alloc_fail, __field(u64, need_journal_commit ) __field(u64, nouse ) __field(bool, nonblocking ) + __field(u64, nocow ) __array(char, err, 32 ) ), @@ -579,10 +580,11 @@ TRACE_EVENT(bucket_alloc_fail, __entry->need_journal_commit = s->skipped_need_journal_commit; __entry->nouse = s->skipped_nouse; __entry->nonblocking = nonblocking; + __entry->nocow = s->skipped_nocow; strscpy(__entry->err, err, sizeof(__entry->err)); ), - TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s", + TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u nocow %llu err %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->reserve, __entry->free, @@ -594,6 +596,7 @@ TRACE_EVENT(bucket_alloc_fail, __entry->need_journal_commit, __entry->nouse, __entry->nonblocking, + __entry->nocow, __entry->err) ); @@ -702,6 +705,37 @@ TRACE_EVENT(move_data, __entry->sectors_moved, __entry->keys_moved) ); +TRACE_EVENT(evacuate_bucket, + TP_PROTO(struct bch_fs *c, struct bpos *bucket, + unsigned sectors, unsigned bucket_size, + int ret), + TP_ARGS(c, bucket, sectors, bucket_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u64, member ) + __field(u64, bucket ) + __field(u32, sectors ) + __field(u32, bucket_size ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->member = bucket->inode; + __entry->bucket = bucket->offset; + __entry->sectors = sectors; + __entry->bucket_size = bucket_size; + __entry->ret = ret; + ), + + TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->member, __entry->bucket, + __entry->sectors, __entry->bucket_size, + __entry->ret) +); + TRACE_EVENT(copygc, TP_PROTO(struct bch_fs *c, u64 sectors_moved, u64 sectors_not_moved, @@ -1096,75 +1130,6 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, __entry->new_u64s) ); -DECLARE_EVENT_CLASS(node_lock_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, struct btree *b, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, - level, iter_seq, b, node_seq), - - TP_STRUCT__entry( - __field(unsigned long, trans_ip ) - __field(unsigned long, caller_ip ) - __field(u8, btree_id ) - __field(u64, pos_inode ) - __field(u64, pos_offset ) - __field(u32, pos_snapshot ) - __field(u32, level ) - __field(u32, iter_seq ) - __array(char, node, 24 ) - __field(u32, node_seq ) - ), - - TP_fast_assign( - __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->pos_inode = pos->inode; - __entry->pos_offset = pos->offset; - __entry->pos_snapshot = pos->snapshot; - __entry->level = level; - __entry->iter_seq = iter_seq; - if (IS_ERR(b)) - strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node)); - else - scnprintf(__entry->node, sizeof(__entry->node), "%px", b); - __entry->node_seq = node_seq; - ), - - TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %s node seq %u", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip, - __entry->btree_id, - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->level, __entry->iter_seq, - __entry->node, __entry->node_seq) -); - -DEFINE_EVENT(node_lock_fail, node_upgrade_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, struct btree *b, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, - level, iter_seq, b, node_seq) -); - -DEFINE_EVENT(node_lock_fail, node_relock_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, struct btree *b, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, - level, iter_seq, b, node_seq) -); - #endif /* _TRACE_BCACHE_H */ /* This part must be outside protection */ diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 0939200..ae6ae78 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -800,6 +800,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, goto err; if (ca->mi.freespace_initialized && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) && bch2_trans_inconsistent_on(old.k->type != old_type, trans, "incorrect key when %s %s btree (got %s should be %s)\n" " for %s", @@ -1649,21 +1650,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto write; } - if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans, - "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" - "%s", - a->v.journal_seq, - c->journal.flushed_seq_ondisk, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" + "%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } goto out; } - if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans, - "bucket incorrectly set in need_discard btree\n" - "%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.data_type != BCH_DATA_need_discard) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "bucket incorrectly set in need_discard btree\n" + "%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } + goto out; } diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 46f215c..2010a9a 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -312,28 +312,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc a = bch2_alloc_to_v4(k, &a_convert); - if (genbits != (alloc_freespace_genbits(*a) >> 56)) { - prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" - " freespace key ", - genbits, alloc_freespace_genbits(*a) >> 56); + if (a->data_type != BCH_DATA_free) { + if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + ob = NULL; + goto err; + } + + prt_printf(&buf, "non free bucket in freespace btree\n" + " freespace key "); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; - } - if (a->data_type != BCH_DATA_free) { - prt_printf(&buf, "non free bucket in freespace btree\n" - " freespace key "); + if (genbits != (alloc_freespace_genbits(*a) >> 56) && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" + " freespace key ", + genbits, alloc_freespace_genbits(*a) >> 56); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; + } if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { @@ -506,8 +512,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; - bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); - u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor; + bool freespace = READ_ONCE(ca->mi.freespace_initialized); + u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor; u64 avail; struct bucket_alloc_state s = { .cur_bucket = start }; bool waiting = false; @@ -546,20 +552,25 @@ again: if (ob) return ob; } - - ob = likely(ca->mi.freespace_initialized) +alloc: + ob = likely(freespace) ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl) : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl); if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && !freespace_initialized && start) { + if (!ob && !freespace && start) { start = s.cur_bucket = 0; - goto again; + goto alloc; } - if (!freespace_initialized) + if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + freespace = false; + goto alloc; + } + + if (!freespace) ca->bucket_alloc_trans_early_cursor = s.cur_bucket; err: if (!ob) @@ -1224,12 +1235,9 @@ err: if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) || bch2_err_matches(ret, BCH_ERR_freelist_empty)) return cl - ? -EAGAIN + ? -BCH_ERR_bucket_alloc_blocked : -BCH_ERR_ENOSPC_bucket_alloc; - if (bch2_err_matches(ret, BCH_ERR_insufficient_devices)) - return -EROFS; - return ret; } diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 7d4367f..405823d 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -242,6 +242,9 @@ btree: memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) { struct printbuf buf = PRINTBUF; + if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) + goto err; + prt_printf(&buf, "backpointer not found when deleting"); prt_newline(&buf); printbuf_indent_add(&buf, 2); @@ -262,10 +265,8 @@ btree: bch2_bkey_val_to_text(&buf, c, orig_k); bch_err(c, "%s", buf.buf); - if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { - bch2_inconsistent_error(c); - ret = -EIO; - } + bch2_inconsistent_error(c); + ret = -EIO; printbuf_exit(&buf); goto err; } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 7f479cd..71f5145 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -549,6 +549,7 @@ enum { /* fsck passes: */ BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ + BCH_FS_CHECK_ALLOC_DONE, BCH_FS_CHECK_LRUS_DONE, BCH_FS_CHECK_BACKPOINTERS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index d24827f..b5e7804 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -577,7 +577,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) } trace_and_count(c, btree_cache_cannibalize_lock_fail, c); - return -EAGAIN; + return -BCH_ERR_btree_cache_cannibalize_lock_blocked; success: trace_and_count(c, btree_cache_cannibalize_lock, c); @@ -952,8 +952,6 @@ retry: * bch_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * - * If IO is necessary and running under generic_make_request, returns -EAGAIN. - * * The btree node will have either a read or a write lock held, depending on * the @write parameter. */ diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 055987a..6b7353c 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1285,8 +1285,7 @@ fsck_err: return ret; } -static int bch2_gc_start(struct bch_fs *c, - bool metadata_only) +static int bch2_gc_start(struct bch_fs *c) { struct bch_dev *ca = NULL; unsigned i; @@ -1301,7 +1300,6 @@ static int bch2_gc_start(struct bch_fs *c, } for_each_member_device(ca, c, i) { - BUG_ON(ca->buckets_gc); BUG_ON(ca->usage_gc); ca->usage_gc = alloc_percpu(struct bch_dev_usage); @@ -1318,6 +1316,22 @@ static int bch2_gc_start(struct bch_fs *c, return 0; } +static int bch2_gc_reset(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_member_device(ca, c, i) { + free_percpu(ca->usage_gc); + ca->usage_gc = NULL; + } + + free_percpu(c->usage_gc); + c->usage_gc = NULL; + + return bch2_gc_start(c); +} + /* returns true if not equal */ static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, struct bch_alloc_v4 r) @@ -1763,7 +1777,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) bch2_btree_interior_updates_flush(c); - ret = bch2_gc_start(c, metadata_only) ?: + ret = bch2_gc_start(c) ?: bch2_gc_alloc_start(c, metadata_only) ?: bch2_gc_reflink_start(c, metadata_only); if (ret) @@ -1824,6 +1838,9 @@ again: bch2_gc_stripes_reset(c, metadata_only); bch2_gc_alloc_reset(c, metadata_only); bch2_gc_reflink_reset(c, metadata_only); + ret = bch2_gc_reset(c); + if (ret) + goto out; /* flush fsck errors, reset counters */ bch2_flush_fsck_errs(c); @@ -1975,7 +1992,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BTREE_INSERT_NOFAIL, gc_btree_gens_key(&trans, &iter, k)); - if (ret && ret != -EROFS) + if (ret && !bch2_err_matches(ret, EROFS)) bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret)); if (ret) goto err; @@ -1988,7 +2005,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter, k)); - if (ret && ret != -EROFS) + if (ret && !bch2_err_matches(ret, EROFS)) bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret)); if (ret) goto err; diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index d3326e9..dce2dc0 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -359,18 +359,8 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, if (!(upgrade ? bch2_btree_node_upgrade(trans, path, l) - : bch2_btree_node_relock(trans, path, l))) { - (upgrade - ? trace_node_upgrade_fail - : trace_node_relock_fail)(0, _RET_IP_, - path->btree_id, &path->pos, - l, path->l[l].lock_seq, - path->l[l].b, - is_btree_node(path, l) - ? path->l[l].b->c.lock.state.seq - : 0); + : bch2_btree_node_relock(trans, path, l))) fail_idx = l; - } l++; } while (l < path->locks_want); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index a4476f1..a49e7b6 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1162,7 +1162,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, bch2_trans_unlock(trans); closure_sync(&cl); - } while (ret == -EAGAIN); + } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } if (ret) { diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 459d9a4..75d8a55 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -316,15 +316,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, unsigned flags) { - struct bch_fs *c = trans->c; - int ret; - - ret = bch2_journal_res_get(&c->journal, &trans->journal_res, - trans->journal_u64s, - flags| - (trans->flags & JOURNAL_WATERMARK_MASK)); - - return ret == -EAGAIN ? -BCH_ERR_btree_insert_need_journal_res : ret; + return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, + trans->journal_u64s, + flags| + (trans->flags & JOURNAL_WATERMARK_MASK)); } #define JSET_ENTRY_LOG_U64s 4 @@ -643,21 +638,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans->journal_res.seq = c->journal.replay_journal_seq; } - if (unlikely(trans->extra_journal_entries.nr)) { - memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), - trans->extra_journal_entries.data, - trans->extra_journal_entries.nr); - - trans->journal_res.offset += trans->extra_journal_entries.nr; - trans->journal_res.u64s -= trans->extra_journal_entries.nr; - } - /* * Not allowed to fail after we've gotten our journal reservation - we * have to use it: */ - if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && + !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { if (bch2_journal_seq_verify) trans_for_each_update(trans, i) i->k->k.version.lo = trans->journal_res.seq; @@ -683,6 +670,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, return ret; } + if (unlikely(trans->extra_journal_entries.nr)) { + memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), + trans->extra_journal_entries.data, + trans->extra_journal_entries.nr); + + trans->journal_res.offset += trans->extra_journal_entries.nr; + trans->journal_res.u64s -= trans->extra_journal_entries.nr; + } + if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { trans_for_each_update(trans, i) { struct journal *j = &c->journal; @@ -841,7 +837,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK| (trans->flags & JOURNAL_WATERMARK_MASK)); - if (unlikely(ret == -EAGAIN)) + if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked)) ret = bch2_trans_journal_preres_get_cold(trans, trans->journal_preres_u64s, trace_ip); if (unlikely(ret)) @@ -913,7 +909,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip); break; - case -BCH_ERR_btree_insert_need_journal_res: + case -BCH_ERR_journal_res_get_blocked: bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && @@ -967,7 +963,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) || test_bit(BCH_FS_STARTED, &c->flags)) - return -EROFS; + return -BCH_ERR_erofs_trans_commit; bch2_trans_unlock(trans); diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 3edd7b7..d4cdfb4 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -349,7 +349,7 @@ void bch2_update_unwritten_extent(struct btree_trans *trans, update->op.nr_replicas, update->op.alloc_reserve, 0, &cl, &wp); - if (ret == -EAGAIN) { + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { bch2_trans_unlock(trans); closure_sync(&cl); continue; @@ -459,7 +459,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m, ? 0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - return ret; + goto err; } m->op.nr_replicas = m->op.nr_replicas_required = @@ -471,6 +471,14 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m, if (bkey_extent_is_unwritten(k)) return -BCH_ERR_unwritten_extent_update; return 0; +err: + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + bch2_bucket_nocow_unlock(&c->nocow_locks, + PTR_BUCKET_POS(c, &p.ptr), 0); + + bch2_bkey_buf_exit(&m->k, c); + bch2_bio_free_pages_pool(c, &m->op.wbio.bio); + return ret; } void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 53f0d82..c234c8d 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -942,7 +942,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) closure_sync(&s->iodone); if (s->err) { - if (s->err != -EROFS) + if (!bch2_err_matches(s->err, EROFS)) bch_err(c, "error creating stripe: error writing data buckets"); goto err; } diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 1ab4c4c..543cdb5 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -20,7 +20,6 @@ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ - x(0, insufficient_devices) \ x(0, transaction_restart) \ x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ @@ -81,6 +80,16 @@ x(EINVAL, device_already_online) \ x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ + x(EROFS, erofs_trans_commit) \ + x(EROFS, erofs_no_writes) \ + x(EROFS, erofs_journal_err) \ + x(EROFS, erofs_sb_err) \ + x(EROFS, insufficient_devices) \ + x(0, operation_blocked) \ + x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ + x(BCH_ERR_operation_blocked, journal_res_get_blocked) \ + x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \ + x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ x(BCH_ERR_invalid_sb, invalid_sb_magic) \ x(BCH_ERR_invalid_sb, invalid_sb_version) \ diff --git a/libbcachefs/io.c b/libbcachefs/io.c index f0fca86..706f18b 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -427,7 +427,7 @@ retry: opts.data_replicas, opts.data_replicas, RESERVE_none, 0, &cl, &wp); - if (ret == -EAGAIN) { + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { bch2_trans_unlock(trans); closure_sync(&cl); goto retry; @@ -1627,7 +1627,7 @@ again: BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : &op->cl, &wp)); if (unlikely(ret)) { - if (ret == -EAGAIN) + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) break; goto err; @@ -1775,7 +1775,7 @@ void bch2_write(struct closure *cl) if (c->opts.nochanges || !percpu_ref_tryget_live(&c->writes)) { - op->error = -EROFS; + op->error = -BCH_ERR_erofs_no_writes; goto err; } diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 95c2922..e35b685 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -199,12 +199,6 @@ static bool journal_entry_close(struct journal *j) /* * should _only_ called from journal_res_get() - when we actually want a * journal reservation - journal entry is open means journal is dirty: - * - * returns: - * 0: success - * -ENOSPC: journal currently full, must invoke reclaim - * -EAGAIN: journal blocked, must wait - * -EROFS: insufficient rw devices or journal error */ static int journal_entry_open(struct journal *j) { @@ -353,7 +347,7 @@ retry: return 0; if (bch2_journal_error(j)) - return -EROFS; + return -BCH_ERR_erofs_journal_err; spin_lock(&j->lock); @@ -445,7 +439,9 @@ unlock: } } - return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN; + return ret == JOURNAL_ERR_insufficient_devices + ? -EROFS + : -BCH_ERR_journal_res_get_blocked; } /* @@ -464,7 +460,8 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, int ret; closure_wait_event(&j->async_wait, - (ret = __journal_res_get(j, res, flags)) != -EAGAIN || + (ret = __journal_res_get(j, res, flags)) != + -BCH_ERR_journal_res_get_blocked|| (flags & JOURNAL_RES_GET_NONBLOCK)); return ret; } @@ -815,12 +812,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, } else { ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, false, cl); - if (IS_ERR(ob[nr_got])) { - ret = cl - ? -EAGAIN - : -BCH_ERR_ENOSPC_bucket_alloc; + ret = PTR_ERR_OR_ZERO(ob[nr_got]); + if (ret) break; - } bu[nr_got] = ob[nr_got]->bucket; } @@ -930,7 +924,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, closure_init_stack(&cl); - while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) { + while (ja->nr != nr && (ret == 0 || ret == -BCH_ERR_bucket_alloc_blocked)) { struct disk_reservation disk_res = { 0, 0 }; closure_sync(&cl); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 51d29a0..896a2d7 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -479,7 +479,7 @@ static inline int bch2_journal_preres_get(struct journal *j, return 0; if (flags & JOURNAL_RES_GET_NONBLOCK) - return -EAGAIN; + return -BCH_ERR_journal_preres_get_blocked; return __bch2_journal_preres_get(j, res, new_u64s, flags); } diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 47b77b3..b308354 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" #include "bkey_buf.h" @@ -257,7 +258,7 @@ static int bch2_move_extent(struct btree_trans *trans, } if (!percpu_ref_tryget_live(&c->writes)) - return -EROFS; + return -BCH_ERR_erofs_no_writes; /* * Before memory allocations & taking nocow locks in @@ -661,13 +662,29 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct btree_iter iter; struct bkey_buf sk; struct bch_backpointer bp; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a; + struct bkey_s_c k; struct data_update_opts data_opts; + unsigned dirty_sectors, bucket_size; u64 bp_offset = 0, cur_inum = U64_MAX; int ret = 0; bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, + bucket, BTREE_ITER_CACHED); + ret = lockrestart_do(&trans, + bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + bch2_trans_iter_exit(&trans, &iter); + + if (!ret) { + a = bch2_alloc_to_v4(k, &a_convert); + dirty_sectors = a->dirty_sectors; + bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; + } + while (!(ret = move_ratelimit(&trans, ctxt))) { bch2_trans_begin(&trans); @@ -765,6 +782,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bp_offset++; } + trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret); + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) { bch2_trans_unlock(&trans); move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index fbc8043..f0ab65f 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -163,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c) bch2_moving_ctxt_exit(&ctxt); - if (ret < 0 && ret != -EROFS) + if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 069aafb..0ad2bb2 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -625,8 +625,8 @@ static int bch2_journal_replay(struct bch_fs *c) : 0), bch2_journal_replay_key(&trans, k)); if (ret) { - bch_err(c, "journal replay: error %d while replaying key at btree %s level %u", - ret, bch2_btree_ids[k->btree_id], k->level); + bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", + bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); goto err; } } @@ -1246,13 +1246,6 @@ use_clean: set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - bch_info(c, "checking need_discard and freespace btrees"); - err = "error checking need_discard and freespace btrees"; - ret = bch2_check_alloc_info(c); - if (ret) - goto err; - bch_verbose(c, "done checking need_discard and freespace btrees"); - if (c->sb.version < bcachefs_metadata_version_snapshot_2) { err = "error creating root snapshot node"; ret = bch2_fs_initialize_subvolumes(c); @@ -1277,6 +1270,15 @@ use_clean: if (c->opts.verbose || !c->sb.clean) bch_info(c, "journal replay done"); + bch_info(c, "checking need_discard and freespace btrees"); + err = "error checking need_discard and freespace btrees"; + ret = bch2_check_alloc_info(c); + if (ret) + goto err; + bch_verbose(c, "done checking need_discard and freespace btrees"); + + set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); + bch_info(c, "checking lrus"); err = "error checking lrus"; ret = bch2_check_lrus(c); @@ -1316,6 +1318,7 @@ use_clean: set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); } else { set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index ec672fe..e89a9a1 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -283,7 +283,7 @@ s64 bch2_remap_range(struct bch_fs *c, int ret = 0, ret2 = 0; if (!percpu_ref_tryget_live(&c->writes)) - return -EROFS; + return -BCH_ERR_erofs_no_writes; bch2_check_set_feature(c, BCH_FEATURE_reflink); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index c4020cb..738b68b 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -863,7 +863,7 @@ int bch2_write_super(struct bch_fs *c) le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); percpu_ref_put(&ca->io_ref); - ret = -EROFS; + ret = -BCH_ERR_erofs_sb_err; goto out; } @@ -873,7 +873,7 @@ int bch2_write_super(struct bch_fs *c) le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); percpu_ref_put(&ca->io_ref); - ret = -EROFS; + ret = -BCH_ERR_erofs_sb_err; goto out; } } diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 647d018..bad3eaf 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -194,6 +194,7 @@ read_attribute(btree_cache); read_attribute(btree_key_cache); read_attribute(stripes_heap); read_attribute(open_buckets); +read_attribute(nocow_lock_table); read_attribute(internal_uuid); @@ -445,6 +446,22 @@ SHOW(bch2_fs) if (attr == &sysfs_data_jobs) data_progress_to_text(out, c); + if (attr == &sysfs_nocow_lock_table) { + int i, count = 1; + long last, curr = 0; + + last = atomic_long_read(&c->nocow_locks.l[0].v); + for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) { + curr = atomic_long_read(&c->nocow_locks.l[i].v); + if (last != curr) { + prt_printf(out, "%li: %d\n", last, count); + count = 1; + last = curr; + } else + count++; + } + prt_printf(out, "%li: %d\n", last, count); +} return 0; } @@ -627,6 +644,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_new_stripes, &sysfs_stripes_heap, &sysfs_open_buckets, + &sysfs_nocow_lock_table, &sysfs_io_timers_read, &sysfs_io_timers_write, -- 2.39.5