From ddac1641ee1e2686c2211a8d671ea723634dfc89 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Aug 2022 20:28:55 -0400 Subject: [PATCH] Update bcachefs sources to 90a9c61e2b bcachefs: Switch bch2_btree_delete_range() to bch2_trans_run() --- .bcachefs_revision | 2 +- include/linux/bio.h | 2 +- include/linux/blkdev.h | 5 +- include/linux/errname.h | 11 + include/linux/prandom.h | 27 ++ include/linux/random.h | 5 - include/linux/six.h | 7 + include/trace/events/bcachefs.h | 423 ++++++++++++----------- libbcachefs/acl.c | 4 +- libbcachefs/alloc_background.c | 300 +++++++--------- libbcachefs/alloc_background.h | 10 +- libbcachefs/alloc_foreground.c | 75 ++-- libbcachefs/backpointers.c | 39 +-- libbcachefs/bcachefs.h | 10 + libbcachefs/btree_cache.c | 70 ++-- libbcachefs/btree_gc.c | 287 ++++++++-------- libbcachefs/btree_io.c | 8 +- libbcachefs/btree_iter.c | 508 ++++++++++++++-------------- libbcachefs/btree_iter.h | 145 ++++++-- libbcachefs/btree_key_cache.c | 64 ++-- libbcachefs/btree_locking.h | 109 ++++-- libbcachefs/btree_types.h | 36 +- libbcachefs/btree_update.h | 27 +- libbcachefs/btree_update_interior.c | 39 +-- libbcachefs/btree_update_leaf.c | 104 +++--- libbcachefs/buckets.c | 35 +- libbcachefs/checksum.c | 31 +- libbcachefs/data_update.c | 4 +- libbcachefs/debug.c | 153 ++++++--- libbcachefs/dirent.c | 4 +- libbcachefs/ec.c | 138 ++++---- libbcachefs/errcode.c | 51 +++ libbcachefs/errcode.h | 73 +++- libbcachefs/error.c | 13 +- libbcachefs/error.h | 27 +- libbcachefs/fs-io.c | 62 ++-- libbcachefs/fs-io.h | 6 +- libbcachefs/fs.c | 22 +- libbcachefs/fsck.c | 305 ++++++++--------- libbcachefs/inode.c | 4 +- libbcachefs/io.c | 28 +- libbcachefs/journal.c | 11 +- libbcachefs/journal_io.c | 18 +- libbcachefs/journal_reclaim.c | 13 +- libbcachefs/journal_seq_blacklist.c | 2 +- libbcachefs/lru.c | 27 +- libbcachefs/migrate.c | 121 ++++--- libbcachefs/move.c | 23 +- libbcachefs/movinggc.c | 11 +- libbcachefs/movinggc.h | 1 + libbcachefs/opts.h | 5 + 
libbcachefs/quota.c | 49 +-- libbcachefs/rebalance.c | 9 +- libbcachefs/recovery.c | 43 ++- libbcachefs/reflink.c | 5 +- libbcachefs/subvolume.c | 132 +++----- libbcachefs/super-io.c | 11 +- libbcachefs/super.c | 48 +-- libbcachefs/tests.c | 225 ++++++------ libbcachefs/trace.c | 6 +- libbcachefs/util.c | 22 +- libbcachefs/xattr.c | 2 +- linux/bio.c | 2 +- linux/blkdev.c | 2 +- linux/six.c | 20 ++ 65 files changed, 2204 insertions(+), 1877 deletions(-) create mode 100644 include/linux/errname.h create mode 100644 include/linux/prandom.h create mode 100644 libbcachefs/errcode.c diff --git a/.bcachefs_revision b/.bcachefs_revision index 91d229a..7f12b42 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -1bda24d7cc91cb84fe5bcbc40c871e9d00542bd2 +90a9c61e2bcf20935aebda1c0c8078ad0fff2475 diff --git a/include/linux/bio.h b/include/linux/bio.h index 7d596df..0ad5a87 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -242,7 +242,7 @@ int bio_add_page(struct bio *, struct page *, unsigned, unsigned); struct bio *bio_alloc_clone(struct block_device *, struct bio *, gfp_t, struct bio_set *); -struct bio *bio_kmalloc(gfp_t, unsigned int); +struct bio *bio_kmalloc(unsigned int, gfp_t); extern void bio_endio(struct bio *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4ce43b5..01b3d4a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -69,8 +69,7 @@ static inline void submit_bio(struct bio *bio) generic_make_request(bio); } -int blkdev_issue_discard(struct block_device *, sector_t, - sector_t, gfp_t, unsigned long); +int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t); #define bdev_get_queue(bdev) (&((bdev)->queue)) @@ -85,7 +84,7 @@ int blkdev_issue_discard(struct block_device *, sector_t, #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) #define SECTOR_MASK (PAGE_SECTORS - 1) -#define blk_queue_discard(q) ((void) (q), 0) +#define bdev_max_discard_sectors(bdev) ((void) (bdev), 0) #define 
blk_queue_nonrot(q) ((void) (q), 0) unsigned bdev_logical_block_size(struct block_device *bdev); diff --git a/include/linux/errname.h b/include/linux/errname.h new file mode 100644 index 0000000..443d504 --- /dev/null +++ b/include/linux/errname.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_ERRNAME_H +#define _LINUX_ERRNAME_H + +#include + +static inline const char *errname(int err) +{ + return strerror(abs(err)); +} + +#endif /* _LINUX_ERRNAME_H */ diff --git a/include/linux/prandom.h b/include/linux/prandom.h new file mode 100644 index 0000000..6f177cd --- /dev/null +++ b/include/linux/prandom.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_PRANDOM_H +#define _LINUX_PRANDOM_H + +#include + +static inline void prandom_bytes(void *buf, int nbytes) +{ + return get_random_bytes(buf, nbytes); +} + +#define prandom_type(type) \ +static inline type prandom_##type(void) \ +{ \ + type v; \ + \ + prandom_bytes(&v, sizeof(v)); \ + return v; \ +} + +prandom_type(int); +prandom_type(long); +prandom_type(u32); +prandom_type(u64); +#undef prandom_type + +#endif /* _LINUX_PRANDOM_H */ + diff --git a/include/linux/random.h b/include/linux/random.h index 28c595a..ea101d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -29,11 +29,6 @@ static inline void get_random_bytes(void *buf, int nbytes) BUG_ON(getrandom(buf, nbytes, 0) != nbytes); } -static inline void prandom_bytes(void *buf, int nbytes) -{ - return get_random_bytes(buf, nbytes); -} - #define get_random_type(type) \ static inline type get_random_##type(void) \ { \ diff --git a/include/linux/six.h b/include/linux/six.h index 477c33e..41ddf63 100644 --- a/include/linux/six.h +++ b/include/linux/six.h @@ -200,4 +200,11 @@ void six_lock_pcpu_free_rcu(struct six_lock *); void six_lock_pcpu_free(struct six_lock *); void six_lock_pcpu_alloc(struct six_lock *); +struct six_lock_count { + unsigned read; + unsigned intent; +}; + +struct six_lock_count six_lock_counts(struct six_lock *); + #endif /* _LINUX_SIX_H */ diff --git 
a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 66ad356..2c98074 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -7,21 +7,29 @@ #include +#define TRACE_BPOS_entries(name) \ + __field(u64, name##_inode ) \ + __field(u64, name##_offset ) \ + __field(u32, name##_snapshot ) + +#define TRACE_BPOS_assign(dst, src) \ + __entry->dst##_inode = (src).inode; \ + __entry->dst##_offset = (src).offset; \ + __entry->dst##_snapshot = (src).snapshot + DECLARE_EVENT_CLASS(bpos, TP_PROTO(struct bpos *p), TP_ARGS(p), TP_STRUCT__entry( - __field(u64, inode ) - __field(u64, offset ) + TRACE_BPOS_entries(p) ), TP_fast_assign( - __entry->inode = p->inode; - __entry->offset = p->offset; + TRACE_BPOS_assign(p, *p); ), - TP_printk("%llu:%llu", __entry->inode, __entry->offset) + TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) ); DECLARE_EVENT_CLASS(bkey, @@ -230,23 +238,22 @@ DECLARE_EVENT_CLASS(btree_node, TP_STRUCT__entry( __field(dev_t, dev ) __field(u8, level ) - __field(u8, id ) - __field(u64, inode ) - __field(u64, offset ) + __field(u8, btree_id ) + TRACE_BPOS_entries(pos) ), TP_fast_assign( __entry->dev = c->dev; __entry->level = b->c.level; - __entry->id = b->c.btree_id; - __entry->inode = b->key.k.p.inode; - __entry->offset = b->key.k.p.offset; + __entry->btree_id = b->c.btree_id; + TRACE_BPOS_assign(pos, b->key.k.p); ), - TP_printk("%d,%d %u id %u %llu:%llu", + TP_printk("%d,%d %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->level, __entry->id, - __entry->inode, __entry->offset) + __entry->level, + bch2_btree_ids[__entry->btree_id], + __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); DEFINE_EVENT(btree_node, btree_read, @@ -376,43 +383,36 @@ TRACE_EVENT(btree_cache_scan, ); TRACE_EVENT(btree_node_relock_fail, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct 
bpos *pos, - unsigned long node, - u32 iter_lock_seq, - u32 node_lock_seq), - TP_ARGS(trans_fn, caller_ip, btree_id, pos, node, iter_lock_seq, node_lock_seq), + struct btree_path *path, + unsigned level), + TP_ARGS(trans, caller_ip, path, level), TP_STRUCT__entry( __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, btree_id ) - __field(u64, pos_inode ) - __field(u64, pos_offset ) - __field(u32, pos_snapshot ) + TRACE_BPOS_entries(pos) __field(unsigned long, node ) __field(u32, iter_lock_seq ) __field(u32, node_lock_seq ) ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->pos_inode = pos->inode; - __entry->pos_offset = pos->offset; - __entry->pos_snapshot = pos->snapshot; - __entry->node = node; - __entry->iter_lock_seq = iter_lock_seq; - __entry->node_lock_seq = node_lock_seq; + __entry->btree_id = path->btree_id; + TRACE_BPOS_assign(pos, path->pos); + __entry->node = (unsigned long) btree_path_node(path, level); + __entry->iter_lock_seq = path->l[level].lock_seq; + __entry->node_lock_seq = is_btree_node(path, level) ? 
path->l[level].b->c.lock.state.seq : 0; ), - TP_printk("%s %pS btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %lu iter seq %u lock seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - __entry->btree_id, + bch2_btree_ids[__entry->btree_id], __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -421,6 +421,56 @@ TRACE_EVENT(btree_node_relock_fail, __entry->node_lock_seq) ); +TRACE_EVENT(btree_node_upgrade_fail, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, + struct btree_path *path, + unsigned level), + TP_ARGS(trans, caller_ip, path, level), + + TP_STRUCT__entry( + __array(char, trans_fn, 24 ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + TRACE_BPOS_entries(pos) + __field(u8, locked ) + __field(u8, self_read_count ) + __field(u8, self_intent_count) + __field(u8, read_count ) + __field(u8, intent_count ) + ), + + TP_fast_assign( + struct six_lock_count c; + + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->btree_id = path->btree_id; + TRACE_BPOS_assign(pos, path->pos); + __entry->locked = btree_node_locked(path, level); + + c = bch2_btree_node_lock_counts(trans, NULL, path->l[level].b, level), + __entry->self_read_count = c.read; + __entry->self_intent_count = c.intent; + c = six_lock_counts(&path->l[level].b->c.lock); + __entry->read_count = c.read; + __entry->intent_count = c.intent; + ), + + TP_printk("%s %pS btree %s pos %llu:%llu:%u, locked %u held %u:%u lock count %u:%u", + __entry->trans_fn, + (void *) __entry->caller_ip, + bch2_btree_ids[__entry->btree_id], + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot, + __entry->locked, + __entry->self_read_count, + __entry->self_intent_count, + __entry->read_count, + __entry->intent_count) +); + /* Garbage collection */ DEFINE_EVENT(bch_fs, gc_gens_start, @@ -456,55 +506,68 @@ TRACE_EVENT(bucket_alloc, 
TRACE_EVENT(bucket_alloc_fail, TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, + u64 free, u64 avail, + u64 copygc_wait_amount, + s64 copygc_waiting_for, u64 seen, u64 open, u64 need_journal_commit, u64 nouse, bool nonblocking, - int ret), - TP_ARGS(ca, alloc_reserve, avail, seen, open, need_journal_commit, nouse, nonblocking, ret), + const char *err), + TP_ARGS(ca, alloc_reserve, free, avail, copygc_wait_amount, copygc_waiting_for, + seen, open, need_journal_commit, nouse, nonblocking, err), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, reserve, 16 ) + __field(u64, free ) __field(u64, avail ) + __field(u64, copygc_wait_amount ) + __field(s64, copygc_waiting_for ) __field(u64, seen ) __field(u64, open ) __field(u64, need_journal_commit ) __field(u64, nouse ) __field(bool, nonblocking ) - __field(int, ret ) + __array(char, err, 16 ) ), TP_fast_assign( __entry->dev = ca->dev; strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve)); + __entry->free = free; __entry->avail = avail; + __entry->copygc_wait_amount = copygc_wait_amount; + __entry->copygc_waiting_for = copygc_waiting_for; __entry->seen = seen; __entry->open = open; __entry->need_journal_commit = need_journal_commit; __entry->nouse = nouse; __entry->nonblocking = nonblocking; - __entry->ret = ret; + strlcpy(__entry->err, err, sizeof(__entry->err)); ), - TP_printk("%d,%d reserve %s avail %llu seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u ret %i", + TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->reserve, + __entry->free, __entry->avail, + __entry->copygc_wait_amount, + __entry->copygc_waiting_for, __entry->seen, __entry->open, __entry->need_journal_commit, __entry->nouse, __entry->nonblocking, - __entry->ret) + __entry->err) ); TRACE_EVENT(discard_buckets, TP_PROTO(struct bch_fs *c, u64 seen, u64 open, - u64 
need_journal_commit, u64 discarded, int ret), - TP_ARGS(c, seen, open, need_journal_commit, discarded, ret), + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err), TP_STRUCT__entry( __field(dev_t, dev ) @@ -512,7 +575,7 @@ TRACE_EVENT(discard_buckets, __field(u64, open ) __field(u64, need_journal_commit ) __field(u64, discarded ) - __field(int, ret ) + __array(char, err, 16 ) ), TP_fast_assign( @@ -521,16 +584,16 @@ TRACE_EVENT(discard_buckets, __entry->open = open; __entry->need_journal_commit = need_journal_commit; __entry->discarded = discarded; - __entry->ret = ret; + strlcpy(__entry->err, err, sizeof(__entry->err)); ), - TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i", + TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->seen, __entry->open, __entry->need_journal_commit, __entry->discarded, - __entry->ret) + __entry->err) ); TRACE_EVENT(invalidate_bucket, @@ -649,9 +712,9 @@ TRACE_EVENT(copygc_wait, ); DECLARE_EVENT_CLASS(transaction_event, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip), + TP_ARGS(trans, caller_ip), TP_STRUCT__entry( __array(char, trans_fn, 24 ) @@ -659,7 +722,7 @@ DECLARE_EVENT_CLASS(transaction_event, ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; ), @@ -667,217 +730,206 @@ DECLARE_EVENT_CLASS(transaction_event, ); DEFINE_EVENT(transaction_event, transaction_commit, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, transaction_restart_ip, - TP_PROTO(const char *trans_fn, 
+DEFINE_EVENT(transaction_event, transaction_restart_injected, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_journal_res_get, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_journal_preres_get, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_fault_inject, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_traverse_all, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_mark_replicas, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans_fn, caller_ip) + TP_ARGS(trans, caller_ip) +); + +DEFINE_EVENT(transaction_event, 
trans_restart_too_many_iters, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), + TP_ARGS(trans, caller_ip) ); DECLARE_EVENT_CLASS(transaction_restart_iter, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos), + struct btree_path *path), + TP_ARGS(trans, caller_ip, path), TP_STRUCT__entry( __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, btree_id ) - __field(u64, pos_inode ) - __field(u64, pos_offset ) - __field(u32, pos_snapshot ) + TRACE_BPOS_entries(pos) ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->pos_inode = pos->inode; - __entry->pos_offset = pos->offset; - __entry->pos_snapshot = pos->snapshot; + __entry->btree_id = path->btree_id; + TRACE_BPOS_assign(pos, path->pos) ), - TP_printk("%s %pS btree %u pos %llu:%llu:%u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u", __entry->trans_fn, (void *) __entry->caller_ip, - __entry->btree_id, + bch2_btree_ids[__entry->btree_id], __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); 
DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_event, 
transaction_restart_key_cache_upgrade, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), + TP_ARGS(trans, caller_ip) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos), - TP_ARGS(trans_fn, caller_ip, btree_id, pos) + struct btree_path *path), + TP_ARGS(trans, caller_ip, path) ); TRACE_EVENT(trans_restart_would_deadlock, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - bool in_traverse_all, unsigned reason, - enum btree_id have_btree_id, - unsigned 
have_iter_type, - struct bpos *have_pos, - enum btree_id want_btree_id, - unsigned want_iter_type, + struct btree_path *have, + struct btree_path *want, struct bpos *want_pos), - TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason, - have_btree_id, have_iter_type, have_pos, - want_btree_id, want_iter_type, want_pos), + TP_ARGS(trans, caller_ip, reason, + have, want, want_pos), TP_STRUCT__entry( __array(char, trans_fn, 24 ) @@ -885,35 +937,24 @@ TRACE_EVENT(trans_restart_would_deadlock, __field(u8, in_traverse_all ) __field(u8, reason ) __field(u8, have_btree_id ) - __field(u8, have_iter_type ) + __field(u8, have_type ) __field(u8, want_btree_id ) - __field(u8, want_iter_type ) - - __field(u64, have_pos_inode ) - __field(u64, have_pos_offset ) - __field(u32, have_pos_snapshot) - __field(u32, want_pos_snapshot) - __field(u64, want_pos_inode ) - __field(u64, want_pos_offset ) + __field(u8, want_type ) + TRACE_BPOS_entries(have_pos) + TRACE_BPOS_entries(want_pos) ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; - __entry->in_traverse_all = in_traverse_all; + __entry->in_traverse_all = trans->in_traverse_all; __entry->reason = reason; - __entry->have_btree_id = have_btree_id; - __entry->have_iter_type = have_iter_type; - __entry->want_btree_id = want_btree_id; - __entry->want_iter_type = want_iter_type; - - __entry->have_pos_inode = have_pos->inode; - __entry->have_pos_offset = have_pos->offset; - __entry->have_pos_snapshot = have_pos->snapshot; - - __entry->want_pos_inode = want_pos->inode; - __entry->want_pos_offset = want_pos->offset; - __entry->want_pos_snapshot = want_pos->snapshot; + __entry->have_btree_id = have->btree_id; + __entry->have_type = have->cached; + __entry->want_btree_id = want->btree_id; + __entry->want_type = want->cached; + TRACE_BPOS_assign(have_pos, have->pos); + TRACE_BPOS_assign(want_pos, *want_pos); ), 
TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", @@ -922,37 +963,37 @@ TRACE_EVENT(trans_restart_would_deadlock, __entry->in_traverse_all, __entry->reason, __entry->have_btree_id, - __entry->have_iter_type, + __entry->have_type, __entry->have_pos_inode, __entry->have_pos_offset, __entry->have_pos_snapshot, __entry->want_btree_id, - __entry->want_iter_type, + __entry->want_type, __entry->want_pos_inode, __entry->want_pos_offset, __entry->want_pos_snapshot) ); TRACE_EVENT(trans_restart_would_deadlock_write, - TP_PROTO(const char *trans_fn), - TP_ARGS(trans_fn), + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans), TP_STRUCT__entry( __array(char, trans_fn, 24 ) ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ), TP_printk("%s", __entry->trans_fn) ); TRACE_EVENT(trans_restart_mem_realloced, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, unsigned long bytes), - TP_ARGS(trans_fn, caller_ip, bytes), + TP_ARGS(trans, caller_ip, bytes), TP_STRUCT__entry( __array(char, trans_fn, 24 ) @@ -961,7 +1002,7 @@ TRACE_EVENT(trans_restart_mem_realloced, ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->bytes = bytes; ), @@ -973,32 +1014,28 @@ TRACE_EVENT(trans_restart_mem_realloced, ); TRACE_EVENT(trans_restart_key_cache_key_realloced, - TP_PROTO(const char *trans_fn, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *pos, + struct btree_path *path, unsigned old_u64s, unsigned new_u64s), - TP_ARGS(trans_fn, caller_ip, btree_id, pos, old_u64s, new_u64s), + TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), TP_STRUCT__entry( __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) 
__field(enum btree_id, btree_id ) - __field(u64, inode ) - __field(u64, offset ) - __field(u32, snapshot ) + TRACE_BPOS_entries(pos) __field(u32, old_u64s ) __field(u32, new_u64s ) ), TP_fast_assign( - strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->inode = pos->inode; - __entry->offset = pos->offset; - __entry->snapshot = pos->snapshot; + strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + + __entry->btree_id = path->btree_id; + TRACE_BPOS_assign(pos, path->pos); __entry->old_u64s = old_u64s; __entry->new_u64s = new_u64s; ), @@ -1007,9 +1044,9 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, __entry->trans_fn, (void *) __entry->caller_ip, bch2_btree_ids[__entry->btree_id], - __entry->inode, - __entry->offset, - __entry->snapshot, + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot, __entry->old_u64s, __entry->new_u64s) ); diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 5070caf..5c6ccf6 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -236,7 +236,7 @@ retry: &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (ret) { - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (ret != -ENOENT) acl = ERR_PTR(ret); @@ -335,7 +335,7 @@ retry: btree_err: bch2_trans_iter_exit(&trans, &inode_iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index ca1f45c..2281b8d 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -543,7 +543,7 @@ int bch2_alloc_read(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error reading alloc info: %i", ret); + bch_err(c, "error reading alloc info: %s", bch2_err_str(ret)); return ret; } @@ -794,7 +794,7 @@ static int 
bch2_check_discard_freespace_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter alloc_iter; - struct bkey_s_c k, freespace_k; + struct bkey_s_c alloc_k; struct bch_alloc_v4 a; u64 genbits; struct bpos pos; @@ -804,14 +804,6 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, struct printbuf buf = PRINTBUF; int ret; - freespace_k = bch2_btree_iter_peek(iter); - if (!freespace_k.k) - return 1; - - ret = bkey_err(freespace_k); - if (ret) - return ret; - pos = iter->pos; pos.offset &= ~(~0ULL << 56); genbits = iter->pos.offset & (~0ULL << 56); @@ -823,18 +815,18 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, bch2_btree_ids[iter->btree_id], pos.inode, pos.offset)) goto delete; - k = bch2_btree_iter_peek_slot(&alloc_iter); - ret = bkey_err(k); + alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(alloc_k); if (ret) goto err; - bch2_alloc_to_v4(k, &a); + bch2_alloc_to_v4(alloc_k, &a); if (fsck_err_on(a.data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(a)), c, "%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf), + (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_ids[iter->btree_id], a.data_type == state, genbits >> 56, alloc_freespace_genbits(a) >> 56)) @@ -855,6 +847,7 @@ int bch2_check_alloc_info(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter, discard_iter, freespace_iter; + struct bkey_s_c k; int ret = 0; bch2_trans_init(&trans, c, 0, 0); @@ -884,36 +877,16 @@ int bch2_check_alloc_info(struct bch_fs *c) if (ret < 0) goto err; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_PREFETCH); - while (1) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_check_discard_freespace_key(&trans, &iter)); - if (ret) - break; - - bch2_btree_iter_advance(&iter); - } - 
bch2_trans_iter_exit(&trans, &iter); - - if (ret < 0) - goto err; - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN, - BTREE_ITER_PREFETCH); - while (1) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_check_discard_freespace_key(&trans, &iter)); - if (ret) - break; - - bch2_btree_iter_advance(&iter); - } - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_need_discard, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + bch2_check_discard_freespace_key(&trans, &iter)) ?: + for_each_btree_key_commit(&trans, iter, + BTREE_ID_freespace, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + bch2_check_discard_freespace_key(&trans, &iter)); err: bch2_trans_exit(&trans); return ret < 0 ? ret : 0; @@ -1016,17 +989,44 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) return ret < 0 ? ret : 0; } -static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, - struct bch_dev *ca, bool *discard_done) +static int bch2_discard_one_bucket(struct btree_trans *trans, + struct btree_iter *need_discard_iter, + struct bpos *discard_pos_done, + u64 *seen, + u64 *open, + u64 *need_journal_commit, + u64 *discarded) { struct bch_fs *c = trans->c; - struct btree_iter iter; + struct bpos pos = need_discard_iter->pos; + struct btree_iter iter = { NULL }; struct bkey_s_c k; + struct bch_dev *ca; struct bkey_i_alloc_v4 *a; struct printbuf buf = PRINTBUF; - int ret; + bool did_discard = false; + int ret = 0; + + ca = bch_dev_bkey_exists(c, pos.inode); + if (!percpu_ref_tryget(&ca->io_ref)) { + bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); + return 0; + } + + if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { + (*open)++; + goto out; + } - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos, + if 
(bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, + pos.inode, pos.offset)) { + (*need_journal_commit)++; + goto out; + } + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + need_discard_iter->pos, BTREE_ITER_CACHED); k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); @@ -1062,7 +1062,8 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, goto out; } - if (!*discard_done && ca->mi.discard && !c->opts.nochanges) { + if (bkey_cmp(*discard_pos_done, iter.pos) && + ca->mi.discard && !c->opts.nochanges) { /* * This works without any other locks because this is the only * thread that removes items from the need_discard tree @@ -1071,20 +1072,32 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, blkdev_issue_discard(ca->disk_sb.bdev, k.k->p.offset * ca->mi.bucket_size, ca->mi.bucket_size, - GFP_KERNEL, 0); - *discard_done = true; + GFP_KERNEL); - ret = bch2_trans_relock(trans) ? 
0 : -EINTR; + ret = bch2_trans_relock(trans); if (ret) goto out; } + *discard_pos_done = iter.pos; + did_discard = true; + SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); a->v.data_type = alloc_data_type(a->v, a->v.data_type); write: - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL); + if (ret) + goto out; + + if (did_discard) { + this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]); + (*discarded)++; + } out: bch2_trans_iter_exit(trans, &iter); + percpu_ref_put(&ca->io_ref); printbuf_exit(&buf); return ret; } @@ -1092,61 +1105,27 @@ out: static void bch2_do_discards_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, discard_work); - struct bch_dev *ca = NULL; struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; + struct bpos discard_pos_done = POS_MAX; int ret; bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_need_discard, - POS_MIN, 0, k, ret) { - bool discard_done = false; - - if (ca && k.k->p.inode != ca->dev_idx) { - percpu_ref_put(&ca->io_ref); - ca = NULL; - } - - if (!ca) { - ca = bch_dev_bkey_exists(c, k.k->p.inode); - if (!percpu_ref_tryget(&ca->io_ref)) { - ca = NULL; - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); - continue; - } - } - - seen++; - - if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) { - open++; - continue; - } - - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - k.k->p.inode, k.k->p.offset)) { - need_journal_commit++; - continue; - } - - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_NOFAIL, - bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done)); - if (ret) - break; - - this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]); - discarded++; 
- } - bch2_trans_iter_exit(&trans, &iter); - - if (ca) - percpu_ref_put(&ca->io_ref); + /* + * We're doing the commit in bch2_discard_one_bucket instead of using + * for_each_btree_key_commit() so that we can increment counters after + * successful commit: + */ + ret = for_each_btree_key2(&trans, iter, + BTREE_ID_need_discard, POS_MIN, 0, k, + bch2_discard_one_bucket(&trans, &iter, &discard_pos_done, + &seen, + &open, + &need_journal_commit, + &discarded)); bch2_trans_exit(&trans); @@ -1155,7 +1134,8 @@ static void bch2_do_discards_work(struct work_struct *work) percpu_ref_put(&c->writes); - trace_discard_buckets(c, seen, open, need_journal_commit, discarded, ret); + trace_discard_buckets(c, seen, open, need_journal_commit, discarded, + bch2_err_str(ret)); } void bch2_do_discards(struct bch_fs *c) @@ -1165,29 +1145,20 @@ void bch2_do_discards(struct bch_fs *c) percpu_ref_put(&c->writes); } -static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca, - struct bpos *bucket_pos, unsigned *cached_sectors) +static int invalidate_one_bucket(struct btree_trans *trans, + struct btree_iter *lru_iter, struct bkey_s_c k, + unsigned dev_idx, s64 *nr_to_invalidate) { struct bch_fs *c = trans->c; - struct btree_iter lru_iter, alloc_iter = { NULL }; - struct bkey_s_c k; + struct btree_iter alloc_iter = { NULL }; struct bkey_i_alloc_v4 *a; - u64 bucket, idx; + struct bpos bucket; struct printbuf buf = PRINTBUF; - int ret; - - bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, - POS(ca->dev_idx, 0), 0); -next_lru: - k = bch2_btree_iter_peek(&lru_iter); - ret = bkey_err(k); - if (ret) - goto out; + unsigned cached_sectors; + int ret = 0; - if (!k.k || k.k->p.inode != ca->dev_idx) { - ret = 1; - goto out; - } + if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx) + return 1; if (k.k->type != KEY_TYPE_lru) { prt_printf(&buf, "non lru key in lru btree:\n "); @@ -1195,26 +1166,22 @@ next_lru: if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { bch_err(c, "%s", 
buf.buf); - bch2_btree_iter_advance(&lru_iter); - goto next_lru; } else { bch2_trans_inconsistent(trans, "%s", buf.buf); ret = -EINVAL; - goto out; } - } - idx = k.k->p.offset; - bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx); + goto out; + } - *bucket_pos = POS(ca->dev_idx, bucket); + bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx)); - a = bch2_trans_start_alloc_update(trans, &alloc_iter, *bucket_pos); + a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket); ret = PTR_ERR_OR_ZERO(a); if (ret) goto out; - if (idx != alloc_lru_idx(a->v)) { + if (k.k->p.offset != alloc_lru_idx(a->v)) { prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); prt_printf(&buf, "\n "); @@ -1222,19 +1189,18 @@ next_lru: if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { bch_err(c, "%s", buf.buf); - bch2_btree_iter_advance(&lru_iter); - goto next_lru; } else { bch2_trans_inconsistent(trans, "%s", buf.buf); ret = -EINVAL; - goto out; } + + goto out; } if (!a->v.cached_sectors) bch_err(c, "invalidating empty bucket, confused"); - *cached_sectors = a->v.cached_sectors; + cached_sectors = a->v.cached_sectors; SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); a->v.gen++; @@ -1244,13 +1210,18 @@ next_lru: a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now); - ret = bch2_trans_update(trans, &alloc_iter, &a->k_i, - BTREE_TRIGGER_BUCKET_INVALIDATE); + ret = bch2_trans_update(trans, &alloc_iter, &a->k_i, + BTREE_TRIGGER_BUCKET_INVALIDATE) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL); if (ret) goto out; + + trace_invalidate_bucket(c, bucket.inode, bucket.offset, cached_sectors); + this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]); + --*nr_to_invalidate; out: bch2_trans_iter_exit(trans, &alloc_iter); - bch2_trans_iter_exit(trans, &lru_iter); printbuf_exit(&buf); return ret; 
} @@ -1260,8 +1231,9 @@ static void bch2_do_invalidates_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); struct bch_dev *ca; struct btree_trans trans; - struct bpos bucket; - unsigned i, sectors; + struct btree_iter iter; + struct bkey_s_c k; + unsigned i; int ret = 0; bch2_trans_init(&trans, c, 0, 0); @@ -1270,17 +1242,13 @@ static void bch2_do_invalidates_work(struct work_struct *work) s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); - while (nr_to_invalidate-- >= 0) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_NOFAIL, - invalidate_one_bucket(&trans, ca, &bucket, - §ors)); - if (ret) - break; + ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru, + POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k, + invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate)); - trace_invalidate_bucket(c, bucket.inode, bucket.offset, sectors); - this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]); + if (ret < 0) { + percpu_ref_put(&ca->ref); + break; } } @@ -1295,16 +1263,13 @@ void bch2_do_invalidates(struct bch_fs *c) percpu_ref_put(&c->writes); } -static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter) +static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, struct bch_dev *ca) { struct bch_alloc_v4 a; - struct bkey_s_c k; - int ret; - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; + if (iter->pos.offset >= ca->mi.nbuckets) + return 1; bch2_alloc_to_v4(k, &a); return bch2_bucket_do_index(trans, k, &a, true); @@ -1320,25 +1285,16 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - BTREE_ITER_SLOTS| - BTREE_ITER_PREFETCH, k, ret) { - if (iter.pos.offset >= ca->mi.nbuckets) - break; - - 
ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW, - bucket_freespace_init(&trans, &iter)); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + POS(ca->dev_idx, ca->mi.first_bucket), + BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW, + bucket_freespace_init(&trans, &iter, k, ca)); bch2_trans_exit(&trans); - if (ret) { - bch_err(ca, "error initializing free space: %i", ret); + if (ret < 0) { + bch_err(ca, "error initializing free space: %s", bch2_err_str(ret)); return ret; } @@ -1347,7 +1303,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); mutex_unlock(&c->sb_lock); - return ret; + return 0; } int bch2_fs_freespace_init(struct bch_fs *c) diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 2ac6b50..044bc72 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -150,11 +150,13 @@ void bch2_do_discards(struct bch_fs *); static inline u64 should_invalidate_buckets(struct bch_dev *ca, struct bch_dev_usage u) { - u64 free = u.d[BCH_DATA_free].buckets + - u.d[BCH_DATA_need_discard].buckets; + u64 want_free = ca->mi.nbuckets >> 7; + u64 free = max_t(s64, 0, + u.d[BCH_DATA_free].buckets + + u.d[BCH_DATA_need_discard].buckets + - bch2_dev_buckets_reserved(ca, RESERVE_none)); - return clamp_t(s64, (ca->mi.nbuckets >> 7) - free, - 0, u.d[BCH_DATA_cached].buckets); + return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); } void bch2_do_invalidates(struct bch_fs *); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 7a878a6..0a9f131 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -26,6 +26,7 @@ #include "error.h" #include "io.h" #include "journal.h" +#include "movinggc.h" #include #include @@ -226,7 +227,7 @@ static struct open_bucket 
*__try_alloc_bucket(struct bch_fs *c, struct bch_dev * c->blocked_allocate_open_bucket = local_clock(); spin_unlock(&c->freelist_lock); - return ERR_PTR(-OPEN_BUCKETS_EMPTY); + return ERR_PTR(-BCH_ERR_open_buckets_empty); } /* Recheck under lock: */ @@ -339,6 +340,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc skipped_nouse, cl); err: + set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); printbuf_exit(&buf); return ob; @@ -395,7 +397,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans, *cur_bucket = max_t(u64, *cur_bucket, ca->mi.first_bucket); *cur_bucket = max_t(u64, *cur_bucket, ca->new_fs_bucket_idx); - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket), + for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket), BTREE_ITER_SLOTS, k, ret) { struct bch_alloc_v4 a; @@ -425,7 +427,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans, *cur_bucket = iter.pos.offset; - return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY); + return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); } static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, @@ -454,6 +456,11 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, BUG_ON(ca->new_fs_bucket_idx); + /* + * XXX: + * On transaction restart, we'd like to restart from the bucket we were + * at previously + */ for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, POS(ca->dev_idx, *cur_bucket), 0, k, ret) { if (k.k->p.inode != ca->dev_idx) @@ -462,10 +469,9 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k)); *cur_bucket < k.k->p.offset && !ob; (*cur_bucket)++) { - if (btree_trans_too_many_iters(trans)) { - ob = ERR_PTR(-EINTR); + ret = btree_trans_too_many_iters(trans); + if (ret) break; - } (*buckets_seen)++; @@ -476,7 +482,8 @@ static struct open_bucket 
*bch2_bucket_alloc_trans(struct btree_trans *trans, skipped_nouse, k, cl); } - if (ob) + + if (ob || ret) break; } bch2_trans_iter_exit(trans, &iter); @@ -496,8 +503,10 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, { struct open_bucket *ob = NULL; struct bch_dev_usage usage; + bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); + u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor; u64 avail; - u64 cur_bucket = 0; + u64 cur_bucket = start; u64 buckets_seen = 0; u64 skipped_open = 0; u64 skipped_need_journal_commit = 0; @@ -506,7 +515,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, int ret; again: usage = bch2_dev_usage_read(ca); - avail = dev_buckets_free(ca, usage,reserve); + avail = dev_buckets_free(ca, usage, reserve); if (usage.d[BCH_DATA_need_discard].buckets > avail) bch2_do_discards(c); @@ -527,7 +536,7 @@ again: if (!c->blocked_allocate) c->blocked_allocate = local_clock(); - ob = ERR_PTR(-FREELIST_EMPTY); + ob = ERR_PTR(-BCH_ERR_freelist_empty); goto err; } @@ -551,17 +560,30 @@ again: if (skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); + + if (!ob && !ret && !freespace_initialized && start) { + start = cur_bucket = 0; + goto again; + } + + if (!freespace_initialized) + ca->bucket_alloc_trans_early_cursor = cur_bucket; err: if (!ob) - ob = ERR_PTR(ret ?: -FREELIST_EMPTY); + ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); if (IS_ERR(ob)) { - trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], avail, + trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], + usage.d[BCH_DATA_free].buckets, + avail, + bch2_copygc_wait_amount(c), + c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), buckets_seen, skipped_open, skipped_need_journal_commit, skipped_nouse, - cl == NULL, PTR_ERR(ob)); + cl == NULL, + bch2_err_str(PTR_ERR(ob))); atomic_long_inc(&c->bucket_alloc_fail); } @@ -648,7 +670,7 @@ int 
bch2_bucket_alloc_set(struct bch_fs *c, bch2_dev_alloc_list(c, stripe, devs_may_alloc); unsigned dev; struct bch_dev *ca; - int ret = -INSUFFICIENT_DEVICES; + int ret = -BCH_ERR_insufficient_devices; unsigned i; BUG_ON(*nr_effective >= nr_replicas); @@ -846,8 +868,8 @@ static int open_bucket_add_buckets(struct bch_fs *c, target, erasure_code, nr_replicas, nr_effective, have_cache, flags, _cl); - if (ret == -FREELIST_EMPTY || - ret == -OPEN_BUCKETS_EMPTY) + if (bch2_err_matches(ret, BCH_ERR_freelist_empty) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) return ret; if (*nr_effective >= nr_replicas) return 0; @@ -868,7 +890,9 @@ retry_blocking: ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, nr_replicas, nr_effective, have_cache, reserve, flags, cl); - if (ret && ret != -INSUFFICIENT_DEVICES && !cl && _cl) { + if (ret && + !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && + !cl && _cl) { cl = _cl; goto retry_blocking; } @@ -1111,7 +1135,7 @@ alloc_done: if (erasure_code && !ec_open_bucket(c, &ptrs)) pr_debug("failed to get ec bucket: ret %u", ret); - if (ret == -INSUFFICIENT_DEVICES && + if (ret == -BCH_ERR_insufficient_devices && nr_effective >= nr_replicas_required) ret = 0; @@ -1142,19 +1166,18 @@ err: mutex_unlock(&wp->lock); - if (ret == -FREELIST_EMPTY && + if (bch2_err_matches(ret, BCH_ERR_freelist_empty) && try_decrease_writepoints(c, write_points_nr)) goto retry; - switch (ret) { - case -OPEN_BUCKETS_EMPTY: - case -FREELIST_EMPTY: + if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) || + bch2_err_matches(ret, BCH_ERR_freelist_empty)) return cl ? 
ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC); - case -INSUFFICIENT_DEVICES: + + if (bch2_err_matches(ret, BCH_ERR_insufficient_devices)) return ERR_PTR(-EROFS); - default: - return ERR_PTR(ret); - } + + return ERR_PTR(ret); } struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 08d6795..5a46b25 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -443,8 +443,8 @@ int bch2_get_next_backpointer(struct btree_trans *trans, goto out; } - for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers, - bp_pos, 0, k, ret) { + for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, + bp_pos, 0, k, ret) { if (bpos_cmp(k.k->p, bp_end_pos) >= 0) break; @@ -569,22 +569,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, return NULL; } -static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter) +static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct btree_iter alloc_iter = { NULL }; struct bch_dev *ca; - struct bkey_s_c k, alloc_k; + struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret = 0; - k = bch2_btree_iter_peek(bp_iter); - ret = bkey_err(k); - if (ret) - return ret; - if (!k.k) - return 0; - if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, "backpointer for mising device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -619,25 +613,14 @@ fsck_err: /* verify that every backpointer has a corresponding alloc key */ int bch2_check_btree_backpointers(struct bch_fs *c) { - struct btree_trans trans; struct btree_iter iter; - int ret = 0; - - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0); - - do { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, - bch2_check_btree_backpointer(&trans, 
&iter)); - if (ret) - break; - } while (bch2_btree_iter_advance(&iter)); + struct bkey_s_c k; - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); - return ret; + return bch2_trans_run(c, + for_each_btree_key_commit(&trans, iter, + BTREE_ID_backpointers, POS_MIN, 0, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + bch2_check_btree_backpointer(&trans, &iter, k))); } static int check_bp_exists(struct btree_trans *trans, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 31e387b..8ffdb4d 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -319,6 +319,8 @@ BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM #endif +#define BCH_LOCK_TIME_NR 128 + #define BCH_TIME_STATS() \ x(btree_node_mem_alloc) \ x(btree_node_split) \ @@ -463,6 +465,7 @@ struct bch_dev { /* Allocator: */ u64 new_fs_bucket_idx; + u64 bucket_alloc_trans_early_cursor; unsigned nr_open_buckets; unsigned nr_btree_reserve; @@ -528,6 +531,11 @@ struct btree_debug { unsigned id; }; +struct lock_held_stats { + struct time_stats times[BCH_LOCK_TIME_NR]; + const char *names[BCH_LOCK_TIME_NR]; +}; + struct bch_fs_pcpu { u64 sectors_available; }; @@ -921,6 +929,8 @@ struct bch_fs { bool promote_whole_extents; struct time_stats times[BCH_TIME_STAT_NR]; + + struct lock_held_stats lock_held_stats; }; static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 4d032ae..5a6c93d 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -7,6 +7,7 @@ #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" +#include "errcode.h" #include "error.h" #include @@ -700,20 +701,16 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * been freed: */ if (trans && !bch2_btree_node_relock(trans, path, level + 1)) { - trace_trans_restart_relock_parent_for_fill(trans->fn, - _THIS_IP_, btree_id, &path->pos); - btree_trans_restart(trans); - return 
ERR_PTR(-EINTR); + trace_trans_restart_relock_parent_for_fill(trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); } b = bch2_btree_node_mem_alloc(c, level != 0); if (trans && b == ERR_PTR(-ENOMEM)) { trans->memory_allocation_failure = true; - trace_trans_restart_memory_allocation_failure(trans->fn, - _THIS_IP_, btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + trace_trans_restart_memory_allocation_failure(trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail)); } if (IS_ERR(b)) @@ -750,18 +747,19 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, if (!sync) return NULL; - if (trans && - (!bch2_trans_relock(trans) || - !bch2_btree_path_relock_intent(trans, path))) { - BUG_ON(!trans->restarted); - return ERR_PTR(-EINTR); + if (trans) { + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock_intent(trans, path); + if (ret) { + BUG_ON(!trans->restarted); + return ERR_PTR(ret); + } } if (!six_relock_type(&b->c.lock, lock_type, seq)) { - trace_trans_restart_relock_after_fill(trans->fn, _THIS_IP_, - btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + if (trans) + trace_trans_restart_relock_after_fill(trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); } return b; @@ -772,7 +770,9 @@ static int lock_node_check_fn(struct six_lock *lock, void *p) struct btree *b = container_of(lock, struct btree, c.lock); const struct bkey_i *k = p; - return b->hash_val == btree_ptr_hash_val(k) ? 
0 : -1; + if (b->hash_val != btree_ptr_hash_val(k)) + return BCH_ERR_lock_fail_node_reused; + return 0; } static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) @@ -831,6 +831,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; + int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); @@ -893,13 +894,16 @@ lock_node: * was removed - and we'll bail out: */ if (btree_node_read_locked(path, level + 1)) - btree_node_unlock(path, level + 1); + btree_node_unlock(trans, path, level + 1); - if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type, - lock_node_check_fn, (void *) k, trace_ip)) { - if (!trans->restarted) + ret = btree_node_lock(trans, path, b, k->k.p, level, lock_type, + lock_node_check_fn, (void *) k, trace_ip); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) goto retry; - return ERR_PTR(-EINTR); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ERR_PTR(ret); + BUG(); } if (unlikely(b->hash_val != btree_ptr_hash_val(k) || @@ -909,12 +913,8 @@ lock_node: if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; - trace_trans_restart_btree_node_reused(trans->fn, - trace_ip, - path->btree_id, - &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + trace_trans_restart_btree_node_reused(trans, trace_ip, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } } @@ -930,11 +930,13 @@ lock_node: * should_be_locked is not set on this path yet, so we need to * relock it specifically: */ - if (trans && - (!bch2_trans_relock(trans) || - !bch2_btree_path_relock_intent(trans, path))) { - BUG_ON(!trans->restarted); - return ERR_PTR(-EINTR); + if (trans) { + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock_intent(trans, path); + if (ret) { + BUG_ON(!trans->restarted); + return ERR_PTR(ret); + } } if 
(!six_relock_type(&b->c.lock, lock_type, seq)) diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 214529b..2f56336 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -98,7 +98,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = -BCH_ERR_need_topology_repair; goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); @@ -126,7 +126,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = -BCH_ERR_need_topology_repair; goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); @@ -402,8 +402,8 @@ again: } if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); break; } @@ -471,8 +471,8 @@ again: ret = PTR_ERR_OR_ZERO(cur); if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); goto err; } @@ -537,7 +537,7 @@ static int bch2_repair_topology(struct bch_fs *c) if (ret == DROP_THIS_NODE) { bch_err(c, "empty btree root - repair unimplemented"); - ret = FSCK_ERR_EXIT; + ret = -BCH_ERR_fsck_repair_unimplemented; } } @@ -804,7 +804,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, fsck_err: err: if (ret) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); return ret; } @@ -910,7 +910,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false, &k, true); if (ret) { - 
bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret); + bch_err(c, "%s: error from bch2_gc_mark_key: %s", + __func__, bch2_err_str(ret)); goto fsck_err; } @@ -959,7 +960,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { - ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = -BCH_ERR_need_topology_repair; bch_info(c, "Halting mark and sweep to start topology repair pass"); goto fsck_err; } else { @@ -970,8 +971,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b continue; } } else if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); break; } @@ -1012,7 +1013,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c, "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); - ret = FSCK_ERR_EXIT; + ret = -BCH_ERR_fsck_repair_unimplemented; goto fsck_err; } @@ -1021,7 +1022,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c, "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); - ret = FSCK_ERR_EXIT; + ret = -BCH_ERR_fsck_repair_unimplemented; goto fsck_err; } @@ -1038,7 +1039,7 @@ fsck_err: six_unlock_read(&b->c.lock); if (ret < 0) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); printbuf_exit(&buf); return ret; } @@ -1071,7 +1072,7 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) : bch2_gc_btree(&trans, ids[i], initial, metadata_only); if (ret < 0) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); 
bch2_trans_exit(&trans); return ret; @@ -1269,7 +1270,7 @@ fsck_err: if (ca) percpu_ref_put(&ca->ref); if (ret) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); percpu_up_write(&c->mark_lock); printbuf_exit(&buf); @@ -1324,21 +1325,19 @@ static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, static int bch2_alloc_write_key(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, bool metadata_only) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); struct bucket gc, *b; - struct bkey_s_c k; struct bkey_i_alloc_v4 *a; struct bch_alloc_v4 old, new; enum bch_data_type type; int ret; - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; + if (bkey_cmp(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0) + return 1; bch2_alloc_to_v4(k, &old); new = old; @@ -1431,31 +1430,21 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) bch2_trans_init(&trans, c, 0, 0); for_each_member_device(ca, c, i) { - for_each_btree_key(&trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - BTREE_ITER_SLOTS| - BTREE_ITER_PREFETCH, k, ret) { - if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0) - break; - - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW, - bch2_alloc_write_key(&trans, &iter, - metadata_only)); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); - - if (ret) { - bch_err(c, "error writing alloc info: %i", ret); + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + POS(ca->dev_idx, ca->mi.first_bucket), + BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW, + bch2_alloc_write_key(&trans, &iter, k, metadata_only)); + + if (ret < 0) { + bch_err(c, "error writing alloc info: %s", bch2_err_str(ret)); percpu_ref_put(&ca->ref); break; } } bch2_trans_exit(&trans); - return ret; + return ret < 0 ? 
ret : 0; } static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) @@ -1512,7 +1501,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error reading alloc info at gc start: %i", ret); + bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret)); return ret; } @@ -1539,72 +1528,79 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) }; } -static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) +static int bch2_gc_write_reflink_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + size_t *idx) { - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct reflink_gc *r; - size_t idx = 0; + struct bch_fs *c = trans->c; + const __le64 *refcount = bkey_refcount_c(k); struct printbuf buf = PRINTBUF; + struct reflink_gc *r; int ret = 0; - if (metadata_only) + if (!refcount) return 0; - bch2_trans_init(&trans, c, 0, 0); + while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && + r->offset < k.k->p.offset) + ++*idx; - for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - const __le64 *refcount = bkey_refcount_c(k); + if (!r || + r->offset != k.k->p.offset || + r->size != k.k->size) { + bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); + return -EINVAL; + } - if (!refcount) - continue; + if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, + "reflink key has wrong refcount:\n" + " %s\n" + " should be %u", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf), + r->refcount)) { + struct bkey_i *new; - r = genradix_ptr(&c->reflink_gc_table, idx++); - if (!r || - r->offset != k.k->p.offset || - r->size != k.k->size) { - bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); - ret = -EINVAL; - break; - } + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + return ret; - if 
(fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, - "reflink key has wrong refcount:\n" - " %s\n" - " should be %u", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf), - r->refcount)) { - struct bkey_i *new; + bkey_reassemble(new, k); - new = kmalloc(bkey_bytes(k.k), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - break; - } + if (!r->refcount) + new->k.type = KEY_TYPE_deleted; + else + *bkey_refcount(new) = cpu_to_le64(r->refcount); - bkey_reassemble(new, k); + ret = bch2_trans_update(trans, iter, new, 0); + } +fsck_err: + printbuf_exit(&buf); + return ret; +} - if (!r->refcount) - new->k.type = KEY_TYPE_deleted; - else - *bkey_refcount(new) = cpu_to_le64(r->refcount); +static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + size_t idx = 0; + int ret = 0; - ret = commit_do(&trans, NULL, NULL, 0, - __bch2_btree_insert(&trans, BTREE_ID_reflink, new)); - kfree(new); + if (metadata_only) + return 0; + + bch2_trans_init(&trans, c, 0, 0); + + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_gc_write_reflink_key(&trans, &iter, k, &idx)); - if (ret) - break; - } - } -fsck_err: - bch2_trans_iter_exit(&trans, &iter); c->reflink_gc_nr = 0; bch2_trans_exit(&trans); - printbuf_exit(&buf); return ret; } @@ -1656,66 +1652,73 @@ static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only) r->refcount = 0; } -static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) +static int bch2_gc_write_stripes_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) { - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct gc_stripe *m; - const struct bch_stripe *s; + struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + const struct bch_stripe *s; + struct gc_stripe *m; unsigned i; int ret = 0; - if 
(metadata_only) + if (k.k->type != KEY_TYPE_stripe) return 0; - bch2_trans_init(&trans, c, 0, 0); + s = bkey_s_c_to_stripe(k).v; + m = genradix_ptr(&c->gc_stripes, k.k->p.offset); - for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - if (k.k->type != KEY_TYPE_stripe) - continue; - - s = bkey_s_c_to_stripe(k).v; - m = genradix_ptr(&c->gc_stripes, k.k->p.offset); - - for (i = 0; i < s->nr_blocks; i++) - if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0)) - goto inconsistent; - continue; + for (i = 0; i < s->nr_blocks; i++) + if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0)) + goto inconsistent; + return 0; inconsistent: - if (fsck_err_on(true, c, - "stripe has wrong block sector count %u:\n" - " %s\n" - " should be %u", i, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf), - m ? m->block_sectors[i] : 0)) { - struct bkey_i_stripe *new; - - new = kmalloc(bkey_bytes(k.k), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - break; - } + if (fsck_err_on(true, c, + "stripe has wrong block sector count %u:\n" + " %s\n" + " should be %u", i, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf), + m ? m->block_sectors[i] : 0)) { + struct bkey_i_stripe *new; + + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + return ret; - bkey_reassemble(&new->k_i, k); + bkey_reassemble(&new->k_i, k); - for (i = 0; i < new->v.nr_blocks; i++) - stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); + for (i = 0; i < new->v.nr_blocks; i++) + stripe_blockcount_set(&new->v, i, m ? 
m->block_sectors[i] : 0); - ret = commit_do(&trans, NULL, NULL, 0, - __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i)); - kfree(new); - } + ret = bch2_trans_update(trans, iter, &new->k_i, 0); } fsck_err: - bch2_trans_iter_exit(&trans, &iter); + printbuf_exit(&buf); + return ret; +} - bch2_trans_exit(&trans); +static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; - printbuf_exit(&buf); + if (metadata_only) + return 0; + + bch2_trans_init(&trans, c, 0, 0); + + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_stripes, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_gc_write_stripes_key(&trans, &iter, k)); + + bch2_trans_exit(&trans); return ret; } @@ -1777,7 +1780,7 @@ again: ret = bch2_gc_btrees(c, initial, metadata_only); - if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR && + if (ret == -BCH_ERR_need_topology_repair && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) && !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); @@ -1785,8 +1788,8 @@ again: ret = 0; } - if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR) - ret = FSCK_ERR_EXIT; + if (ret == -BCH_ERR_need_topology_repair) + ret = -BCH_ERR_fsck_errors_not_fixed; if (ret) goto out; @@ -1969,7 +1972,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, gc_btree_gens_key(&trans, &iter, k)); if (ret) { - bch_err(c, "error recalculating oldest_gen: %i", ret); + bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret)); goto err; } } @@ -1982,7 +1985,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter, k)); if (ret) { - bch_err(c, "error writing oldest_gen: %i", ret); + bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret)); goto err; } @@ -2054,7 +2057,7 @@ static int bch2_gc_thread(void *arg) ret = bch2_gc_gens(c); #endif if (ret < 0) - bch_err(c, "btree gc 
failed: %i", ret); + bch_err(c, "btree gc failed: %s", bch2_err_str(ret)); debug_check_no_locks_held(); } @@ -2084,7 +2087,7 @@ int bch2_gc_thread_start(struct bch_fs *c) p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); if (IS_ERR(p)) { - bch_err(c, "error creating gc thread: %li", PTR_ERR(p)); + bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p))); return PTR_ERR(p); } diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 9bf3f77..ae731b3 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -543,7 +543,7 @@ enum btree_validate_ret { struct printbuf out = PRINTBUF; \ \ btree_err_msg(&out, c, ca, b, i, b->written, write); \ - prt_printf(&out, ": " msg, ##__VA_ARGS__); \ + prt_printf(&out, ": " msg, ##__VA_ARGS__); \ \ if (type == BTREE_ERR_FIXABLE && \ write == READ && \ @@ -558,7 +558,7 @@ enum btree_validate_ret { \ switch (type) { \ case BTREE_ERR_FIXABLE: \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ case BTREE_ERR_WANT_RETRY: \ if (have_retry) { \ @@ -570,7 +570,7 @@ enum btree_validate_ret { ret = BTREE_RETRY_READ; \ goto fsck_err; \ case BTREE_ERR_FATAL: \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ } \ break; \ @@ -578,7 +578,7 @@ enum btree_validate_ret { bch_err(c, "corrupt metadata before write: %s", out.buf);\ \ if (bch2_fs_inconsistent(c)) { \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ } \ break; \ diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 923381d..946c462 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -16,6 +16,7 @@ #include "replicas.h" #include "subvolume.h" +#include #include #include @@ -46,7 +47,7 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans) if (need_resched() || race_fault()) { bch2_trans_unlock(trans); schedule(); - return bch2_trans_relock(trans) ? 
0 : -EINTR; + return bch2_trans_relock(trans); } else { return 0; } @@ -99,12 +100,6 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos return p; } -static inline bool is_btree_node(struct btree_path *path, unsigned l) -{ - return l < BTREE_MAX_DEPTH && - (unsigned long) path->l[l].b >= 128; -} - static inline struct bpos btree_iter_search_key(struct btree_iter *iter) { struct bpos pos = iter->pos; @@ -143,15 +138,37 @@ void bch2_btree_node_unlock_write(struct btree_trans *trans, bch2_btree_node_unlock_write_inlined(trans, path, b); } -void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) +struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans, + struct btree_path *skip, + struct btree *b, + unsigned level) { - struct btree_path *linked; - unsigned readers = 0; + struct btree_path *path; + struct six_lock_count ret = { 0, 0 }; + + if (IS_ERR_OR_NULL(b)) + return ret; + + trans_for_each_path(trans, path) + if (path != skip && path->l[level].b == b) { + ret.read += btree_node_read_locked(path, level); + ret.intent += btree_node_intent_locked(path, level); + } + + return ret; +} - trans_for_each_path(trans, linked) - if (linked->l[b->c.level].b == b && - btree_node_read_locked(linked, b->c.level)) - readers++; +static inline void six_lock_readers_add(struct six_lock *lock, int nr) +{ + if (!lock->readers) + atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter); + else + this_cpu_add(*lock->readers, nr); +} + +void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) +{ + int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read; /* * Must drop our read locks before calling six_lock_write() - @@ -159,19 +176,9 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) * goes to 0, and it's safe because we have the node intent * locked: */ - if (!b->c.lock.readers) - atomic64_sub(__SIX_VAL(read_lock, readers), - 
&b->c.lock.state.counter); - else - this_cpu_sub(*b->c.lock.readers, readers); - + six_lock_readers_add(&b->c.lock, -readers); six_lock_write(&b->c.lock, NULL, NULL); - - if (!b->c.lock.readers) - atomic64_add(__SIX_VAL(read_lock, readers), - &b->c.lock.state.counter); - else - this_cpu_add(*b->c.lock.readers, readers); + six_lock_readers_add(&b->c.lock, readers); } bool __bch2_btree_node_relock(struct btree_trans *trans, @@ -193,14 +200,9 @@ bool __bch2_btree_node_relock(struct btree_trans *trans, return true; } fail: - if (b != BTREE_ITER_NO_NODE_CACHED && - b != BTREE_ITER_NO_NODE_INIT) - trace_btree_node_relock_fail(trans->fn, _RET_IP_, - path->btree_id, - &path->pos, - (unsigned long) b, - path->l[level].lock_seq, - is_btree_node(path, level) ? b->c.lock.state.seq : 0); + if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) && + b != ERR_PTR(-BCH_ERR_no_btree_node_init)) + trace_btree_node_relock_fail(trans, _RET_IP_, path, level); return false; } @@ -236,10 +238,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, if (btree_node_lock_seq_matches(path, b, level) && btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) { - btree_node_unlock(path, level); + btree_node_unlock(trans, path, level); goto success; } + trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level); return false; success: mark_btree_node_intent_locked(trans, path, level); @@ -271,11 +274,13 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, * the node that we failed to relock: */ if (fail_idx >= 0) { - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); do { - path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; + path->l[fail_idx].b = upgrade + ? 
ERR_PTR(-BCH_ERR_no_btree_node_upgrade) + : ERR_PTR(-BCH_ERR_no_btree_node_relock); --fail_idx; } while (fail_idx >= 0); } @@ -297,13 +302,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, } /* Slowpath: */ -bool __bch2_btree_node_lock(struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct bpos pos, unsigned level, - enum six_lock_type type, - six_lock_should_sleep_fn should_sleep_fn, void *p, - unsigned long ip) +int __bch2_btree_node_lock(struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct bpos pos, unsigned level, + enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, void *p, + unsigned long ip) { struct btree_path *linked; unsigned reason; @@ -373,16 +378,8 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, return btree_node_lock_type(trans, path, b, pos, level, type, should_sleep_fn, p); deadlock: - trace_trans_restart_would_deadlock(trans->fn, ip, - trans->in_traverse_all, reason, - linked->btree_id, - linked->cached, - &linked->pos, - path->btree_id, - path->cached, - &pos); - btree_trans_restart(trans); - return false; + trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } /* Btree iterator locking: */ @@ -420,8 +417,8 @@ static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} /* * Only for btree_cache.c - only relocks intent locks */ -bool bch2_btree_path_relock_intent(struct btree_trans *trans, - struct btree_path *path) +int bch2_btree_path_relock_intent(struct btree_trans *trans, + struct btree_path *path) { unsigned l; @@ -429,30 +426,32 @@ bool bch2_btree_path_relock_intent(struct btree_trans *trans, l < path->locks_want && btree_path_node(path, l); l++) { if (!bch2_btree_node_relock(trans, path, l)) { - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); btree_path_set_dirty(path, 
BTREE_ITER_NEED_TRAVERSE); - trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_, - path->btree_id, &path->pos); - btree_trans_restart(trans); - return false; + trace_trans_restart_relock_path_intent(trans, _RET_IP_, path); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); } } - return true; + return 0; } __flatten -static bool bch2_btree_path_relock(struct btree_trans *trans, +static bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { - bool ret = btree_path_get_locks(trans, path, false); + return btree_path_get_locks(trans, path, false); +} - if (!ret) { - trace_trans_restart_relock_path(trans->fn, trace_ip, - path->btree_id, &path->pos); - btree_trans_restart(trans); +static int bch2_btree_path_relock(struct btree_trans *trans, + struct btree_path *path, unsigned long trace_ip) +{ + if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) { + trace_trans_restart_relock_path(trans, trace_ip, path); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); } - return ret; + + return 0; } bool __bch2_btree_path_upgrade(struct btree_trans *trans, @@ -500,7 +499,8 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, return false; } -void __bch2_btree_path_downgrade(struct btree_path *path, +void __bch2_btree_path_downgrade(struct btree_trans *trans, + struct btree_path *path, unsigned new_locks_want) { unsigned l; @@ -512,7 +512,7 @@ void __bch2_btree_path_downgrade(struct btree_path *path, while (path->nodes_locked && (l = __fls(path->nodes_locked)) >= path->locks_want) { if (l > path->level) { - btree_node_unlock(path, l); + btree_node_unlock(trans, path, l); } else { if (btree_node_intent_locked(path, l)) { six_lock_downgrade(&path->l[l].b->c.lock); @@ -530,27 +530,26 @@ void bch2_trans_downgrade(struct btree_trans *trans) struct btree_path *path; trans_for_each_path(trans, path) - bch2_btree_path_downgrade(path); + 
bch2_btree_path_downgrade(trans, path); } /* Btree transaction locking: */ -bool bch2_trans_relock(struct btree_trans *trans) +int bch2_trans_relock(struct btree_trans *trans) { struct btree_path *path; if (unlikely(trans->restarted)) - return false; + return -BCH_ERR_transaction_restart_relock; trans_for_each_path(trans, path) if (path->should_be_locked && - !bch2_btree_path_relock(trans, path, _RET_IP_)) { - trace_trans_restart_relock(trans->fn, _RET_IP_, - path->btree_id, &path->pos); + bch2_btree_path_relock(trans, path, _RET_IP_)) { + trace_trans_restart_relock(trans, _RET_IP_, path); BUG_ON(!trans->restarted); - return false; + return -BCH_ERR_transaction_restart_relock; } - return true; + return 0; } void bch2_trans_unlock(struct btree_trans *trans) @@ -558,7 +557,7 @@ void bch2_trans_unlock(struct btree_trans *trans) struct btree_path *path; trans_for_each_path(trans, path) - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); /* * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking @@ -586,7 +585,7 @@ static void bch2_btree_path_verify_cached(struct btree_trans *trans, bkey_cmp(ck->key.pos, path->pos)); if (!locked) - btree_node_unlock(path, 0); + btree_node_unlock(trans, path, 0); } static void bch2_btree_path_verify_level(struct btree_trans *trans, @@ -643,7 +642,7 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans, } if (!locked) - btree_node_unlock(path, level); + btree_node_unlock(trans, path, level); return; err: bch2_bpos_to_text(&buf1, path->pos); @@ -1020,27 +1019,29 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c, bch2_btree_node_iter_peek_all(&l->iter, l->b)); } -static inline struct bkey_s_c btree_path_level_peek(struct bch_fs *c, +static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans, struct btree_path *path, struct btree_path_level *l, struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(c, l, u, + struct bkey_s_c k = 
__btree_iter_unpack(trans->c, l, u, bch2_btree_node_iter_peek(&l->iter, l->b)); path->pos = k.k ? k.k->p : l->b->key.k.p; + bch2_btree_path_verify_level(trans, path, l - path->l); return k; } -static inline struct bkey_s_c btree_path_level_prev(struct bch_fs *c, +static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans, struct btree_path *path, struct btree_path_level *l, struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(c, l, u, + struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, bch2_btree_node_iter_prev(&l->iter, l->b)); path->pos = k.k ? k.k->p : l->b->data->min_key; + bch2_btree_path_verify_level(trans, path, l - path->l); return k; } @@ -1115,7 +1116,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans, } if (!parent_locked) - btree_node_unlock(path, plevel); + btree_node_unlock(trans, path, plevel); } static inline void __btree_path_level_init(struct btree_path *path, @@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b) if (path->nodes_locked && t != BTREE_NODE_UNLOCKED) { - btree_node_unlock(path, b->c.level); + btree_node_unlock(trans, path, b->c.level); six_lock_increment(&b->c.lock, t); mark_btree_node_locked(trans, path, b->c.level, t); } @@ -1195,7 +1196,9 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) struct btree *b = container_of(lock, struct btree, c.lock); struct btree **rootp = p; - return b == *rootp ? 
0 : -1; + if (b != *rootp) + return BCH_ERR_lock_fail_root_changed; + return 0; } static inline int btree_path_lock_root(struct btree_trans *trans, @@ -1207,6 +1210,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, struct btree *b, **rootp = &c->btree_roots[path->btree_id].b; enum six_lock_type lock_type; unsigned i; + int ret; EBUG_ON(path->nodes_locked); @@ -1228,20 +1232,23 @@ static inline int btree_path_lock_root(struct btree_trans *trans, } lock_type = __btree_lock_want(path, path->level); - if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX, - path->level, lock_type, - lock_root_check_fn, rootp, - trace_ip))) { - if (trans->restarted) - return -EINTR; - continue; + ret = btree_node_lock(trans, path, b, SPOS_MAX, + path->level, lock_type, + lock_root_check_fn, rootp, + trace_ip); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) + continue; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + BUG(); } if (likely(b == READ_ONCE(*rootp) && b->c.level == path->level && !race_fault())) { for (i = 0; i < path->level; i++) - path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; + path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root); path->l[path->level].b = b; for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) path->l[i].b = NULL; @@ -1286,7 +1293,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat } if (!was_locked) - btree_node_unlock(path, path->level); + btree_node_unlock(trans, path, path->level); bch2_bkey_buf_exit(&tmp, c); return ret; @@ -1321,7 +1328,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p } if (!was_locked) - btree_node_unlock(path, path->level); + btree_node_unlock(trans, path, path->level); bch2_bkey_buf_exit(&tmp, c); return ret; @@ -1346,7 +1353,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, bp->mem_ptr = (unsigned long)b; if (!locked) - btree_node_unlock(path, plevel); 
+ btree_node_unlock(trans, path, plevel); } static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, @@ -1419,7 +1426,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans, btree_node_mem_ptr_set(trans, path, level + 1, b); if (btree_node_read_locked(path, level + 1)) - btree_node_unlock(path, level + 1); + btree_node_unlock(trans, path, level + 1); path->level = level; bch2_btree_path_verify_locks(path); @@ -1439,11 +1446,11 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) int i, ret = 0; if (trans->in_traverse_all) - return -EINTR; + return -BCH_ERR_transaction_restart_in_traverse_all; trans->in_traverse_all = true; retry_all: - trans->restarted = false; + trans->restarted = 0; trans->traverse_all_idx = U8_MAX; trans_for_each_path(trans, path) @@ -1487,7 +1494,8 @@ retry_all: */ if (path->uptodate) { ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); - if (ret == -EINTR || ret == -ENOMEM) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + ret == -ENOMEM) goto retry_all; if (ret) goto err; @@ -1509,7 +1517,7 @@ err: trans->in_traverse_all = false; - trace_trans_traverse_all(trans->fn, trace_ip); + trace_trans_traverse_all(trans, trace_ip); return ret; } @@ -1528,14 +1536,6 @@ static inline bool btree_path_good_node(struct btree_trans *trans, return true; } -static void btree_path_set_level_up(struct btree_path *path) -{ - btree_node_unlock(path, path->level); - path->l[path->level].b = BTREE_ITER_NO_NODE_UP; - path->level++; - btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -} - static void btree_path_set_level_down(struct btree_trans *trans, struct btree_path *path, unsigned new_level) @@ -1546,7 +1546,7 @@ static void btree_path_set_level_down(struct btree_trans *trans, for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++) if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED) - btree_node_unlock(path, l); + btree_node_unlock(trans, path, l); btree_path_set_dirty(path, 
BTREE_ITER_NEED_TRAVERSE); bch2_btree_path_verify(trans, path); @@ -1559,22 +1559,16 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, unsigned i, l = path->level; while (btree_path_node(path, l) && - !btree_path_good_node(trans, path, l, check_pos)) { - btree_node_unlock(path, l); - path->l[l].b = BTREE_ITER_NO_NODE_UP; - l++; - } + !btree_path_good_node(trans, path, l, check_pos)) + __btree_path_set_level_up(trans, path, l++); /* If we need intent locks, take them too: */ for (i = l + 1; i < path->locks_want && btree_path_node(path, i); i++) if (!bch2_btree_node_relock(trans, path, i)) - while (l <= i) { - btree_node_unlock(path, l); - path->l[l].b = BTREE_ITER_NO_NODE_UP; - l++; - } + while (l <= i) + __btree_path_set_level_up(trans, path, l++); return l; } @@ -1594,19 +1588,17 @@ static int btree_path_traverse_one(struct btree_trans *trans, unsigned long trace_ip) { unsigned depth_want = path->level; - int ret = 0; + int ret = trans->restarted; - if (unlikely(trans->restarted)) { - ret = -EINTR; + if (unlikely(ret)) goto out; - } /* * Ensure we obey path->should_be_locked: if it's set, we can't unlock * and re-traverse the path without a transaction restart: */ if (path->should_be_locked) { - ret = bch2_btree_path_relock(trans, path, trace_ip) ? 
0 : -EINTR; + ret = bch2_btree_path_relock(trans, path, trace_ip); goto out; } @@ -1640,22 +1632,16 @@ static int btree_path_traverse_one(struct btree_trans *trans, goto out; } - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); path->level = depth_want; - - if (ret == -EIO) - path->l[path->level].b = - BTREE_ITER_NO_NODE_ERROR; - else - path->l[path->level].b = - BTREE_ITER_NO_NODE_DOWN; + path->l[path->level].b = ERR_PTR(ret); goto out; } } path->uptodate = BTREE_ITER_UPTODATE; out: - BUG_ON((ret == -EINTR) != !!trans->restarted); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); bch2_btree_path_verify(trans, path); return ret; } @@ -1663,6 +1649,16 @@ out: int __must_check bch2_btree_path_traverse(struct btree_trans *trans, struct btree_path *path, unsigned flags) { + if (0 && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { + unsigned restart_probability_bits = 4 << min(trans->restart_count, 32U); + u64 mask = ~(~0ULL << restart_probability_bits); + + if ((prandom_u32() & mask) == mask) { + trace_transaction_restart_injected(trans, _RET_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); + } + } + if (path->uptodate < BTREE_ITER_NEED_RELOCK) return 0; @@ -1737,8 +1733,8 @@ bch2_btree_path_set_pos(struct btree_trans *trans, bch2_btree_path_check_sort(trans, path, cmp); if (unlikely(path->cached)) { - btree_node_unlock(path, 0); - path->l[0].b = BTREE_ITER_NO_NODE_CACHED; + btree_node_unlock(trans, path, 0); + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); goto out; } @@ -1760,7 +1756,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans, if (l != path->level) { btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); } out: bch2_btree_path_verify(trans, path); @@ -1771,37 +1767,37 @@ out: static struct btree_path *have_path_at_pos(struct btree_trans *trans, 
struct btree_path *path) { - struct btree_path *next; + struct btree_path *sib; - next = prev_btree_path(trans, path); - if (next && !btree_path_cmp(next, path)) - return next; + sib = prev_btree_path(trans, path); + if (sib && !btree_path_cmp(sib, path)) + return sib; - next = next_btree_path(trans, path); - if (next && !btree_path_cmp(next, path)) - return next; + sib = next_btree_path(trans, path); + if (sib && !btree_path_cmp(sib, path)) + return sib; return NULL; } static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path) { - struct btree_path *next; + struct btree_path *sib; - next = prev_btree_path(trans, path); - if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) - return next; + sib = prev_btree_path(trans, path); + if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b) + return sib; - next = next_btree_path(trans, path); - if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) - return next; + sib = next_btree_path(trans, path); + if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b) + return sib; return NULL; } static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) { - __bch2_btree_path_unlock(path); + __bch2_btree_path_unlock(trans, path); btree_path_list_remove(trans, path); trans->paths_allocated &= ~(1ULL << path->idx); } @@ -1816,26 +1812,23 @@ void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool inte if (!__btree_path_put(path, intent)) return; - /* - * Perhaps instead we should check for duplicate paths in traverse_all: - */ - if (path->preserve && - (dup = have_path_at_pos(trans, path))) { - dup->preserve = true; - path->preserve = false; - goto free; - } + dup = path->preserve + ? 
have_path_at_pos(trans, path) + : have_node_at_pos(trans, path); + + if (!dup && !(!path->preserve && !is_btree_node(path, path->level))) + return; - if (!path->preserve && - (dup = have_node_at_pos(trans, path))) - goto free; - return; -free: if (path->should_be_locked && - !btree_node_locked(dup, path->level)) + !trans->restarted && + (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_))) return; - dup->should_be_locked |= path->should_be_locked; + if (dup) { + dup->preserve |= path->preserve; + dup->should_be_locked |= path->should_be_locked; + } + __bch2_path_free(trans, path); } @@ -1891,10 +1884,10 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) bch2_bpos_to_text(&buf, path->pos); - printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n", + printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n", path->idx, path->ref, path->intent_ref, - path->should_be_locked ? " S" : "", - path->preserve ? " P" : "", + path->preserve ? 'P' : ' ', + path->should_be_locked ? 
'S' : ' ', bch2_btree_ids[path->btree_id], path->level, buf.buf, @@ -1947,6 +1940,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, struct btree_path *path, *path_pos = NULL; bool cached = flags & BTREE_ITER_CACHED; bool intent = flags & BTREE_ITER_INTENT; + bool have_dup = false; int i; BUG_ON(trans->restarted); @@ -1954,14 +1948,24 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, bch2_trans_verify_locks(trans); trans_for_each_path_inorder(trans, path, i) { - if (__btree_path_cmp(path, - btree_id, - cached, - pos, - level) > 0) + int cmp = __btree_path_cmp(path, + btree_id, + cached, + pos, + level); + if (cmp > 0) break; path_pos = path; + + if (cmp == 0) { + if (path->ref || path->preserve) { + path->preserve = true; + have_dup = true; + } else { + break; + } + } } if (path_pos && @@ -1985,14 +1989,14 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, path->nodes_locked = 0; path->nodes_intent_locked = 0; for (i = 0; i < ARRAY_SIZE(path->l); i++) - path->l[i].b = BTREE_ITER_NO_NODE_INIT; + path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init); #ifdef CONFIG_BCACHEFS_DEBUG path->ip_allocated = ip; #endif btree_trans_verify_sorted(trans); } - if (!(flags & BTREE_ITER_NOPRESERVE)) + if (!(flags & BTREE_ITER_NOPRESERVE) && !have_dup) path->preserve = true; if (path->intent_ref) @@ -2039,11 +2043,7 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct EBUG_ON(ck && (path->btree_id != ck->key.btree_id || bkey_cmp(path->pos, ck->key.pos))); - - /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? 
*/ - if (unlikely(!ck || !ck->valid)) - return bkey_s_c_null; - + EBUG_ON(!ck || !ck->valid); EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); *u = ck->k->k; @@ -2079,7 +2079,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) if (ret) return ret; - iter->path->should_be_locked = true; + btree_path_set_should_be_locked(iter->path); return 0; } @@ -2110,8 +2110,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - iter->path->should_be_locked = true; - BUG_ON(iter->path->uptodate); + btree_path_set_should_be_locked(iter->path); out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -2139,28 +2138,24 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) /* got to end? */ if (!btree_path_node(path, path->level + 1)) { - btree_path_set_level_up(path); + btree_path_set_level_up(trans, path); return NULL; } if (!bch2_btree_node_relock(trans, path, path->level + 1)) { - __bch2_btree_path_unlock(path); - path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS; - path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS; + __bch2_btree_path_unlock(trans, path); + path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); + path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); - trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_, - path->btree_id, &path->pos); - btree_trans_restart(trans); - ret = -EINTR; + trace_trans_restart_relock_next_node(trans, _THIS_IP_, path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); goto err; } b = btree_path_node(path, path->level + 1); if (!bpos_cmp(iter->pos, b->key.k.p)) { - btree_node_unlock(path, path->level); - path->l[path->level].b = BTREE_ITER_NO_NODE_UP; - path->level++; + __btree_path_set_level_up(trans, path, path->level++); } else { /* * 
Haven't gotten to the end of the parent node: go back down to @@ -2186,7 +2181,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - iter->path->should_be_locked = true; + btree_path_set_should_be_locked(iter->path); BUG_ON(iter->path->uptodate); out: bch2_btree_iter_verify_entry_exit(iter); @@ -2328,7 +2323,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos if (unlikely(ret)) return bkey_s_c_err(ret); - iter->key_cache_path->should_be_locked = true; + btree_path_set_should_be_locked(iter->key_cache_path); return bch2_btree_path_peek_slot(iter->key_cache_path, &u); } @@ -2356,7 +2351,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp goto out; } - iter->path->should_be_locked = true; + btree_path_set_should_be_locked(iter->path); k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); @@ -2444,7 +2439,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e while (1) { k = __bch2_btree_iter_peek(iter, search_key); if (!k.k || bkey_err(k)) - goto out; + goto out_no_locked; /* * iter->pos should be mononotically increasing, and always be @@ -2461,7 +2456,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e if (bkey_cmp(iter_pos, end) > 0) { bch2_btree_iter_set_pos(iter, end); k = bkey_s_c_null; - goto out; + goto out_no_locked; } if (iter->update_path && @@ -2523,18 +2518,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - BUG_ON(!iter->path->nodes_locked); -out: + + btree_path_set_should_be_locked(iter->path); +out_no_locked: if (iter->update_path) { if (iter->update_path->uptodate && - 
!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) { - k = bkey_s_c_err(-EINTR); - } else { - BUG_ON(!(iter->update_path->nodes_locked & 1)); - iter->update_path->should_be_locked = true; - } + (ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) + k = bkey_s_c_err(ret); + else + btree_path_set_should_be_locked(iter->update_path); } - iter->path->should_be_locked = true; if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) iter->pos.snapshot = iter->snapshot; @@ -2578,13 +2571,13 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); k = bkey_s_c_err(ret); - goto out; + goto out_no_locked; } /* Already at end? */ if (!btree_path_node(iter->path, iter->path->level)) { k = bkey_s_c_null; - goto out; + goto out_no_locked; } k = btree_path_level_peek_all(trans->c, @@ -2595,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) (iter->advanced && !bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) { iter->pos = path_l(iter->path)->b->key.k.p; - btree_path_set_level_up(iter->path); + btree_path_set_level_up(trans, iter->path); iter->advanced = false; continue; } @@ -2637,8 +2630,8 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) } iter->pos = k.k->p; -out: - iter->path->should_be_locked = true; + btree_path_set_should_be_locked(iter->path); +out_no_locked: bch2_btree_iter_verify(iter); return k; @@ -2692,16 +2685,16 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); k = bkey_s_c_err(ret); - goto out; + goto out_no_locked; } - k = btree_path_level_peek(trans->c, iter->path, + k = btree_path_level_peek(trans, iter->path, &iter->path->l[0], &iter->k); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) ? 
bpos_cmp(bkey_start_pos(k.k), search_key) >= 0 : bpos_cmp(k.k->p, search_key) > 0)) - k = btree_path_level_prev(trans->c, iter->path, + k = btree_path_level_prev(trans, iter->path, &iter->path->l[0], &iter->k); bch2_btree_path_check_sort(trans, iter->path, 0); @@ -2758,7 +2751,7 @@ got_key: /* Start of btree: */ bch2_btree_iter_set_pos(iter, POS_MIN); k = bkey_s_c_null; - goto out; + goto out_no_locked; } } @@ -2770,10 +2763,11 @@ got_key: if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) iter->pos.snapshot = iter->snapshot; -out: + + btree_path_set_should_be_locked(iter->path); +out_no_locked: if (saved_path) bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT); - iter->path->should_be_locked = true; bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -2846,9 +2840,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { - if (!bkey_err(k)) + if (bkey_err(k)) { + goto out_no_locked; + } else { iter->k = *k.k; - goto out; + goto out; + } } k = bch2_btree_path_peek_slot(iter->path, &iter->k); @@ -2902,8 +2899,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } } out: - iter->path->should_be_locked = true; - + btree_path_set_should_be_locked(iter->path); +out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); ret = bch2_btree_iter_verify_ret(iter, k); @@ -3184,9 +3181,8 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trans->mem_bytes = new_bytes; if (old_bytes) { - trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + trace_trans_restart_mem_realloced(trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); } } @@ -3200,11 +3196,11 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) * 
bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset * - * While iterating over nodes or updating nodes a attempt to lock a btree - * node may return EINTR when the trylock fails. When this occurs - * bch2_trans_begin() should be called and the transaction retried. + * While iterating over nodes or updating nodes a attempt to lock a btree node + * may return BCH_ERR_transaction_restart when the trylock fails. When this + * occurs bch2_trans_begin() should be called and the transaction retried. */ -void bch2_trans_begin(struct btree_trans *trans) +u32 bch2_trans_begin(struct btree_trans *trans) { struct btree_path *path; @@ -3250,11 +3246,20 @@ void bch2_trans_begin(struct btree_trans *trans) bch2_trans_relock(trans); } + trans->last_restarted_ip = _RET_IP_; if (trans->restarted) bch2_btree_path_traverse_all(trans); - trans->restarted = false; trans->last_begin_time = ktime_get_ns(); + return trans->restart_count; +} + +void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count) +{ + bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans, + "trans->restart_count %u, should be %u, last restarted by %ps\n", + trans->restart_count, restart_count, + (void *) trans->last_restarted_ip); } static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) @@ -3291,6 +3296,15 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->last_begin_time = ktime_get_ns(); trans->task = current; + while (c->lock_held_stats.names[trans->lock_name_idx] != fn + && c->lock_held_stats.names[trans->lock_name_idx] != 0) + trans->lock_name_idx++; + + if (trans->lock_name_idx >= BCH_LOCK_TIME_NR) + pr_warn_once("lock_times array not big enough!"); + else + c->lock_held_stats.names[trans->lock_name_idx] = fn; + bch2_trans_alloc_paths(trans, c); if (expected_mem_bytes) { @@ -3393,18 +3407,18 @@ void bch2_trans_exit(struct btree_trans *trans) static void __maybe_unused 
bch2_btree_path_node_to_text(struct printbuf *out, - struct btree_bkey_cached_common *_b, + struct btree_bkey_cached_common *b, bool cached) { prt_printf(out, " l=%u %s:", - _b->level, bch2_btree_ids[_b->btree_id]); - bch2_bpos_to_text(out, btree_node_pos(_b, cached)); + b->level, bch2_btree_ids[b->btree_id]); + bch2_bpos_to_text(out, btree_node_pos(b, cached)); } void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) { struct btree_path *path; - struct btree *b; + struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; unsigned l; @@ -3423,12 +3437,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) prt_printf(out, "\n"); for (l = 0; l < BTREE_MAX_DEPTH; l++) { - if (btree_node_locked(path, l)) { + if (btree_node_locked(path, l) && + !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) { prt_printf(out, " %s l=%u ", btree_node_intent_locked(path, l) ? "i" : "r", l); - bch2_btree_path_node_to_text(out, - (void *) path->l[l].b, - path->cached); + bch2_btree_path_node_to_text(out, b, path->cached); prt_printf(out, "\n"); } } @@ -3446,8 +3459,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) bch2_bpos_to_text(out, trans->locking_pos); prt_printf(out, " node "); - bch2_btree_path_node_to_text(out, - (void *) b, path->cached); + bch2_btree_path_node_to_text(out, b, path->cached); prt_printf(out, "\n"); } } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 4b9d03b..f38fd25 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -5,6 +5,8 @@ #include "bset.h" #include "btree_types.h" +#include + static inline void __btree_path_get(struct btree_path *path, bool intent) { path->ref++; @@ -159,19 +161,36 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, struct btree *, struct btree_node_iter *, struct bkey_packed *, unsigned, unsigned); -bool bch2_btree_path_relock_intent(struct btree_trans 
*, struct btree_path *); +int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); void bch2_path_put(struct btree_trans *, struct btree_path *, bool); -bool bch2_trans_relock(struct btree_trans *); +int bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count) +{ + return restart_count != trans->restart_count; +} + +void bch2_trans_verify_not_restarted(struct btree_trans *, u32); + +__always_inline +static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err) +{ + BUG_ON(err <= 0); + BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); + + trans->restarted = err; + trans->restart_count++; + return -err; +} + __always_inline -static inline int btree_trans_restart(struct btree_trans *trans) +static inline int btree_trans_restart(struct btree_trans *trans, int err) { - trans->restarted = true; - bch2_trans_unlock(trans); - return -EINTR; + btree_trans_restart_nounlock(trans, err); + return -err; } bool bch2_btree_node_upgrade(struct btree_trans *, @@ -191,14 +210,15 @@ static inline bool bch2_btree_path_upgrade(struct btree_trans *trans, : path->uptodate == BTREE_ITER_UPTODATE; } -void __bch2_btree_path_downgrade(struct btree_path *, unsigned); +void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned); -static inline void bch2_btree_path_downgrade(struct btree_path *path) +static inline void bch2_btree_path_downgrade(struct btree_trans *trans, + struct btree_path *path) { unsigned new_locks_want = path->level + !!path->intent_ref; if (path->locks_want > new_locks_want) - __bch2_btree_path_downgrade(path, new_locks_want); + __bch2_btree_path_downgrade(trans, path, new_locks_want); } void bch2_trans_downgrade(struct btree_trans *); @@ -279,11 +299,12 @@ void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); static inline void set_btree_iter_dontneed(struct 
btree_iter *iter) { - iter->path->preserve = false; + if (!iter->trans->restarted) + iter->path->preserve = false; } void *bch2_trans_kmalloc(struct btree_trans *, size_t); -void bch2_trans_begin(struct btree_trans *); +u32 bch2_trans_begin(struct btree_trans *); static inline struct btree * __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter) @@ -291,7 +312,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter struct btree *b; while (b = bch2_btree_iter_peek_node(iter), - PTR_ERR_OR_ZERO(b) == -EINTR) + bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) bch2_trans_begin(trans); return b; @@ -315,6 +336,15 @@ static inline int bkey_err(struct bkey_s_c k) return PTR_ERR_OR_ZERO(k.k); } +static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, + unsigned flags) +{ + BUG_ON(flags & BTREE_ITER_ALL_LEVELS); + + return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : + bch2_btree_iter_peek_prev(iter); +} + static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, unsigned flags) { @@ -338,8 +368,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter * static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2 - ? 
-EINTR : 0; + if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) { + trace_trans_restart_too_many_iters(trans, _THIS_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); + } + + return 0; } static inline struct bkey_s_c @@ -350,12 +384,52 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, while (btree_trans_too_many_iters(trans) || (k = bch2_btree_iter_peek_type(iter, flags), - bkey_err(k) == -EINTR)) + bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) bch2_trans_begin(trans); return k; } +#define lockrestart_do(_trans, _do) \ +({ \ + int _ret; \ + \ + do { \ + bch2_trans_begin(_trans); \ + _ret = (_do); \ + } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ + \ + _ret; \ +}) + +/* + * nested_lockrestart_do(), nested_commit_do(): + * + * These are like lockrestart_do() and commit_do(), with two differences: + * + * - We don't call bch2_trans_begin() unless we had a transaction restart + * - We return -BCH_ERR_transaction_restart_nested if we succeeded after a + * transaction restart + */ +#define nested_lockrestart_do(_trans, _do) \ +({ \ + u32 _restart_count, _orig_restart_count; \ + int _ret; \ + \ + _restart_count = _orig_restart_count = (_trans)->restart_count; \ + \ + while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\ + _restart_count = bch2_trans_begin(_trans); \ + \ + if (!_ret) \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ + \ + if (!_ret && trans_was_restarted(_trans, _orig_restart_count)) \ + _ret = -BCH_ERR_transaction_restart_nested; \ + \ + _ret; \ +}) + #define for_each_btree_key2(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ ({ \ @@ -364,7 +438,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ \ - do { \ + while (1) { \ bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ if (!(_k).k) { \ @@ -373,9 +447,42 
@@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, } \ \ _ret = bkey_err(_k) ?: (_do); \ - if (!_ret) \ - bch2_btree_iter_advance(&(_iter)); \ - } while (_ret == 0 || _ret == -EINTR); \ + if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + continue; \ + if (_ret) \ + break; \ + if (!bch2_btree_iter_advance(&(_iter))) \ + break; \ + } \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret; \ +}) + +#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ + _start, _flags, _k, _do) \ +({ \ + int _ret = 0; \ + \ + bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + \ + while (1) { \ + bch2_trans_begin(_trans); \ + (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ + if (!(_k).k) { \ + _ret = 0; \ + break; \ + } \ + \ + _ret = bkey_err(_k) ?: (_do); \ + if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + continue; \ + if (_ret) \ + break; \ + if (!bch2_btree_iter_rewind(&(_iter))) \ + break; \ + } \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret; \ diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index a5b0a95..fa90581 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -5,6 +5,7 @@ #include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update.h" +#include "errcode.h" #include "error.h" #include "journal.h" #include "journal_reclaim.h" @@ -290,9 +291,8 @@ static int btree_key_cache_fill(struct btree_trans *trans, k = bch2_btree_path_peek_slot(path, &u); if (!bch2_btree_node_relock(trans, ck_path, 0)) { - trace_trans_restart_relock_key_cache_fill(trans->fn, - _THIS_IP_, ck_path->btree_id, &ck_path->pos); - ret = btree_trans_restart(trans); + trace_trans_restart_relock_key_cache_fill(trans, _THIS_IP_, ck_path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); goto err; } @@ -347,8 +347,10 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p) struct bkey_cached *ck = 
container_of(lock, struct bkey_cached, c.lock); const struct btree_path *path = p; - return ck->key.btree_id == path->btree_id && - !bpos_cmp(ck->key.pos, path->pos) ? 0 : -1; + if (ck->key.btree_id != path->btree_id || + bpos_cmp(ck->key.pos, path->pos)) + return BCH_ERR_lock_fail_node_reused; + return 0; } __flatten @@ -370,11 +372,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { - if (flags & BTREE_ITER_CACHED_NOCREATE) { - path->l[0].b = NULL; - return 0; - } - ck = btree_key_cache_create(c, path->btree_id, path->pos); ret = PTR_ERR_OR_ZERO(ck); if (ret) @@ -387,14 +384,15 @@ retry: } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); - if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0, - lock_want, - bkey_cached_check_fn, path, _THIS_IP_)) { - if (!trans->restarted) + ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0, + lock_want, + bkey_cached_check_fn, path, _THIS_IP_); + if (ret) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) goto retry; - - ret = -EINTR; - goto err; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + BUG(); } if (ck->key.btree_id != path->btree_id || @@ -409,11 +407,15 @@ retry: path->l[0].lock_seq = ck->c.lock.state.seq; path->l[0].b = (void *) ck; fill: - if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { + if (!ck->valid) { + /* + * Using the underscore version because we haven't set + * path->uptodate yet: + */ if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1)) { - trace_transaction_restart_ip(trans->fn, _THIS_IP_); - ret = btree_trans_restart(trans); + trace_transaction_restart_key_cache_upgrade(trans, _THIS_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; } @@ -426,13 +428,14 @@ fill: set_bit(BKEY_CACHED_ACCESSED, &ck->flags); path->uptodate = BTREE_ITER_UPTODATE; +
BUG_ON(!ck->valid); BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); return ret; err: - if (ret != -EINTR) { - btree_node_unlock(path, 0); - path->l[0].b = BTREE_ITER_NO_NODE_ERROR; + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + btree_node_unlock(trans, path, 0); + path->l[0].b = ERR_PTR(ret); } return ret; } @@ -455,8 +458,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_ITER_ALL_SNAPSHOTS); bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_CACHED_NOCREATE| BTREE_ITER_INTENT); b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE; @@ -497,13 +498,14 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, ? JOURNAL_WATERMARK_reserved : 0)| commit_flags); - if (ret) { - bch2_fs_fatal_err_on(ret != -EINTR && - ret != -EAGAIN && - !bch2_journal_error(j), c, - "error flushing key cache: %i", ret); + + bch2_fs_fatal_err_on(ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && + !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && + !bch2_journal_error(j), c, + "error flushing key cache: %s", bch2_err_str(ret)); + if (ret) goto out; - } bch2_journal_pin_drop(j, &ck->journal); bch2_journal_preres_put(j, &ck->res); diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 67c970d..c3f3cb8 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -14,6 +14,11 @@ #include "btree_iter.h" +static inline bool is_btree_node(struct btree_path *path, unsigned l) +{ + return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b); +} + /* matches six lock types */ enum btree_node_locked_type { BTREE_NODE_UNLOCKED = -1, @@ -58,7 +63,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path, path->nodes_intent_locked &= ~(1 << level); } -static inline void mark_btree_node_locked(struct btree_trans *trans, +static inline void mark_btree_node_locked_noreset(struct btree_trans *trans, 
struct btree_path *path, unsigned level, enum six_lock_type type) @@ -73,11 +78,22 @@ static inline void mark_btree_node_locked(struct btree_trans *trans, path->nodes_intent_locked |= type << level; } +static inline void mark_btree_node_locked(struct btree_trans *trans, + struct btree_path *path, + unsigned level, + enum six_lock_type type) +{ + mark_btree_node_locked_noreset(trans, path, level, type); +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + path->l[level].lock_taken_time = ktime_get_ns(); +#endif +} + static inline void mark_btree_node_intent_locked(struct btree_trans *trans, struct btree_path *path, unsigned level) { - mark_btree_node_locked(trans, path, level, SIX_LOCK_intent); + mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent); } static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) @@ -99,23 +115,35 @@ btree_lock_want(struct btree_path *path, int level) return BTREE_NODE_UNLOCKED; } -static inline void btree_node_unlock(struct btree_path *path, unsigned level) +static inline void btree_node_unlock(struct btree_trans *trans, + struct btree_path *path, unsigned level) { int lock_type = btree_node_locked_type(path, level); EBUG_ON(level >= BTREE_MAX_DEPTH); - if (lock_type != BTREE_NODE_UNLOCKED) + if (lock_type != BTREE_NODE_UNLOCKED) { six_unlock_type(&path->l[level].b->c.lock, lock_type); +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + if (trans->lock_name_idx < BCH_LOCK_TIME_NR) { + struct bch_fs *c = trans->c; + + __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx], + path->l[level].lock_taken_time, + ktime_get_ns()); + } +#endif + } mark_btree_node_unlocked(path, level); } -static inline void __bch2_btree_path_unlock(struct btree_path *path) +static inline void __bch2_btree_path_unlock(struct btree_trans *trans, + struct btree_path *path) { btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK); while (path->nodes_locked) - btree_node_unlock(path, __ffs(path->nodes_locked)); + 
btree_node_unlock(trans, path, __ffs(path->nodes_locked)); } static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) @@ -132,7 +160,7 @@ static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) } } -static inline bool btree_node_lock_type(struct btree_trans *trans, +static inline int btree_node_lock_type(struct btree_trans *trans, struct btree_path *path, struct btree *b, struct bpos pos, unsigned level, @@ -141,10 +169,10 @@ static inline bool btree_node_lock_type(struct btree_trans *trans, { struct bch_fs *c = trans->c; u64 start_time; - bool ret; + int ret; if (six_trylock_type(&b->c.lock, type)) - return true; + return 0; start_time = local_clock(); @@ -153,14 +181,15 @@ static inline bool btree_node_lock_type(struct btree_trans *trans, trans->locking_btree_id = path->btree_id; trans->locking_level = level; trans->locking_lock_type = type; - trans->locking = b; - ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0; + trans->locking = &b->c; + ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p); trans->locking = NULL; if (ret) - bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); + return ret; - return ret; + bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); + return 0; } /* @@ -183,26 +212,34 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, return false; } -bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, - struct btree *, struct bpos, unsigned, - enum six_lock_type, - six_lock_should_sleep_fn, void *, - unsigned long); +int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, + struct btree *, struct bpos, unsigned, + enum six_lock_type, + six_lock_should_sleep_fn, void *, + unsigned long); -static inline bool btree_node_lock(struct btree_trans *trans, +static inline int btree_node_lock(struct btree_trans *trans, struct btree_path *path, struct btree *b, struct bpos pos, unsigned level, enum 
six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { + int ret = 0; + EBUG_ON(level >= BTREE_MAX_DEPTH); EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); - return likely(six_trylock_type(&b->c.lock, type)) || - btree_node_lock_increment(trans, b, level, type) || - __bch2_btree_node_lock(trans, path, b, pos, level, type, - should_sleep_fn, p, ip); + if (likely(six_trylock_type(&b->c.lock, type)) || + btree_node_lock_increment(trans, b, level, type) || + !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type, + should_sleep_fn, p, ip))) { +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + path->l[b->c.level].lock_taken_time = ktime_get_ns(); +#endif + } + + return ret; } bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); @@ -254,6 +291,30 @@ static inline void bch2_btree_node_lock_write(struct btree_trans *trans, __bch2_btree_node_lock_write(trans, b); } -#endif /* _BCACHEFS_BTREE_LOCKING_H */ +static inline void btree_path_set_should_be_locked(struct btree_path *path) +{ + EBUG_ON(!btree_node_locked(path, path->level)); + EBUG_ON(path->uptodate); + path->should_be_locked = true; +} +static inline void __btree_path_set_level_up(struct btree_trans *trans, + struct btree_path *path, + unsigned l) +{ + btree_node_unlock(trans, path, l); + path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up); +} + +static inline void btree_path_set_level_up(struct btree_trans *trans, + struct btree_path *path) +{ + __btree_path_set_level_up(trans, path, path->level++); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); +} + +struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *, + struct btree_path *, struct btree *, unsigned); + +#endif /* _BCACHEFS_BTREE_LOCKING_H */ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index be12c9f..1ff9991 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -199,15 +199,13 @@ struct btree_node_iter { #define 
BTREE_ITER_IS_EXTENTS (1 << 4) #define BTREE_ITER_NOT_EXTENTS (1 << 5) #define BTREE_ITER_CACHED (1 << 6) -#define BTREE_ITER_CACHED_NOFILL (1 << 7) -#define BTREE_ITER_CACHED_NOCREATE (1 << 8) -#define BTREE_ITER_WITH_KEY_CACHE (1 << 9) -#define BTREE_ITER_WITH_UPDATES (1 << 10) -#define BTREE_ITER_WITH_JOURNAL (1 << 11) -#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12) -#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13) -#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14) -#define BTREE_ITER_NOPRESERVE (1 << 15) +#define BTREE_ITER_WITH_KEY_CACHE (1 << 7) +#define BTREE_ITER_WITH_UPDATES (1 << 8) +#define BTREE_ITER_WITH_JOURNAL (1 << 9) +#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10) +#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11) +#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12) +#define BTREE_ITER_NOPRESERVE (1 << 13) enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, @@ -215,15 +213,6 @@ enum btree_path_uptodate { BTREE_ITER_NEED_TRAVERSE = 2, }; -#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) -#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2) -#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3) -#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4) -#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) -#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) -#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) -#define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8) - struct btree_path { u8 idx; u8 sorted_idx; @@ -251,6 +240,9 @@ struct btree_path { struct btree *b; struct btree_node_iter iter; u32 lock_seq; +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + u64 lock_taken_time; +#endif } l[BTREE_MAX_DEPTH]; #ifdef CONFIG_BCACHEFS_DEBUG unsigned long ip_allocated; @@ -391,7 +383,7 @@ struct btree_trans { const char *fn; struct list_head list; u64 last_begin_time; - struct btree *locking; + struct btree_bkey_cached_common *locking; unsigned locking_path_idx; struct bpos locking_pos; u8 locking_btree_id; @@ -405,9 +397,12 @@ struct btree_trans { u8 traverse_all_idx; bool 
used_mempool:1; bool in_traverse_all:1; - bool restarted:1; bool memory_allocation_failure:1; bool is_initial_gc:1; + enum bch_errcode restarted:16; + u32 restart_count; + unsigned long last_restarted_ip; + /* * For when bch2_trans_update notices we'll be splitting a compressed * extent: @@ -437,6 +432,7 @@ struct btree_trans { unsigned journal_u64s; unsigned journal_preres_u64s; struct replicas_delta_list *fs_usage_deltas; + int lock_name_idx; }; #define BTREE_FLAGS() \ diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index e9127db..89941fb 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -90,7 +90,6 @@ int bch2_trans_log_msg(struct btree_trans *, const char *); * This is main entry point for btree updates. * * Return values: - * -EINTR: locking changed, this function should be called again. * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ @@ -106,29 +105,33 @@ static inline int bch2_trans_commit(struct btree_trans *trans, return __bch2_trans_commit(trans); } -#define lockrestart_do(_trans, _do) \ +#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ + lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ + (_journal_seq), (_flags))) + +#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ + nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ + (_journal_seq), (_flags))) + +#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ ({ \ + struct btree_trans trans; \ int _ret; \ \ - do { \ - bch2_trans_begin(_trans); \ - _ret = (_do); \ - } while (_ret == -EINTR); \ + bch2_trans_init(&trans, (_c), 0, 0); \ + _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \ + bch2_trans_exit(&trans); \ \ _ret; \ }) -#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ - lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_flags))) - -#define bch2_trans_do(_c, 
_disk_res, _journal_seq, _flags, _do) \ +#define bch2_trans_run(_c, _do) \ ({ \ struct btree_trans trans; \ int _ret; \ \ bch2_trans_init(&trans, (_c), 0, 0); \ - _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \ + _ret = (_do); \ bch2_trans_exit(&trans); \ \ _ret; \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index c3ef238..e413861 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1005,9 +1005,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, nr_nodes[1] += 1; if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { - trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_, - path->btree_id, &path->pos); - ret = btree_trans_restart(trans); + trace_trans_restart_iter_upgrade(trans, _RET_IP_, path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); return ERR_PTR(ret); } @@ -1016,9 +1015,10 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, else if (!down_read_trylock(&c->gc_lock)) { bch2_trans_unlock(trans); down_read(&c->gc_lock); - if (!bch2_trans_relock(trans)) { + ret = bch2_trans_relock(trans); + if (ret) { up_read(&c->gc_lock); - return ERR_PTR(-EINTR); + return ERR_PTR(ret); } } @@ -1060,8 +1060,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, journal_flags); if (ret) { bch2_btree_update_free(as); - trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_); - btree_trans_restart(trans); + trace_trans_restart_journal_preres_get(trans, _RET_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); return ERR_PTR(ret); } @@ -1076,10 +1076,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - if (!bch2_trans_relock(trans)) { - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) goto err; - } return as; err: @@ -1650,7 +1649,7 @@ int 
__bch2_foreground_maybe_merge(struct btree_trans *trans, if (ret) goto err; - sib_path->should_be_locked = true; + btree_path_set_should_be_locked(sib_path); m = sib_path->l[level].b; @@ -1830,7 +1829,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_update_done(as); out: - bch2_btree_path_downgrade(iter->path); + bch2_btree_path_downgrade(trans, iter->path); return ret; } @@ -1943,10 +1942,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BUG_ON(iter2.path->level != b->c.level); BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p)); - btree_node_unlock(iter2.path, iter2.path->level); - path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; - iter2.path->level++; - btree_path_set_dirty(iter2.path, BTREE_ITER_NEED_TRAVERSE); + btree_path_set_level_up(trans, iter2.path); bch2_btree_path_check_sort(trans, iter2.path, 0); @@ -2017,10 +2013,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite int ret = 0; if (!btree_node_intent_locked(path, b->c.level) && - !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) { - btree_trans_restart(trans); - return -EINTR; - } + !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) + return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); closure_init_stack(&cl); @@ -2033,8 +2027,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite if (ret) { bch2_trans_unlock(trans); closure_sync(&cl); - if (!bch2_trans_relock(trans)) - return -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + return ret; } new_hash = bch2_btree_node_mem_alloc(c, false); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index aed26b5..dd832f1 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -10,6 +10,7 @@ #include "btree_locking.h" #include "buckets.h" #include "debug.h" +#include "errcode.h" #include "error.h" #include "extent_update.h" #include "journal.h" @@ -282,9 +283,10 @@ 
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, if (ret) return ret; - if (!bch2_trans_relock(trans)) { - trace_trans_restart_journal_preres_get(trans->fn, trace_ip); - return -EINTR; + ret = bch2_trans_relock(trans); + if (ret) { + trace_trans_restart_journal_preres_get(trans, trace_ip); + return ret; } return 0; @@ -373,15 +375,8 @@ btree_key_can_insert_cached(struct btree_trans *trans, * Keys returned by peek() are no longer valid pointers, so we need a * transaction restart: */ - trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_, - path->btree_id, &path->pos, - old_u64s, new_u64s); - /* - * Not using btree_trans_restart() because we can't unlock here, we have - * write locks held: - */ - trans->restarted = true; - return -EINTR; + trace_trans_restart_key_cache_key_realloced(trans, _RET_IP_, path, old_u64s, new_u64s); + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced); } /* Triggers: */ @@ -572,9 +567,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, int ret; if (race_fault()) { - trace_trans_restart_fault_inject(trans->fn, trace_ip); - trans->restarted = true; - return -EINTR; + trace_trans_restart_fault_inject(trans, trace_ip); + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); } /* @@ -726,8 +720,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, btree_insert_key_leaf(trans, i); else if (!i->key_cache_already_flushed) bch2_btree_insert_key_cached(trans, i->path, i->k); - else + else { bch2_btree_key_cache_drop(trans, i->path); + btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE); + } } return ret; @@ -806,6 +802,7 @@ static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct static inline int trans_lock_write(struct btree_trans *trans) { struct btree_insert_entry *i; + int ret; trans_for_each_update(trans, i) { if (same_leaf_as_prev(trans, i)) @@ -815,10 +812,11 @@ static inline int 
trans_lock_write(struct btree_trans *trans) if (have_conflicting_read_lock(trans, i->path)) goto fail; - btree_node_lock_type(trans, i->path, + ret = btree_node_lock_type(trans, i->path, insert_l(i)->b, i->path->pos, i->level, SIX_LOCK_write, NULL, NULL); + BUG_ON(ret); } bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); @@ -833,8 +831,8 @@ fail: bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); } - trace_trans_restart_would_deadlock_write(trans->fn); - return btree_trans_restart(trans); + trace_trans_restart_would_deadlock_write(trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); } static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) @@ -965,12 +963,8 @@ int bch2_trans_commit_error(struct btree_trans *trans, switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: ret = bch2_btree_split_leaf(trans, i->path, trans->flags); - if (!ret) - return 0; - - if (ret == -EINTR) - trace_trans_restart_btree_node_split(trans->fn, trace_ip, - i->btree_id, &i->path->pos); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + trace_trans_restart_btree_node_split(trans, trace_ip, i->path); break; case BTREE_INSERT_NEED_MARK_REPLICAS: bch2_trans_unlock(trans); @@ -979,19 +973,16 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) break; - if (bch2_trans_relock(trans)) - return 0; - - trace_trans_restart_mark_replicas(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_mark_replicas(trans, trace_ip); break; case BTREE_INSERT_NEED_JOURNAL_RES: bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && !(trans->flags & JOURNAL_WATERMARK_reserved)) { - trans->restarted = true; - ret = -EAGAIN; + ret = -BCH_ERR_journal_reclaim_would_deadlock; break; } @@ -999,34 +990,30 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) break; - if (bch2_trans_relock(trans)) - return 0; - - 
trace_trans_restart_journal_res_get(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_journal_res_get(trans, trace_ip); break; case BTREE_INSERT_NEED_JOURNAL_RECLAIM: bch2_trans_unlock(trans); - trace_trans_blocked_journal_reclaim(trans->fn, trace_ip); + trace_trans_blocked_journal_reclaim(trans, trace_ip); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); if (ret < 0) break; - if (bch2_trans_relock(trans)) - return 0; - - trace_trans_restart_journal_reclaim(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_journal_reclaim(trans, trace_ip); break; default: BUG_ON(ret >= 0); break; } - BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); BUG_ON(ret == -ENOSPC && !(trans->flags & BTREE_INSERT_NOWAIT) && (trans->flags & BTREE_INSERT_NOFAIL)); @@ -1046,13 +1033,11 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) bch2_trans_unlock(trans); - ret = bch2_fs_read_write_early(c); + ret = bch2_fs_read_write_early(c) ?: + bch2_trans_relock(trans); if (ret) return ret; - if (!bch2_trans_relock(trans)) - return -EINTR; - percpu_ref_get(&c->writes); return 0; } @@ -1122,9 +1107,8 @@ int __bch2_trans_commit(struct btree_trans *trans) BUG_ON(!i->path->should_be_locked); if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { - trace_trans_restart_upgrade(trans->fn, _RET_IP_, - i->btree_id, &i->path->pos); - ret = btree_trans_restart(trans); + trace_trans_restart_upgrade(trans, _RET_IP_, i->path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); goto out; } @@ -1164,7 +1148,7 @@ retry: if (ret) goto err; - trace_transaction_commit(trans->fn, _RET_IP_); + trace_transaction_commit(trans, _RET_IP_); out: bch2_journal_preres_put(&c->journal, &trans->journal_preres); @@ -1567,7 +1551,7 @@ 
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa if (ret) goto err; - btree_path->should_be_locked = true; + btree_path_set_should_be_locked(btree_path); ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip); err: bch2_path_put(trans, btree_path, true); @@ -1633,12 +1617,11 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter ck = (void *) iter->key_cache_path->l[0].b; if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { - trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_); - btree_trans_restart(trans); - return -EINTR; + trace_trans_restart_key_cache_raced(trans, _RET_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); } - iter->key_cache_path->should_be_locked = true; + btree_path_set_should_be_locked(iter->key_cache_path); } path = iter->key_cache_path; @@ -1763,7 +1746,7 @@ retry: break; } - if (ret == -EINTR) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; goto retry; } @@ -1782,9 +1765,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, unsigned update_flags, u64 *journal_seq) { - return bch2_trans_do(c, NULL, journal_seq, 0, - bch2_btree_delete_range_trans(&trans, id, start, end, - update_flags, journal_seq)); + return bch2_trans_run(c, + bch2_btree_delete_range_trans(&trans, id, start, end, update_flags, journal_seq)); } int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index fe2cd73..b4be212 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -544,22 +544,6 @@ int bch2_mark_alloc(struct btree_trans *trans, } } - if (new_a.data_type == BCH_DATA_free && - (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) - closure_wake_up(&c->freelist_wait); - - if (new_a.data_type == BCH_DATA_need_discard && - (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) - bch2_do_discards(c); - 
- if (old_a.data_type != BCH_DATA_cached && - new_a.data_type == BCH_DATA_cached && - should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); - - if (new_a.data_type == BCH_DATA_need_gc_gens) - bch2_do_gc_gens(c); - percpu_down_read(&c->mark_lock); if (!gc && new_a.gen != old_a.gen) *bucket_gen(ca, new.k->p.offset) = new_a.gen; @@ -599,6 +583,22 @@ int bch2_mark_alloc(struct btree_trans *trans, } } + if (new_a.data_type == BCH_DATA_free && + (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) + closure_wake_up(&c->freelist_wait); + + if (new_a.data_type == BCH_DATA_need_discard && + (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) + bch2_do_discards(c); + + if (old_a.data_type != BCH_DATA_cached && + new_a.data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) + bch2_do_invalidates(c); + + if (new_a.data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); + return 0; } @@ -1939,8 +1939,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) { - return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, - __bch2_trans_mark_dev_sb(&trans, ca)); + return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca)); } /* Disk reservations: */ diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 7c2af67..b5850a7 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "checksum.h" +#include "errcode.h" #include "super.h" #include "super-io.h" @@ -527,7 +528,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c, ret = bch2_request_key(c->disk_sb.sb, &user_key); if (ret) { - bch_err(c, "error requesting encryption key: %i", ret); + bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); goto err; } @@ -552,20 +553,24 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { + int ret; 
+ if (!c->chacha20) c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - if (IS_ERR(c->chacha20)) { - bch_err(c, "error requesting chacha20 module: %li", - PTR_ERR(c->chacha20)); - return PTR_ERR(c->chacha20); + ret = PTR_ERR_OR_ZERO(c->chacha20); + + if (ret) { + bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); + return ret; } if (!c->poly1305) c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); - if (IS_ERR(c->poly1305)) { - bch_err(c, "error requesting poly1305 module: %li", - PTR_ERR(c->poly1305)); - return PTR_ERR(c->poly1305); + ret = PTR_ERR_OR_ZERO(c->poly1305); + + if (ret) { + bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); + return ret; } return 0; @@ -626,7 +631,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) if (keyed) { ret = bch2_request_key(c->disk_sb.sb, &user_key); if (ret) { - bch_err(c, "error requesting encryption key: %i", ret); + bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); goto err; } @@ -678,9 +683,9 @@ int bch2_fs_encryption_init(struct bch_fs *c) pr_verbose_init(c->opts, ""); c->sha256 = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(c->sha256)) { - bch_err(c, "error requesting sha256 module"); - ret = PTR_ERR(c->sha256); + ret = PTR_ERR_OR_ZERO(c->sha256); + if (ret) { + bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret)); goto out; } diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index c181dba..3b442b0 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -236,7 +236,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op) bch2_ob_add_backpointer(c, ec_ob, &insert->k); } err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; if (ret) break; @@ -272,7 +272,7 @@ out: bch2_trans_exit(&trans); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); 
return ret; } diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 05cae0e..cd37a10 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -189,6 +189,7 @@ struct dump_iter { struct bch_fs *c; enum btree_id id; struct bpos from; + struct bpos prev_node; u64 iter; struct printbuf buf; @@ -258,39 +259,30 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; - bch2_trans_init(&trans, i->c, 0, 0); - bch2_trans_iter_init(&trans, &iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(&iter); - - while (k.k && !(err = bkey_err(k))) { - bch2_bkey_val_to_text(&i->buf, i->c, k); - prt_char(&i->buf, '\n'); - - k = bch2_btree_iter_next(&iter); - i->from = iter.pos; - + err = for_each_btree_key2(&trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ err = flush_buf(i); if (err) break; if (!i->size) break; - } - bch2_trans_iter_exit(&trans, &iter); + + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + 0; + })); + i->from = iter.pos; + + if (!err) + err = flush_buf(i); bch2_trans_exit(&trans); - return err < 0 ? 
err : i->ret; + return err ?: i->ret; } static const struct file_operations btree_debug_ops = { @@ -360,7 +352,6 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct btree *prev_node = NULL; int err; i->ubuf = buf; @@ -376,44 +367,36 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - bch2_trans_iter_init(&trans, &iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - - while ((k = bch2_btree_iter_peek(&iter)).k && - !(err = bkey_err(k))) { + err = for_each_btree_key2(&trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); - if (l->b != prev_node) { - bch2_btree_node_to_text(&i->buf, i->c, l->b); - err = flush_buf(i); - if (err) - break; - } - prev_node = l->b; - - bch2_bfloat_to_text(&i->buf, l->b, _k); - err = flush_buf(i); - if (err) - break; - - bch2_btree_iter_advance(&iter); - i->from = iter.pos; - err = flush_buf(i); if (err) break; if (!i->size) break; - } - bch2_trans_iter_exit(&trans, &iter); + + if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } + + bch2_bfloat_to_text(&i->buf, l->b, _k); + 0; + })); + i->from = iter.pos; + + if (!err) + err = flush_buf(i); bch2_trans_exit(&trans); - return err < 0 ? 
err : i->ret; + return err ?: i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -636,6 +619,75 @@ static const struct file_operations journal_pins_ops = { .read = bch2_journal_pins_read, }; +static int lock_held_stats_open(struct inode *inode, struct file *file) +{ + struct bch_fs *c = inode->i_private; + struct dump_iter *i; + + i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); + + if (!i) + return -ENOMEM; + + i->iter = 0; + i->c = c; + i->buf = PRINTBUF; + file->private_data = i; + + return 0; +} + +static int lock_held_stats_release(struct inode *inode, struct file *file) +{ + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); + + return 0; +} + +static ssize_t lock_held_stats_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct lock_held_stats *lhs = &i->c->lock_held_stats; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) { + err = flush_buf(i); + if (err) + return err; + + if (!i->size) + break; + + prt_printf(&i->buf, "%s:", lhs->names[i->iter]); + prt_newline(&i->buf); + printbuf_indent_add(&i->buf, 8); + bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]); + printbuf_indent_sub(&i->buf, 8); + prt_newline(&i->buf); + i->iter++; + } + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations lock_held_stats_op = { + .owner = THIS_MODULE, + .open = lock_held_stats_open, + .release = lock_held_stats_release, + .read = lock_held_stats_read, +}; + void bch2_fs_debug_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->fs_debug_dir)) @@ -664,6 +716,11 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { + debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir, 
+ c, &lock_held_stats_op); + } + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 0cbb765..4d942d2 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -471,7 +471,7 @@ retry: ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info, name, inum, 0); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret) bch2_trans_iter_exit(&trans, &iter); @@ -556,7 +556,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 6ce352c..f33acf1 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -572,18 +572,14 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans, struct btree_iter *iter) { size_t idx = iter->pos.offset; - int ret = 0; if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN)) - return ret; + return 0; bch2_trans_unlock(trans); - ret = -EINTR; - if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL)) - return ret; - - return -ENOMEM; + return __ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL) ?: + bch2_trans_relock(trans); } static ssize_t stripe_idx_to_delete(struct bch_fs *c) @@ -726,7 +722,7 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans, struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - for_each_btree_key(trans, iter, BTREE_ID_stripes, start_pos, + for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { if (start_pos.offset) { @@ -740,12 +736,13 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans, } if (bkey_deleted(k.k)) - goto found_slot; + break; } - goto err; -found_slot: - start_pos = iter.pos; + c->ec_stripe_hint = 
iter.pos.offset; + + if (ret) + goto err; ret = ec_stripe_mem_alloc(trans, &iter); if (ret) @@ -754,8 +751,6 @@ found_slot: stripe->k.p = iter.pos; ret = bch2_trans_update(trans, &iter, &stripe->k_i, 0); - - c->ec_stripe_hint = start_pos.offset; err: bch2_trans_iter_exit(trans, &iter); @@ -822,80 +817,62 @@ static void extent_stripe_ptr_add(struct bkey_s_extent e, }; } -static int ec_stripe_update_ptrs(struct bch_fs *c, - struct ec_stripe_buf *s, - struct bkey *pos) +static int ec_stripe_update_extent(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + struct ec_stripe_buf *s, + struct bpos end) { - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_extent e; - struct bkey_buf sk; - struct bpos next_pos; - int ret = 0, dev, block; - - bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - /* XXX this doesn't support the reflink btree */ - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - bkey_start_pos(pos), - BTREE_ITER_INTENT); -retry: - while (bch2_trans_begin(&trans), - (k = bch2_btree_iter_peek(&iter)).k && - !(ret = bkey_err(k)) && - bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { - const struct bch_extent_ptr *ptr_c; - struct bch_extent_ptr *ptr, *ec_ptr = NULL; - - if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { - bch2_btree_iter_advance(&iter); - continue; - } + const struct bch_extent_ptr *ptr_c; + struct bch_extent_ptr *ptr, *ec_ptr = NULL; + struct bkey_i *n; + int ret, dev, block; - ptr_c = bkey_matches_stripe(&s->key.v, k, &block); - /* - * It doesn't generally make sense to erasure code cached ptrs: - * XXX: should we be incrementing a counter? 
- */ - if (!ptr_c || ptr_c->cached) { - bch2_btree_iter_advance(&iter); - continue; - } + if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) + return 1; - dev = s->key.v.ptrs[block].dev; + if (extent_has_stripe_ptr(k, s->key.k.p.offset)) + return 0; - bch2_bkey_buf_reassemble(&sk, c, k); - e = bkey_i_to_s_extent(sk.k); + ptr_c = bkey_matches_stripe(&s->key.v, k, &block); + /* + * It doesn't generally make sense to erasure code cached ptrs: + * XXX: should we be incrementing a counter? + */ + if (!ptr_c || ptr_c->cached) + return 0; - bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev); - ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev); - BUG_ON(!ec_ptr); + dev = s->key.v.ptrs[block].dev; - extent_stripe_ptr_add(e, s, ec_ptr, block); + n = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; - bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k)); - next_pos = sk.k->k.p; + bkey_reassemble(n, k); - ret = bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, sk.k, 0) ?: - bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL); - if (!ret) - bch2_btree_iter_set_pos(&iter, next_pos); - if (ret) - break; - } - if (ret == -EINTR) - goto retry; - bch2_trans_iter_exit(&trans, &iter); + bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev); + ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev); + BUG_ON(!ec_ptr); - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); + extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block); - return ret; + return bch2_trans_update(trans, iter, n, 0); +} + +static int ec_stripe_update_extents(struct bch_fs *c, + struct ec_stripe_buf *s, + struct bkey *pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + + return bch2_trans_run(c, + for_each_btree_key_commit(&trans, iter, + BTREE_ID_extents, bkey_start_pos(pos), + BTREE_ITER_NOT_EXTENTS|BTREE_ITER_INTENT, k, + NULL, NULL, BTREE_INSERT_NOFAIL, + ec_stripe_update_extent(&trans, &iter, k, s, pos->p))); } /* 
@@ -966,9 +943,10 @@ static void ec_stripe_create(struct ec_stripe_new *s) } for_each_keylist_key(&s->keys, k) { - ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k); + ret = ec_stripe_update_extents(c, &s->new_stripe, &k->k); if (ret) { - bch_err(c, "error creating stripe: error %i updating pointers", ret); + bch_err(c, "error creating stripe: error updating pointers: %s", + bch2_err_str(ret)); break; } } diff --git a/libbcachefs/errcode.c b/libbcachefs/errcode.c new file mode 100644 index 0000000..9da8a59 --- /dev/null +++ b/libbcachefs/errcode.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "errcode.h" + +#include + +static const char * const bch2_errcode_strs[] = { +#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err, + BCH_ERRCODES() +#undef x + NULL +}; + +#define BCH_ERR_0 0 + +static unsigned bch2_errcode_parents[] = { +#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = BCH_ERR_##class, + BCH_ERRCODES() +#undef x +}; + +const char *bch2_err_str(int err) +{ + const char *errstr; + err = abs(err); + + BUG_ON(err >= BCH_ERR_MAX); + + if (err >= BCH_ERR_START) + errstr = bch2_errcode_strs[err - BCH_ERR_START]; + else if (err) + errstr = errname(err); + else + errstr = "(No error)"; + return errstr ?: "(Invalid error)"; +} + +bool __bch2_err_matches(int err, int class) +{ + err = abs(err); + class = abs(class); + + BUG_ON(err >= BCH_ERR_MAX); + BUG_ON(class >= BCH_ERR_MAX); + + while (err >= BCH_ERR_START && err != class) + err = bch2_errcode_parents[err - BCH_ERR_START]; + + return err == class; +} diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 0581f3c..15a1be2 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -2,12 +2,73 @@ #ifndef _BCACHEFS_ERRCODE_H #define _BCACHEFS_ERRCODE_H -enum { - /* Bucket allocator: */ - OPEN_BUCKETS_EMPTY = 2048, - FREELIST_EMPTY, /* Allocator thread not keeping up */ - INSUFFICIENT_DEVICES, - NEED_SNAPSHOT_CLEANUP, +#define BCH_ERRCODES() 
\ + x(0, open_buckets_empty) \ + x(0, freelist_empty) \ + x(freelist_empty, no_buckets_found) \ + x(0, insufficient_devices) \ + x(0, transaction_restart) \ + x(transaction_restart, transaction_restart_fault_inject) \ + x(transaction_restart, transaction_restart_relock) \ + x(transaction_restart, transaction_restart_relock_path) \ + x(transaction_restart, transaction_restart_relock_path_intent) \ + x(transaction_restart, transaction_restart_relock_after_fill) \ + x(transaction_restart, transaction_restart_too_many_iters) \ + x(transaction_restart, transaction_restart_lock_node_reused) \ + x(transaction_restart, transaction_restart_fill_relock) \ + x(transaction_restart, transaction_restart_fill_mem_alloc_fail)\ + x(transaction_restart, transaction_restart_mem_realloced) \ + x(transaction_restart, transaction_restart_in_traverse_all) \ + x(transaction_restart, transaction_restart_would_deadlock) \ + x(transaction_restart, transaction_restart_would_deadlock_write)\ + x(transaction_restart, transaction_restart_upgrade) \ + x(transaction_restart, transaction_restart_key_cache_upgrade) \ + x(transaction_restart, transaction_restart_key_cache_fill) \ + x(transaction_restart, transaction_restart_key_cache_raced) \ + x(transaction_restart, transaction_restart_key_cache_realloced)\ + x(transaction_restart, transaction_restart_journal_preres_get) \ + x(transaction_restart, transaction_restart_nested) \ + x(0, no_btree_node) \ + x(no_btree_node, no_btree_node_relock) \ + x(no_btree_node, no_btree_node_upgrade) \ + x(no_btree_node, no_btree_node_drop) \ + x(no_btree_node, no_btree_node_lock_root) \ + x(no_btree_node, no_btree_node_up) \ + x(no_btree_node, no_btree_node_down) \ + x(no_btree_node, no_btree_node_init) \ + x(no_btree_node, no_btree_node_cached) \ + x(0, lock_fail_node_reused) \ + x(0, lock_fail_root_changed) \ + x(0, journal_reclaim_would_deadlock) \ + x(0, fsck) \ + x(fsck, fsck_fix) \ + x(fsck, fsck_ignore) \ + x(fsck, fsck_errors_not_fixed) \ + x(fsck, 
fsck_repair_unimplemented) \ + x(fsck, fsck_repair_impossible) \ + x(0, need_snapshot_cleanup) \ + x(0, need_topology_repair) + +enum bch_errcode { + BCH_ERR_START = 2048, +#define x(class, err) BCH_ERR_##err, + BCH_ERRCODES() +#undef x + BCH_ERR_MAX }; +const char *bch2_err_str(int); +bool __bch2_err_matches(int, int); + +static inline bool _bch2_err_matches(int err, int class) +{ + return err && __bch2_err_matches(err, class); +} + +#define bch2_err_matches(_err, _class) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(_class)); \ + _bch2_err_matches(_err, _class); \ +}) + #endif /* _BCACHFES_ERRCODE_H */ diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 8279a9b..f6a895b 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -68,8 +68,7 @@ void bch2_io_error(struct bch_dev *ca) #include "tools-util.h" #endif -enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, - const char *fmt, ...) +int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) { struct fsck_err_state *s = NULL; va_list args; @@ -83,10 +82,10 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, if (c->opts.errors == BCH_ON_ERROR_continue) { bch_err(c, "fixing"); - return FSCK_ERR_FIX; + return -BCH_ERR_fsck_fix; } else { bch2_inconsistent_error(c); - return FSCK_ERR_EXIT; + return -BCH_ERR_fsck_errors_not_fixed; } } @@ -156,14 +155,14 @@ print: if (fix) { set_bit(BCH_FS_ERRORS_FIXED, &c->flags); - return FSCK_ERR_FIX; + return -BCH_ERR_fsck_fix; } else { set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); set_bit(BCH_FS_ERROR, &c->flags); return c->opts.fix_errors == FSCK_OPT_EXIT || !(flags & FSCK_CAN_IGNORE) - ? FSCK_ERR_EXIT - : FSCK_ERR_IGNORE; + ? 
-BCH_ERR_fsck_errors_not_fixed + : -BCH_ERR_fsck_ignore; } } diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 6e63c38..b603d73 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -91,14 +91,6 @@ do { \ * be able to repair: */ -enum { - BCH_FSCK_OK = 0, - BCH_FSCK_ERRORS_NOT_FIXED = 1, - BCH_FSCK_REPAIR_UNIMPLEMENTED = 2, - BCH_FSCK_REPAIR_IMPOSSIBLE = 3, - BCH_FSCK_UNKNOWN_VERSION = 4, -}; - enum fsck_err_opts { FSCK_OPT_EXIT, FSCK_OPT_YES, @@ -106,13 +98,6 @@ enum fsck_err_opts { FSCK_OPT_ASK, }; -enum fsck_err_ret { - FSCK_ERR_IGNORE = 0, - FSCK_ERR_FIX = 1, - FSCK_ERR_EXIT = 2, - FSCK_ERR_START_TOPOLOGY_REPAIR = 3, -}; - struct fsck_err_state { struct list_head list; const char *fmt; @@ -127,21 +112,21 @@ struct fsck_err_state { #define FSCK_NO_RATELIMIT (1 << 3) __printf(3, 4) __cold -enum fsck_err_ret bch2_fsck_err(struct bch_fs *, - unsigned, const char *, ...); +int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...); void bch2_flush_fsck_errs(struct bch_fs *); #define __fsck_err(c, _flags, msg, ...) 
\ ({ \ - int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\ + int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \ \ - if (_fix == FSCK_ERR_EXIT) { \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) { \ bch_err(c, "Unable to continue, halting"); \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + ret = _ret; \ goto fsck_err; \ } \ \ - _fix; \ + _ret == -BCH_ERR_fsck_fix; \ }) /* These macros return true if error should be fixed: */ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index bcfd9e5..0a7f172 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -409,7 +409,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -850,13 +850,13 @@ void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) bch2_clear_page_bits(&folio->page); } -int bch2_releasepage(struct page *page, gfp_t gfp_mask) +bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) { - if (PageDirty(page)) - return 0; + if (folio_test_dirty(folio) || folio_test_writeback(folio)) + return false; - bch2_clear_page_bits(page); - return 1; + bch2_clear_page_bits(&folio->page); + return true; } #ifdef CONFIG_MIGRATION @@ -1045,10 +1045,9 @@ retry: * read_extent -> io_time_reset may cause a transaction restart * without returning an error, we need to check for that here: */ - if (!bch2_trans_relock(trans)) { - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) break; - } bch2_btree_iter_set_pos(&iter, POS(inum.inum, rbio->bio.bi_iter.bi_sector)); @@ -1101,7 +1100,7 @@ retry: err: bch2_trans_iter_exit(trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (ret) { @@ -1175,20 +1174,6 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, bch2_trans_exit(&trans); } -int bch2_readpage(struct file *file, struct page *page) -{ - 
struct bch_inode_info *inode = to_bch_ei(page->mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_io_opts opts = io_opts(c, &inode->ei_inode); - struct bch_read_bio *rbio; - - rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), opts); - rbio->bio.bi_end_io = bch2_readpages_end_io; - - __bchfs_readpage(c, rbio, inode_inum(inode), page); - return 0; -} - static void bch2_read_single_page_end_io(struct bio *bio) { complete(bio->bi_private); @@ -1221,6 +1206,16 @@ static int bch2_read_single_page(struct page *page, return 0; } +int bch2_read_folio(struct file *file, struct folio *folio) +{ + struct page *page = &folio->page; + int ret; + + ret = bch2_read_single_page(page, page->mapping); + folio_unlock(folio); + return ret; +} + /* writepages: */ struct bch_writepage_state { @@ -1512,7 +1507,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc /* buffered writes: */ int bch2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, + loff_t pos, unsigned len, struct page **pagep, void **fsdata) { struct bch_inode_info *inode = to_bch_ei(mapping->host); @@ -1532,7 +1527,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, bch2_pagecache_add_get(&inode->ei_pagecache_lock); - page = grab_cache_page_write_begin(mapping, index, flags); + page = grab_cache_page_write_begin(mapping, index); if (!page) goto err_unlock; @@ -1663,7 +1658,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, bch2_page_reservation_init(c, inode, &res); for (i = 0; i < nr_pages; i++) { - pages[i] = grab_cache_page_write_begin(mapping, index + i, 0); + pages[i] = grab_cache_page_write_begin(mapping, index + i); if (!pages[i]) { nr_pages = i; if (!i) { @@ -2073,7 +2068,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (err == -EINTR) + if (bch2_err_matches(err, BCH_ERR_transaction_restart)) goto 
retry; bch2_trans_exit(&trans); @@ -2449,7 +2444,7 @@ retry: start = iter.pos; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -2839,7 +2834,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_trans_copy_iter(&dst, &src); bch2_trans_copy_iter(&del, &src); - while (ret == 0 || ret == -EINTR) { + while (ret == 0 || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -3041,7 +3037,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, bkey_err: bch2_quota_reservation_put(c, inode, "a_res); bch2_disk_reservation_put(c, &disk_res); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; } @@ -3321,7 +3317,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -3436,7 +3432,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h index 7f2d7f4..a22a4e9 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/fs-io.h @@ -15,13 +15,13 @@ int __must_check bch2_write_inode_size(struct bch_fs *, struct bch_inode_info *, loff_t, unsigned); -int bch2_readpage(struct file *, struct page *); +int bch2_read_folio(struct file *, struct folio *); int bch2_writepages(struct address_space *, struct writeback_control *); void bch2_readahead(struct readahead_control *); int bch2_write_begin(struct file *, struct address_space *, loff_t, - unsigned, unsigned, struct page **, void **); + unsigned, struct page **, void **); int bch2_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void 
*); @@ -42,7 +42,7 @@ loff_t bch2_llseek(struct file *, loff_t, int); vm_fault_t bch2_page_fault(struct vm_fault *); vm_fault_t bch2_page_mkwrite(struct vm_fault *); void bch2_invalidate_folio(struct folio *, size_t, size_t); -int bch2_releasepage(struct page *, gfp_t); +bool bch2_release_folio(struct folio *, gfp_t); int bch2_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 2354c98..3e2b609 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -8,6 +8,7 @@ #include "buckets.h" #include "chardev.h" #include "dirent.h" +#include "errcode.h" #include "extents.h" #include "fs.h" #include "fs-common.h" @@ -153,7 +154,7 @@ retry: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -323,7 +324,7 @@ retry: bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); err_before_quota: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; goto err_trans; } @@ -754,7 +755,7 @@ retry: btree_err: bch2_trans_iter_exit(&trans, &inode_iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err_trans; @@ -985,7 +986,7 @@ retry: start = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret && have_extent) @@ -1112,14 +1113,14 @@ static const struct inode_operations bch_special_inode_operations = { }; static const struct address_space_operations bch_address_space_operations = { - .readpage = bch2_readpage, + .read_folio = bch2_read_folio, .writepages = bch2_writepages, .readahead = bch2_readahead, .dirty_folio = filemap_dirty_folio, .write_begin = bch2_write_begin, .write_end = bch2_write_end, .invalidate_folio = bch2_invalidate_folio, - .releasepage = bch2_releasepage, + 
.release_folio = bch2_release_folio, .direct_IO = noop_direct_IO, #ifdef CONFIG_MIGRATION .migratepage = bch2_migrate_page, @@ -1335,7 +1336,7 @@ found: memcpy(name, d.v->d_name, name_len); name[name_len] = '\0'; err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter1); @@ -1870,10 +1871,9 @@ got_sb: sb->s_shrink.seeks = 0; vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); - if (IS_ERR(vinode)) { - bch_err(c, "error mounting: error getting root inode %i", - (int) PTR_ERR(vinode)); - ret = PTR_ERR(vinode); + ret = PTR_ERR_OR_ZERO(vinode); + if (ret) { + bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret)); goto err_put_super; } diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 6165878..c93e177 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -136,9 +136,9 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ret = bch2_inode_unpack(k, inode); err: - if (ret && ret != -EINTR) - bch_err(trans->c, "error %i fetching inode %llu", - ret, inode_nr); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(trans->c, "error fetching inode %llu: %s", + inode_nr, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -164,9 +164,9 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, if (!ret) *snapshot = iter.pos.snapshot; err: - if (ret && ret != -EINTR) - bch_err(trans->c, "error %i fetching inode %llu:%u", - ret, inode_nr, *snapshot); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(trans->c, "error fetching inode %llu:%u: %s", + inode_nr, *snapshot, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -225,7 +225,8 @@ static int write_inode(struct btree_trans *trans, BTREE_INSERT_LAZY_RW, __write_inode(trans, inode, snapshot)); if (ret) - bch_err(trans->c, "error in fsck: error %i updating inode", ret); + bch_err(trans->c, 
"error in fsck: error updating inode: %s", + bch2_err_str(ret)); return ret; } @@ -286,7 +287,7 @@ retry: BTREE_INSERT_NOFAIL); err: bch2_trans_iter_exit(trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; return ret; @@ -313,8 +314,8 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); bch2_trans_iter_exit(trans, &iter); err: - if (ret && ret != -EINTR) - bch_err(c, "error %i from __remove_dirent()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -349,8 +350,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, goto create_lostfound; } - if (ret && ret != -EINTR) - bch_err(c, "error looking up lost+found: %i", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret)); if (ret) return ret; @@ -372,8 +373,8 @@ create_lostfound: lostfound, &lostfound_str, 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { }, 0); - if (ret && ret != -EINTR) - bch_err(c, "error creating lost+found: %i", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error creating lost+found: %s", bch2_err_str(ret)); return ret; } @@ -437,8 +438,8 @@ static int reattach_inode(struct btree_trans *trans, BTREE_INSERT_NOFAIL, __reattach_inode(trans, inode, inode_snapshot)); if (ret) { - bch_err(trans->c, "error %i reattaching inode %llu", - ret, inode->bi_inum); + bch_err(trans->c, "error reattaching inode %llu: %s", + inode->bi_inum, bch2_err_str(ret)); return ret; } @@ -518,7 +519,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, .id = pos.snapshot, .equiv = bch2_snapshot_equiv(c, pos.snapshot), }; - int ret; + int ret = 0; if (bkey_cmp(s->pos, pos)) s->ids.nr = 0; @@ -528,14 +529,13 @@ static int 
snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, darray_for_each(s->ids, i) if (i->equiv == n.equiv) { - if (i->id != n.id) { - bch_err(c, "snapshot deletion did not run correctly:\n" + if (fsck_err_on(i->id != n.id, c, + "snapshot deletion did not run correctly:\n" " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", bch2_btree_ids[btree_id], pos.inode, pos.offset, - i->id, n.id, n.equiv); - return -NEED_SNAPSHOT_CLEANUP; - } + i->id, n.id, n.equiv)) + return -BCH_ERR_need_snapshot_cleanup; return 0; } @@ -544,6 +544,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, if (ret) bch_err(c, "error reallocating snapshots_seen table (size %zu)", s->ids.size); +fsck_err: return ret; } @@ -649,6 +650,7 @@ static int __walk_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; + u32 restart_count = trans->restart_count; unsigned i; int ret; @@ -676,6 +678,10 @@ static int __walk_inode(struct btree_trans *trans, w->cur_inum = pos.inode; w->first_this_inode = true; + + if (trans_was_restarted(trans, restart_count)) + return -BCH_ERR_transaction_restart_nested; + lookup_snapshot: for (i = 0; i < w->inodes.nr; i++) if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot)) @@ -837,15 +843,14 @@ bad_hash: "hashed to %llu\n%s", bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE) - return 0; - - ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); - if (ret) { - bch_err(c, "hash_redo_key err %i", ret); - return ret; + bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { + ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); + if (ret) { + bch_err(c, "hash_redo_key err %s", bch2_err_str(ret)); + return ret; + } + ret = -BCH_ERR_transaction_restart_nested; } - ret = -EINTR; fsck_err: goto out; } @@ -910,7 +915,8 @@ 
static int check_inode(struct btree_trans *trans, ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); if (ret) - bch_err(c, "error in fsck: error %i while deleting inode", ret); + bch_err(c, "error in fsck: error while deleting inode: %s", + bch2_err_str(ret)); return ret; } @@ -933,7 +939,8 @@ static int check_inode(struct btree_trans *trans, POS(u.bi_inum, U64_MAX), 0, NULL); if (ret) { - bch_err(c, "error in fsck: error %i truncating inode", ret); + bch_err(c, "error in fsck: error truncating inode: %s", + bch2_err_str(ret)); return ret; } @@ -958,8 +965,8 @@ static int check_inode(struct btree_trans *trans, sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); if (sectors < 0) { - bch_err(c, "error in fsck: error %i recounting inode sectors", - (int) sectors); + bch_err(c, "error in fsck: error recounting inode sectors: %s", + bch2_err_str(sectors)); return sectors; } @@ -978,13 +985,13 @@ static int check_inode(struct btree_trans *trans, if (do_update) { ret = __write_inode(trans, &u, iter->pos.snapshot); if (ret) - bch_err(c, "error in fsck: error %i " - "updating inode", ret); + bch_err(c, "error in fsck: error updating inode: %s", + bch2_err_str(ret)); } err: fsck_err: if (ret) - bch_err(c, "error %i from check_inode()", ret); + bch_err(c, "error from check_inode(): %s", bch2_err_str(ret)); return ret; } @@ -1003,16 +1010,14 @@ static int check_inodes(struct bch_fs *c, bool full) ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, - k, - NULL, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, check_inode(&trans, &iter, k, &prev, &s, full)); bch2_trans_exit(&trans); snapshots_seen_exit(&s); if (ret) - bch_err(c, "error %i from check_inodes()", ret); + bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret)); return ret; } @@ -1115,15 +1120,15 @@ static 
int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; struct inode_walker_entry *i; - int ret = 0, ret2 = 0; + u32 restart_count = trans->restart_count; + int ret = 0; s64 count2; darray_for_each(w->inodes, i) { if (i->inode.bi_sectors == i->count) continue; - count2 = lockrestart_do(trans, - bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot)); + count2 = bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot); if (i->count != count2) { bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu", @@ -1136,19 +1141,21 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", w->cur_inum, i->snapshot, - i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE) - continue; - - i->inode.bi_sectors = i->count; - ret = write_inode(trans, &i->inode, i->snapshot); - if (ret) - break; - ret2 = -EINTR; + i->inode.bi_sectors, i->count)) { + i->inode.bi_sectors = i->count; + ret = write_inode(trans, &i->inode, i->snapshot); + if (ret) + break; + } } fsck_err: - if (ret) - bch_err(c, "error %i from check_i_sectors()", ret); - return ret ?: ret2; + if (ret) { + bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret)); + return ret; + } + if (trans_was_restarted(trans, restart_count)) + return -BCH_ERR_transaction_restart_nested; + return 0; } static int check_extent(struct btree_trans *trans, struct btree_iter *iter, @@ -1184,14 +1191,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - if (!iter->path->should_be_locked) { - /* - * hack: check_i_sectors may have handled a transaction restart, - * it shouldn't be but we need to fix the new i_sectors check - * code and delete the old bch2_count_inode_sectors() first - */ - return -EINTR; - } + BUG_ON(!iter->path->should_be_locked); #if 0 if (bkey_cmp(prev.k->k.p, 
bkey_start_pos(k.k)) > 0) { char buf1[200]; @@ -1201,7 +1201,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_val_to_text(&PBUF(buf2), c, k); if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { - ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + ret = fix_overlapping_extent(trans, k, prev.k->k.p) + ?: -BCH_ERR_transaction_restart_nested; goto out; } } @@ -1286,8 +1287,8 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) - bch_err(c, "error %i from check_extent()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error from check_extent(): %s", bch2_err_str(ret)); return ret; } @@ -1329,7 +1330,7 @@ static int check_extents(struct bch_fs *c) snapshots_seen_exit(&s); if (ret) - bch_err(c, "error %i from check_extents()", ret); + bch_err(c, "error from check_extents(): %s", bch2_err_str(ret)); return ret; } @@ -1337,7 +1338,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; struct inode_walker_entry *i; - int ret = 0, ret2 = 0; + u32 restart_count = trans->restart_count; + int ret = 0; s64 count2; darray_for_each(w->inodes, i) { @@ -1363,13 +1365,16 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) ret = write_inode(trans, &i->inode, i->snapshot); if (ret) break; - ret2 = -EINTR; } } fsck_err: - if (ret) - bch_err(c, "error %i from check_subdir_count()", ret); - return ret ?: ret2; + if (ret) { + bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret)); + return ret; + } + if (trans_was_restarted(trans, restart_count)) + return -BCH_ERR_transaction_restart_nested; + return 0; } static int check_dirent_target(struct btree_trans *trans, @@ -1486,8 +1491,8 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) - bch_err(c, "error %i from check_target()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + 
bch_err(c, "error from check_target(): %s", bch2_err_str(ret)); return ret; } @@ -1527,10 +1532,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - if (!iter->path->should_be_locked) { - /* hack: see check_extent() */ - return -EINTR; - } + BUG_ON(!iter->path->should_be_locked); ret = __walk_inode(trans, dir, equiv); if (ret < 0) @@ -1659,8 +1661,8 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) - bch_err(c, "error %i from check_dirent()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -1699,7 +1701,7 @@ static int check_dirents(struct bch_fs *c) inode_walker_exit(&target); if (ret) - bch_err(c, "error %i from check_dirents()", ret); + bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret)); return ret; } @@ -1734,8 +1736,8 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); fsck_err: - if (ret && ret != -EINTR) - bch_err(c, "error %i from check_xattr()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret)); return ret; } @@ -1767,7 +1769,7 @@ static int check_xattrs(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error %i from check_xattrs()", ret); + bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret)); return ret; } @@ -1799,7 +1801,7 @@ static int check_root_trans(struct btree_trans *trans) BTREE_INSERT_LAZY_RW, __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i)); if (ret) { - bch_err(c, "error writing root subvol: %i", ret); + bch_err(c, "error writing root subvol: %s", bch2_err_str(ret)); goto err; } @@ -1818,7 +1820,7 @@ static int check_root_trans(struct btree_trans *trans) ret = __write_inode(trans, &root_inode, snapshot); if (ret) - bch_err(c, "error writing 
root inode: %i", ret); + bch_err(c, "error writing root inode: %s", bch2_err_str(ret)); } err: fsck_err: @@ -1971,7 +1973,7 @@ static int check_path(struct btree_trans *trans, } fsck_err: if (ret) - bch_err(c, "%s: err %i", __func__, ret); + bch_err(c, "%s: err %s", __func__, bch2_err_str(ret)); return ret; } @@ -2015,8 +2017,6 @@ static int check_directory_structure(struct bch_fs *c) } bch2_trans_iter_exit(&trans, &iter); - BUG_ON(ret == -EINTR); - darray_exit(&path); bch2_trans_exit(&trans); @@ -2194,6 +2194,47 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links return ret; } +static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, + struct nlink_table *links, + size_t *idx, u64 range_end) +{ + struct bch_fs *c = trans->c; + struct bch_inode_unpacked u; + struct nlink *link = &links->d[*idx]; + int ret = 0; + + if (k.k->p.offset >= range_end) + return 1; + + if (!bkey_is_inode(k.k)) + return 0; + + BUG_ON(bch2_inode_unpack(k, &u)); + + if (S_ISDIR(le16_to_cpu(u.bi_mode))) + return 0; + + if (!u.bi_nlink) + return 0; + + while ((cmp_int(link->inum, k.k->p.offset) ?: + cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { + BUG_ON(*idx == links->nr); + link = &links->d[++*idx]; + } + + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, + "inode %llu type %s has wrong i_nlink (%u, should be %u)", + u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], + bch2_inode_nlink_get(&u), link->count)) { + bch2_inode_nlink_set(&u, link->count); + ret = __write_inode(trans, &u, k.k->p.snapshot); + } +fsck_err: + return ret; +} + noinline_for_stack static int check_nlinks_update_hardlinks(struct bch_fs *c, struct nlink_table *links, @@ -2202,56 +2243,25 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bch_inode_unpacked u; - struct nlink *link = links->d; + size_t idx = 0; int ret = 0; 
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for_each_btree_key(&trans, iter, BTREE_ID_inodes, - POS(0, range_start), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (k.k->p.offset >= range_end) - break; - - if (!bkey_is_inode(k.k)) - continue; - - BUG_ON(bch2_inode_unpack(k, &u)); - - if (S_ISDIR(le16_to_cpu(u.bi_mode))) - continue; - - if (!u.bi_nlink) - continue; - - while ((cmp_int(link->inum, k.k->p.offset) ?: - cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { - link++; - BUG_ON(link >= links->d + links->nr); - } - - if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, - "inode %llu type %s has wrong i_nlink (%u, should be %u)", - u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], - bch2_inode_nlink_get(&u), link->count)) { - bch2_inode_nlink_set(&u, link->count); + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes, + POS(0, range_start), + BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end)); - ret = write_inode(&trans, &u, k.k->p.snapshot); - if (ret) - bch_err(c, "error in fsck: error %i updating inode", ret); - } - } -fsck_err: - bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - if (ret) + if (ret < 0) { bch_err(c, "error in fsck: btree error %i while walking inodes", ret); + return ret; + } - return ret; + return 0; } noinline_for_stack @@ -2291,21 +2301,13 @@ static int check_nlinks(struct bch_fs *c) return ret; } -static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter) +static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) { - struct bkey_s_c k; struct bkey_s_c_reflink_p p; struct bkey_i_reflink_p *u; int ret; - k = bch2_btree_iter_peek(iter); - if (!k.k) - return 0; - - ret = bkey_err(k); - if (ret) - return ret; - if (k.k->type != KEY_TYPE_reflink_p) return 0; @@ 
-2341,20 +2343,11 @@ static int fix_reflink_p(struct bch_fs *c) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (k.k->type == KEY_TYPE_reflink_p) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - fix_reflink_p_key(&trans, &iter)); - if (ret) - break; - } - } - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_extents, POS_MIN, + BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + fix_reflink_p_key(&trans, &iter, k)); bch2_trans_exit(&trans); return ret; @@ -2380,7 +2373,7 @@ again: check_nlinks(c) ?: fix_reflink_p(c); - if (ret == -NEED_SNAPSHOT_CLEANUP) { + if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) { set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); goto again; } diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 6a2b949..0831060 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -639,7 +639,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } @@ -710,7 +710,7 @@ retry: BTREE_INSERT_NOFAIL); err: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 50fa572..93771f8 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -312,7 +312,7 @@ int bch2_extent_update(struct btree_trans *trans, } /* - * Returns -EINTR if we had to drop locks: + * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, 
@@ -325,7 +325,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, int ret = 0, ret2 = 0; u32 snapshot; - while (!ret || ret == -EINTR) { + while (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -384,7 +385,10 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - return ret == -EINTR ? 0 : ret; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + + return ret; } int bch2_write_index_default(struct bch_write_op *op) @@ -415,7 +419,7 @@ int bch2_write_index_default(struct bch_write_op *op) ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &sk.k->k.p.snapshot); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; @@ -430,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op) op->flags & BCH_WRITE_CHECK_ENOSPC); bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; @@ -580,14 +584,14 @@ static void __bch2_write_index(struct bch_write_op *op) u64 sectors_start = keylist_sectors(keys); int ret = op->index_update_fn(op); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); BUG_ON(keylist_sectors(keys) && !ret); op->written += sectors_start - keylist_sectors(keys); if (ret) { bch_err_inum_ratelimited(c, op->pos.inode, - "write error %i from btree update", ret); + "write error while doing btree update: %s", bch2_err_str(ret)); op->error = ret; } } @@ -1915,6 +1919,7 @@ static void bch2_read_endio(struct bio *bio) } if (rbio->narrow_crcs || + rbio->promote || crc_is_compressed(rbio->pick.crc) || bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; @@ -2316,10 +2321,9 @@ retry: * read_extent -> 
io_time_reset may cause a transaction restart * without returning an error, we need to check for that here: */ - if (!bch2_trans_relock(&trans)) { - ret = -EINTR; + ret = bch2_trans_relock(&trans); + if (ret) break; - } bch2_btree_iter_set_pos(&iter, POS(inum.inum, bvec_iter.bi_sector)); @@ -2373,7 +2377,9 @@ retry: err: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + ret == READ_RETRY || + ret == READ_RETRY_AVOID) goto retry; bch2_trans_exit(&trans); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index b561ed7..d77092a 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -883,7 +883,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, if (!new_fs) { for (i = 0; i < nr_got; i++) { - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = bch2_trans_run(c, bch2_trans_mark_metadata_bucket(&trans, ca, bu[i], BCH_DATA_journal, ca->mi.bucket_size)); @@ -1146,7 +1146,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) bch2_sb_get_journal(sb); struct bch_sb_field_journal_v2 *journal_buckets_v2 = bch2_sb_get_journal_v2(sb); - unsigned i; + unsigned i, nr_bvecs; ja->nr = 0; @@ -1163,11 +1163,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) if (!ja->bucket_seq) return -ENOMEM; - ca->journal.bio = bio_kmalloc(GFP_KERNEL, - DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE)); + nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); + + ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); if (!ca->journal.bio) return -ENOMEM; + bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0); + ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); if (!ja->buckets) return -ENOMEM; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 0ff78a2..107521e 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -197,7 
+197,7 @@ static void journal_entry_null_range(void *start, void *end) bch_err(c, "corrupt metadata before write:\n" \ msg, ##__VA_ARGS__); \ if (bch2_fs_inconsistent(c)) { \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ } \ break; \ @@ -823,20 +823,20 @@ static int journal_read_bucket(struct bch_dev *ca, while (offset < end) { if (!sectors_read) { struct bio *bio; + unsigned nr_bvecs; reread: sectors_read = min_t(unsigned, end - offset, buf->size >> 9); + nr_bvecs = buf_pages(buf->data, sectors_read << 9); - bio = bio_kmalloc(GFP_KERNEL, - buf_pages(buf->data, - sectors_read << 9)); - bio_set_dev(bio, ca->disk_sb.bdev); - bio->bi_iter.bi_sector = offset; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); + bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ); + + bio->bi_iter.bi_sector = offset; bch2_bio_map(bio, buf->data, sectors_read << 9); ret = submit_bio_wait(bio); - bio_put(bio); + kfree(bio); if (bch2_dev_io_err_on(ret, ca, "journal read error: sector %llu", @@ -858,7 +858,7 @@ reread: end - offset, sectors_read, READ); switch (ret) { - case BCH_FSCK_OK: + case 0: sectors = vstruct_sectors(j, c->block_bits); break; case JOURNAL_ENTRY_REREAD: diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index fdc94e8..9f8b63b 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" +#include "errcode.h" #include "error.h" #include "journal.h" #include "journal_io.h" @@ -282,11 +283,11 @@ void bch2_journal_do_discards(struct journal *j) while (should_discard_bucket(j, ja)) { if (!c->opts.nochanges && ca->mi.discard && - blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) + bdev_max_discard_sectors(ca->disk_sb.bdev)) blkdev_issue_discard(ca->disk_sb.bdev, bucket_to_sector(ca, ja->buckets[ja->discard_idx]), - ca->mi.bucket_size, GFP_NOIO, 0); + 
ca->mi.bucket_size, GFP_NOIO); spin_lock(&j->lock); ja->discard_idx = (ja->discard_idx + 1) % ja->nr; @@ -740,15 +741,17 @@ int bch2_journal_reclaim_start(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct task_struct *p; + int ret; if (j->reclaim_thread) return 0; p = kthread_create(bch2_journal_reclaim_thread, j, "bch-reclaim/%s", c->name); - if (IS_ERR(p)) { - bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p)); - return PTR_ERR(p); + ret = PTR_ERR_OR_ZERO(p); + if (ret) { + bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(p); diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c index d9b4042..5c555b3 100644 --- a/libbcachefs/journal_seq_blacklist.c +++ b/libbcachefs/journal_seq_blacklist.c @@ -272,7 +272,7 @@ retry: !test_bit(BCH_FS_STOPPING, &c->flags)) b = bch2_btree_iter_next_node(&iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 94ecb3a..53e607d 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -130,25 +130,18 @@ int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx, } static int bch2_check_lru_key(struct btree_trans *trans, - struct btree_iter *lru_iter) + struct btree_iter *lru_iter, + struct bkey_s_c lru_k) { struct bch_fs *c = trans->c; struct btree_iter iter; - struct bkey_s_c lru_k, k; + struct bkey_s_c k; struct bch_alloc_v4 a; struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; struct bpos alloc_pos; int ret; - lru_k = bch2_btree_iter_peek(lru_iter); - if (!lru_k.k) - return 0; - - ret = bkey_err(lru_k); - if (ret) - return ret; - alloc_pos = POS(lru_k.k->p.inode, le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx)); @@ -202,16 +195,10 @@ int bch2_check_lrus(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, 
BTREE_ID_lru, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_check_lru_key(&trans, &iter)); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + bch2_check_lru_key(&trans, &iter, k)); bch2_trans_exit(&trans); return ret; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 5345697..8b258d9 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -8,6 +8,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" +#include "errcode.h" #include "extents.h" #include "io.h" #include "journal.h" @@ -35,85 +36,76 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, return 0; } -static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags, - enum btree_id btree_id) +static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + unsigned dev_idx, + int flags) +{ + struct bch_fs *c = trans->c; + struct bkey_i *n; + int ret; + + if (!bch2_bkey_has_device(k, dev_idx)) + return 0; + + n = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + bkey_reassemble(n, k); + + ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false); + if (ret) + return ret; + + /* + * If the new extent no longer has any pointers, bch2_extent_normalize() + * will do the appropriate thing with it (turning it into a + * KEY_TYPE_error key, or just a discard if it was a cached extent) + */ + bch2_extent_normalize(c, bkey_i_to_s(n)); + + /* + * Since we're not inserting through an extent iterator + * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), + * we aren't using the extent overwrite path to delete, we're + * just using the normal key deletion path: + */ + if 
(bkey_deleted(&n->k)) + n->k.size = 0; + + return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); +} + +static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_buf sk; + enum btree_id id; int ret = 0; - bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - - while ((bch2_trans_begin(&trans), - (k = bch2_btree_iter_peek(&iter)).k) && - !(ret = bkey_err(k))) { - if (!bch2_bkey_has_device(k, dev_idx)) { - bch2_btree_iter_advance(&iter); + for (id = 0; id < BTREE_ID_NR; id++) { + if (!btree_type_has_ptrs(id)) continue; - } - bch2_bkey_buf_reassemble(&sk, c, k); - - ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), - dev_idx, flags, false); - if (ret) - break; - - /* - * If the new extent no longer has any pointers, bch2_extent_normalize() - * will do the appropriate thing with it (turning it into a - * KEY_TYPE_error key, or just a discard if it was a cached extent) - */ - bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - - /* - * Since we're not inserting through an extent iterator - * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), - * we aren't using the extent overwrite path to delete, we're - * just using the normal key deletion path: - */ - if (bkey_deleted(&sk.k->k)) - sk.k->k.size = 0; - - ret = bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, sk.k, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL); - - /* - * don't want to leave ret == -EINTR, since if we raced and - * something else overwrote the key we could spuriously return - * -EINTR below: - */ - if (ret == -EINTR) - ret = 0; + ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_NOFAIL, 
+ bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags)); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - - BUG_ON(ret == -EINTR); return ret; } -static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -{ - return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_extents) ?: - __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_reflink); -} - static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct btree_trans trans; @@ -154,19 +146,20 @@ retry: } ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false); - if (ret == -EINTR) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; continue; } if (ret) { - bch_err(c, "Error updating btree node key: %i", ret); + bch_err(c, "Error updating btree node key: %s", + bch2_err_str(ret)); break; } next: bch2_btree_iter_next_node(&iter); } - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); @@ -181,7 +174,7 @@ err: bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); return ret; } diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 9748b86..2fc2474 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -9,6 +9,7 @@ #include "btree_update_interior.h" #include "disk_groups.h" #include "ec.h" +#include "errcode.h" #include "error.h" #include "inode.h" #include "io.h" @@ -370,7 +371,7 @@ static int move_get_io_opts(struct btree_trans *trans, ret = lookup_inode(trans, SPOS(0, k.k->p.inode, k.k->p.snapshot), &inode); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; if (!ret) @@ -418,7 +419,7 @@ static int __bch2_move_data(struct moving_context *ctxt, break; ret = bkey_err(k); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) 
break; @@ -449,7 +450,7 @@ static int __bch2_move_data(struct moving_context *ctxt, ret2 = bch2_move_extent(&trans, ctxt, io_opts, btree_id, k, data_opts); if (ret2) { - if (ret2 == -EINTR) + if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; if (ret2 == -ENOMEM) { @@ -574,7 +575,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, ret = bch2_get_next_backpointer(&trans, bucket, gen, &bp_offset, &bp); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; @@ -589,7 +590,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, k = bch2_backpointer_get_key(&trans, &iter, bucket, bp_offset, bp); ret = bkey_err(k); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; @@ -616,7 +617,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, ret = bch2_move_extent(&trans, ctxt, io_opts, bp.btree_id, k, data_opts); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ @@ -635,7 +636,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, b = bch2_backpointer_get_node(&trans, &iter, bucket, bp_offset, bp); ret = PTR_ERR_OR_ZERO(b); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; @@ -645,7 +646,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, ret = bch2_btree_node_rewrite(&trans, &iter, b, 0); bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; @@ -740,14 +741,14 @@ retry: goto next; ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret; - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; next: bch2_btree_iter_next_node(&iter); } - if (ret == -EINTR) + if (bch2_err_matches(ret, 
BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); @@ -759,7 +760,7 @@ next: bch2_trans_exit(&trans); if (ret) - bch_err(c, "error %i in bch2_move_btree", ret); + bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret)); bch2_btree_interior_updates_flush(c); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index f9ad4cb..f913864 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -13,6 +13,7 @@ #include "buckets.h" #include "clock.h" #include "disk_groups.h" +#include "errcode.h" #include "error.h" #include "extents.h" #include "eytzinger.h" @@ -162,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c) bch2_moving_ctxt_exit(&ctxt); if (ret < 0) - bch_err(c, "error %i from bch2_move_data() in copygc", ret); + bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); return ret; @@ -251,6 +252,7 @@ void bch2_copygc_stop(struct bch_fs *c) int bch2_copygc_start(struct bch_fs *c) { struct task_struct *t; + int ret; if (c->copygc_thread) return 0; @@ -262,9 +264,10 @@ int bch2_copygc_start(struct bch_fs *c) return -ENOMEM; t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); - if (IS_ERR(t)) { - bch_err(c, "error creating copygc thread: %li", PTR_ERR(t)); - return PTR_ERR(t); + ret = PTR_ERR_OR_ZERO(t); + if (ret) { + bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(t); diff --git a/libbcachefs/movinggc.h b/libbcachefs/movinggc.h index 9227382..e85c813 100644 --- a/libbcachefs/movinggc.h +++ b/libbcachefs/movinggc.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MOVINGGC_H #define _BCACHEFS_MOVINGGC_H +unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 2f5f49c..5b8586e 100644 --- 
a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -341,6 +341,11 @@ enum opt_type { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Don't open device in exclusive mode") \ + x(direct_io, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, "Use O_DIRECT (userspace only)") \ x(sb, u64, \ OPT_MOUNT, \ OPT_UINT(0, S64_MAX), \ diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index e35a6d1..454c76e 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "btree_update.h" +#include "errcode.h" #include "inode.h" #include "quota.h" #include "subvolume.h" @@ -370,6 +371,9 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) BUG_ON(k.k->p.inode >= QTYP_NR); + if (!((1U << k.k->p.inode) & enabled_qtypes(c))) + return 0; + switch (k.k->type) { case KEY_TYPE_quota: dq = bkey_s_c_to_quota(k); @@ -393,30 +397,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) return 0; } -static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_quotas, POS(type, 0), - BTREE_ITER_PREFETCH, k, ret) { - if (k.k->p.inode != type) - break; - - ret = __bch2_quota_set(c, k); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - return ret; -} - void bch2_fs_quota_exit(struct bch_fs *c) { unsigned i; @@ -491,8 +471,6 @@ advance: int bch2_fs_quota_read(struct bch_fs *c) { - unsigned i, qtypes = enabled_qtypes(c); - struct bch_memquota_type *q; struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; @@ -502,23 +480,16 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_sb_quota_read(c); mutex_unlock(&c->sb_lock); - for_each_set_qtype(c, i, q, qtypes) { - ret = bch2_quota_init_type(c, i); - if (ret) - return ret; - } - 
bch2_trans_init(&trans, c, 0, 0); - ret = for_each_btree_key2(&trans, iter, BTREE_ID_inodes, - POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, - k, + ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas, + POS_MIN, BTREE_ITER_PREFETCH, k, + __bch2_quota_set(c, k)) ?: + for_each_btree_key2(&trans, iter, BTREE_ID_inodes, + POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, bch2_fs_quota_read_inode(&trans, &iter, k)); if (ret) - bch_err(c, "err reading inodes in quota init: %i", ret); + bch_err(c, "err in quota_read: %s", bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 31da409..ecc64dd 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -6,6 +6,7 @@ #include "buckets.h" #include "clock.h" #include "disk_groups.h" +#include "errcode.h" #include "extents.h" #include "io.h" #include "move.h" @@ -331,6 +332,7 @@ void bch2_rebalance_stop(struct bch_fs *c) int bch2_rebalance_start(struct bch_fs *c) { struct task_struct *p; + int ret; if (c->rebalance.thread) return 0; @@ -339,9 +341,10 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); - if (IS_ERR(p)) { - bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p)); - return PTR_ERR(p); + ret = PTR_ERR_OR_ZERO(p); + if (ret) { + bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(p); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 64b1e79..b070bdf 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -11,6 +11,7 @@ #include "buckets.h" #include "dirent.h" #include "ec.h" +#include "errcode.h" #include "error.h" #include "fs-common.h" #include "fsck.h" @@ -87,9 +88,9 @@ static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t i return keys->d + idx_to_pos(keys, idx); } -static size_t 
bch2_journal_key_search(struct journal_keys *keys, - enum btree_id id, unsigned level, - struct bpos pos) +static size_t __bch2_journal_key_search(struct journal_keys *keys, + enum btree_id id, unsigned level, + struct bpos pos) { size_t l = 0, r = keys->nr, m; @@ -107,7 +108,14 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, BUG_ON(l && __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0); - return idx_to_pos(keys, l); + return l; +} + +static size_t bch2_journal_key_search(struct journal_keys *keys, + enum btree_id id, unsigned level, + struct bpos pos) +{ + return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos)); } struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, @@ -116,22 +124,21 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; + struct journal_key *k; search: if (!*idx) - *idx = bch2_journal_key_search(keys, btree_id, level, pos); + *idx = __bch2_journal_key_search(keys, btree_id, level, pos); - while (*idx < keys->size && - keys->d[*idx].btree_id == btree_id && - keys->d[*idx].level == level && - bpos_cmp(keys->d[*idx].k->k.p, end_pos) <= 0) { - if (bpos_cmp(keys->d[*idx].k->k.p, pos) >= 0 && - !keys->d[*idx].overwritten) - return keys->d[*idx].k; + while (*idx < keys->nr && + (k = idx_to_key(keys, *idx), + k->btree_id == btree_id && + k->level == level && + bpos_cmp(k->k->k.p, end_pos) <= 0)) { + if (bpos_cmp(k->k->k.p, pos) >= 0 && + !k->overwritten) + return k->k; (*idx)++; - if (*idx == keys->gap) - *idx += keys->size - keys->nr; - iters++; if (iters == 10) { *idx = 0; @@ -1153,7 +1160,7 @@ int bch2_fs_recovery(struct bch_fs *c) use_clean: if (!clean) { bch_err(c, "no superblock clean section found"); - ret = BCH_FSCK_REPAIR_IMPOSSIBLE; + ret = -BCH_ERR_fsck_repair_impossible; goto err; } @@ -1435,9 +1442,9 @@ out: } if (ret) - bch_err(c, "Error in recovery: %s (%i)", 
err, ret); + bch_err(c, "Error in recovery: %s (%s)", err, bch2_err_str(ret)); else - bch_verbose(c, "ret %i", ret); + bch_verbose(c, "ret %s", bch2_err_str(ret)); return ret; err: fsck_err: diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 2038e35..d5c14bb 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -299,7 +299,8 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); - while ((ret == 0 || ret == -EINTR) && + while ((ret == 0 || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) && bkey_cmp(dst_iter.pos, dst_end) < 0) { struct disk_reservation disk_res = { 0 }; @@ -409,7 +410,7 @@ s64 bch2_remap_range(struct bch_fs *c, } bch2_trans_iter_exit(&trans, &inode_iter); - } while (ret2 == -EINTR); + } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&new_src, c); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 1a212ba..24244bc 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" #include "btree_update.h" +#include "errcode.h" #include "error.h" #include "fs.h" #include "subvolume.h" @@ -291,22 +292,14 @@ int bch2_fs_check_snapshots(struct bch_fs *c) } static int check_subvol(struct btree_trans *trans, - struct btree_iter *iter) + struct btree_iter *iter, + struct bkey_s_c k) { - struct bkey_s_c k; struct bkey_s_c_subvolume subvol; struct bch_snapshot snapshot; unsigned snapid; int ret; - k = bch2_btree_iter_peek(iter); - if (!k.k) - return 0; - - ret = bkey_err(k); - if (ret) - return ret; - if (k.k->type != KEY_TYPE_subvolume) return 0; @@ -322,9 +315,9 @@ static int check_subvol(struct btree_trans *trans, if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); - if (ret && ret != -EINTR) - bch_err(trans->c, "error deleting subvolume %llu: %i", - iter->pos.offset, 
ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(trans->c, "error deleting subvolume %llu: %s", + iter->pos.offset, bch2_err_str(ret)); if (ret) return ret; } @@ -336,22 +329,15 @@ int bch2_fs_check_subvols(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter; + struct bkey_s_c k; int ret; bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes, - POS_MIN, BTREE_ITER_PREFETCH); - - do { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, - check_subvol(&trans, &iter)); - if (ret) - break; - } while (bch2_btree_iter_advance(&iter)); - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_subvol(&trans, &iter, k)); bch2_trans_exit(&trans); @@ -380,7 +366,7 @@ int bch2_fs_snapshots_start(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error starting snapshots: %i", ret); + bch_err(c, "error starting snapshots: %s", bch2_err_str(ret)); return ret; } @@ -595,59 +581,27 @@ err: return ret; } -static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, - snapshot_id_list *deleted, - enum btree_id btree_id) +static int snapshot_delete_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + snapshot_id_list *deleted, + snapshot_id_list *equiv_seen, + struct bpos *last_pos) { struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - snapshot_id_list equiv_seen = { 0 }; - struct bpos last_pos = POS_MIN; - int ret = 0; - - /* - * XXX: We should also delete whiteouts that no longer overwrite - * anything - */ + u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; - bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); - - while 
((bch2_trans_begin(trans), - (k = bch2_btree_iter_peek(&iter)).k) && - !(ret = bkey_err(k))) { - u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; - - if (bkey_cmp(k.k->p, last_pos)) - equiv_seen.nr = 0; - last_pos = k.k->p; - - if (snapshot_list_has_id(deleted, k.k->p.snapshot) || - snapshot_list_has_id(&equiv_seen, equiv)) { - ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL, - bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(trans, &iter, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); - if (ret) - break; - } else { - ret = snapshot_list_add(c, &equiv_seen, equiv); - if (ret) - break; - } + if (bkey_cmp(k.k->p, *last_pos)) + equiv_seen->nr = 0; + *last_pos = k.k->p; - bch2_btree_iter_advance(&iter); + if (snapshot_list_has_id(deleted, k.k->p.snapshot) || + snapshot_list_has_id(equiv_seen, equiv)) { + return bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + } else { + return snapshot_list_add(c, equiv_seen, equiv); } - bch2_trans_iter_exit(trans, &iter); - - darray_exit(&equiv_seen); - - return ret; } static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, @@ -694,7 +648,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { - bch_err(c, "error deleleting dead snapshots: error going rw: %i", ret); + bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret)); return ret; } } @@ -710,7 +664,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) NULL, NULL, 0, bch2_delete_redundant_snapshot(&trans, &iter, k)); if (ret) { - bch_err(c, "error deleting redundant snapshots: %i", ret); + bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); goto err; } @@ -718,7 +672,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) POS_MIN, 0, k, bch2_snapshot_set_equiv(&trans, k)); if (ret) { - bch_err(c, "error in bch2_snapshots_set_equiv: %i", ret); + bch_err(c, "error in 
bch2_snapshots_set_equiv: %s", bch2_err_str(ret)); goto err; } @@ -737,17 +691,27 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_trans_iter_exit(&trans, &iter); if (ret) { - bch_err(c, "error walking snapshots: %i", ret); + bch_err(c, "error walking snapshots: %s", bch2_err_str(ret)); goto err; } for (id = 0; id < BTREE_ID_NR; id++) { + struct bpos last_pos = POS_MIN; + snapshot_id_list equiv_seen = { 0 }; + if (!btree_type_has_snapshots(id)) continue; - ret = bch2_snapshot_delete_keys_btree(&trans, &deleted, id); + ret = for_each_btree_key_commit(&trans, iter, + id, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_NOFAIL, + snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)); + + darray_exit(&equiv_seen); + if (ret) { - bch_err(c, "error deleting snapshot keys: %i", ret); + bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret)); goto err; } } @@ -756,8 +720,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_delete(&trans, deleted.data[i])); if (ret) { - bch_err(c, "error deleting snapshot %u: %i", - deleted.data[i], ret); + bch_err(c, "error deleting snapshot %u: %s", + deleted.data[i], bch2_err_str(ret)); goto err; } } @@ -913,6 +877,8 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) goto err; ret = bch2_snapshot_node_set_deleted(trans, snapid); + if (ret) + goto err; h = bch2_trans_kmalloc(trans, sizeof(*h)); ret = PTR_ERR_OR_ZERO(h); @@ -949,7 +915,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_subvolume_delete(&trans, *id)); if (ret) { - bch_err(c, "error %i deleting subvolume %u", ret, *id); + bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret)); break; } } diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 8b81309..55f8c65 100644 --- a/libbcachefs/super-io.c +++ 
b/libbcachefs/super-io.c @@ -101,7 +101,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb, void bch2_free_super(struct bch_sb_handle *sb) { if (sb->bio) - bio_put(sb->bio); + kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->bdev)) blkdev_put(sb->bdev, sb->mode); @@ -143,13 +143,16 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) return -ENOMEM; if (sb->have_bio) { - bio = bio_kmalloc(GFP_KERNEL, - DIV_ROUND_UP(new_buffer_size, PAGE_SIZE)); + unsigned nr_bvecs = DIV_ROUND_UP(new_buffer_size, PAGE_SIZE); + + bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); if (!bio) return -ENOMEM; + bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); + if (sb->bio) - bio_put(sb->bio); + kfree(sb->bio); sb->bio = bio; } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 2908974..7c63480 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -24,6 +24,7 @@ #include "debug.h" #include "disk_groups.h" #include "ec.h" +#include "errcode.h" #include "error.h" #include "fs.h" #include "fs-io.h" @@ -930,31 +931,10 @@ out: up_write(&c->state_lock); return ret; err: - switch (ret) { - case BCH_FSCK_ERRORS_NOT_FIXED: - bch_err(c, "filesystem contains errors: please report this to the developers"); - pr_cont("mount with -o fix_errors to repair\n"); - break; - case BCH_FSCK_REPAIR_UNIMPLEMENTED: - bch_err(c, "filesystem contains errors: please report this to the developers"); - pr_cont("repair unimplemented: inform the developers so that it can be added\n"); - break; - case BCH_FSCK_REPAIR_IMPOSSIBLE: - bch_err(c, "filesystem contains errors, but repair impossible"); - break; - case BCH_FSCK_UNKNOWN_VERSION: - bch_err(c, "unknown metadata version"); - break; - case -ENOMEM: - bch_err(c, "cannot allocate memory"); - break; - case -EIO: - bch_err(c, "IO error"); - break; - } + bch_err(c, "error starting filesystem: %s", bch2_err_str(ret)); - if (ret >= 0) - ret = -EIO; + if (ret < -BCH_ERR_START) + ret = -EINVAL; goto out; } @@ -1438,7 +1418,7 @@ static int 
bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) bch2_btree_delete_range(c, BTREE_ID_alloc, start, end, BTREE_TRIGGER_NORUN, NULL); if (ret) - bch_err(c, "error %i removing dev alloc info", ret); + bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret)); return ret; } @@ -1466,7 +1446,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_dev_data_drop(c, ca->dev_idx, flags); if (ret) { - bch_err(ca, "Remove failed: error %i dropping data", ret); + bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret)); goto err; } @@ -1478,7 +1458,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); if (ret) { - bch_err(ca, "Remove failed: error %i flushing journal", ret); + bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret)); goto err; } @@ -1490,7 +1470,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_replicas_gc2(c); if (ret) { - bch_err(ca, "Remove failed: error %i from replicas gc", ret); + bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret)); goto err; } @@ -1554,7 +1534,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_read_super(path, &opts, &sb); if (ret) { - bch_err(c, "device add error: error reading super: %i", ret); + bch_err(c, "device add error: error reading super: %s", bch2_err_str(ret)); goto err; } @@ -1647,13 +1627,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "device add error: error marking new superblock: %i", ret); + bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret)); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err(c, "device add error: error initializing free space: %i", ret); + bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret)); goto err_late; } @@ -1715,8 +1695,8 @@ int 
bch2_dev_online(struct bch_fs *c, const char *path) ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "error bringing %s online: error %i from bch2_trans_mark_dev_sb", - path, ret); + bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s", + path, bch2_err_str(ret)); goto err; } @@ -1785,7 +1765,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ret = bch2_dev_buckets_resize(c, ca, nbuckets); if (ret) { - bch_err(ca, "Resize error: %i", ret); + bch_err(ca, "Resize error: %s", bch2_err_str(ret)); goto err; } diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index 57245ca..56058a5 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -46,7 +46,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { - bch_err(c, "update error in test_delete: %i", ret); + bch_err(c, "update error in test_delete: %s", bch2_err_str(ret)); goto err; } @@ -55,7 +55,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error (first) in test_delete: %i", ret); + bch_err(c, "delete error (first) in test_delete: %s", bch2_err_str(ret)); goto err; } @@ -64,7 +64,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error (second) in test_delete: %i", ret); + bch_err(c, "delete error (second) in test_delete: %s", bch2_err_str(ret)); goto err; } err: @@ -92,7 +92,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { - bch_err(c, "update error in test_delete_written: %i", ret); + bch_err(c, "update error in test_delete_written: %s", bch2_err_str(ret)); goto err; } @@ -103,7 +103,7 @@ static int test_delete_written(struct bch_fs 
*c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error in test_delete_written: %i", ret); + bch_err(c, "delete error in test_delete_written: %s", bch2_err_str(ret)); goto err; } err: @@ -136,7 +136,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate: %i", ret); + bch_err(c, "insert error in test_iterate: %s", bch2_err_str(ret)); goto err; } } @@ -145,20 +145,30 @@ static int test_iterate(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), 0, k, ret) { - if (k.k->p.inode) - break; - + ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i++); + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret)); + goto err; } BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) - BUG_ON(k.k->p.offset != --i); + ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_xattrs, + SPOS(0, U64_MAX, U32_MAX), 0, k, + ({ + BUG_ON(k.k->p.offset != --i); + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret)); + goto err; + } BUG_ON(i); err: @@ -192,7 +202,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_extents: %i", ret); + bch_err(c, "insert error in test_iterate_extents: %s", bch2_err_str(ret)); goto err; } } @@ -201,19 +211,31 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), 0, k, ret) { + ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), 0, k, ({ 
BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret)); + goto err; } BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) { - BUG_ON(k.k->p.offset != i); - i = bkey_start_offset(k.k); + ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_extents, + SPOS(0, U64_MAX, U32_MAX), 0, k, + ({ + BUG_ON(k.k->p.offset != i); + i = bkey_start_offset(k.k); + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret)); + goto err; } BUG_ON(i); @@ -247,7 +269,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_slots: %i", ret); + bch_err(c, "insert error in test_iterate_slots: %s", bch2_err_str(ret)); goto err; } } @@ -256,15 +278,16 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), 0, k, ret) { - if (k.k->p.inode) - break; - + ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); i += 2; + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret)); + goto err; } - bch2_trans_iter_exit(&trans, &iter); BUG_ON(i != nr * 2); @@ -272,17 +295,23 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), - BTREE_ITER_SLOTS, k, ret) { + ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), + BTREE_ITER_SLOTS, k, ({ + if (i >= nr * 2) + break; + BUG_ON(k.k->p.offset != i); BUG_ON(bkey_deleted(k.k) != (i & 1)); i++; - if (i == nr * 2) - break; + 0; + })); + if (ret < 0) { + bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, 
bch2_err_str(ret)); + goto err; } - bch2_trans_iter_exit(&trans, &iter); + ret = 0; err: bch2_trans_exit(&trans); return ret; @@ -313,7 +342,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_slots_extents: %i", ret); + bch_err(c, "insert error in test_iterate_slots_extents: %s", bch2_err_str(ret)); goto err; } } @@ -322,13 +351,17 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), 0, k, ret) { + ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret)); + goto err; } - bch2_trans_iter_exit(&trans, &iter); BUG_ON(i != nr); @@ -336,19 +369,23 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), - BTREE_ITER_SLOTS, k, ret) { + ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), + BTREE_ITER_SLOTS, k, ({ + if (i == nr) + break; BUG_ON(bkey_deleted(k.k) != !(i % 16)); BUG_ON(bkey_start_offset(k.k) != i); BUG_ON(k.k->size != 8); i = k.k->p.offset; - - if (i == nr) - break; + 0; + })); + if (ret) { + bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, bch2_err_str(ret)); + goto err; } - bch2_trans_iter_exit(&trans, &iter); + ret = 0; err: bch2_trans_exit(&trans); return 0; @@ -368,10 +405,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + 
lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); BUG_ON(k.k); bch2_trans_iter_exit(&trans, &iter); @@ -389,10 +426,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), 0); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); BUG_ON(k.k); bch2_trans_iter_exit(&trans, &iter); @@ -419,7 +456,7 @@ static int insert_test_extent(struct bch_fs *c, ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) - bch_err(c, "insert error in insert_test_extent: %i", ret); + bch_err(c, "insert error in insert_test_extent: %s", bch2_err_str(ret)); return ret; } @@ -482,7 +519,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) bch2_trans_init(&trans, c, 0, 0); bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); BUG_ON(k.k->p.snapshot != U32_MAX); @@ -518,7 +555,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr) ret = test_snapshot_filter(c, snapids[0], snapids[1]); if (ret) { - bch_err(c, "err %i from test_snapshot_filter", ret); + bch_err(c, "err from test_snapshot_filter: %s", bch2_err_str(ret)); return ret; } @@ -555,7 +592,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); if (ret) { - bch_err(c, "error in rand_insert: %i", ret); + bch_err(c, "error in rand_insert: %s", bch2_err_str(ret)); break; } } @@ -591,7 +628,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i)); if (ret) { - bch_err(c, "error in 
rand_insert_multi: %i", ret); + bch_err(c, "error in rand_insert_multi: %s", bch2_err_str(ret)); break; } } @@ -615,10 +652,10 @@ static int rand_lookup(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); ret = bkey_err(k); if (ret) { - bch_err(c, "error in rand_lookup: %i", ret); + bch_err(c, "error in rand_lookup: %s", bch2_err_str(ret)); break; } } @@ -638,10 +675,10 @@ static int rand_mixed_trans(struct btree_trans *trans, bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX)); - k = bch2_btree_iter_peek(iter); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(iter))); ret = bkey_err(k); - if (ret && ret != -EINTR) - bch_err(trans->c, "lookup error in rand_mixed: %i", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret)); if (ret) return ret; @@ -671,7 +708,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, rand_mixed_trans(&trans, &iter, &cookie, i, rand)); if (ret) { - bch_err(c, "update error in rand_mixed: %i", ret); + bch_err(c, "update error in rand_mixed: %s", bch2_err_str(ret)); break; } } @@ -689,7 +726,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(&iter); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); ret = bkey_err(k); if (ret) goto err; @@ -717,7 +754,7 @@ static int rand_delete(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, __do_delete(&trans, pos)); if (ret) { - bch_err(c, "error in rand_delete: %i", ret); + bch_err(c, "error in rand_delete: %s", bch2_err_str(ret)); break; } } @@ -733,28 +770,23 @@ static int seq_insert(struct bch_fs *c, u64 nr) struct bkey_s_c k; struct 
bkey_i_cookie insert; int ret = 0; - u64 i = 0; bkey_cookie_init(&insert.k_i); bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - insert.k.p = iter.pos; - - ret = commit_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, &insert.k_i, 0)); - if (ret) { - bch_err(c, "error in seq_insert: %i", ret); - break; - } - - if (++i == nr) - break; - } - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, + NULL, NULL, 0, + ({ + if (iter.pos.offset >= nr) + break; + insert.k.p = iter.pos; + bch2_trans_update(&trans, &iter, &insert.k_i, 0); + })); + if (ret) + bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; @@ -769,10 +801,11 @@ static int seq_lookup(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), 0, k, ret) - ; - bch2_trans_iter_exit(&trans, &iter); + ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), 0, k, + 0); + if (ret) + bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; @@ -787,22 +820,18 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), - BTREE_ITER_INTENT, k, ret) { - struct bkey_i_cookie u; - - bkey_reassemble(&u.k_i, k); + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), + BTREE_ITER_INTENT, k, + NULL, NULL, 0, + ({ + struct bkey_i_cookie u; - ret = commit_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, &u.k_i, 0)); - if (ret) { - bch_err(c, "error in seq_overwrite: %i", ret); - break; - } - } - 
bch2_trans_iter_exit(&trans, &iter); + bkey_reassemble(&u.k_i, k); + bch2_trans_update(&trans, &iter, &u.k_i, 0); + })); + if (ret) + bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; @@ -816,7 +845,7 @@ static int seq_delete(struct bch_fs *c, u64 nr) SPOS(0, 0, U32_MAX), SPOS_MAX, 0, NULL); if (ret) - bch_err(c, "error in seq_delete: %i", ret); + bch_err(c, "error in seq_delete: %s", bch2_err_str(ret)); return ret; } @@ -853,7 +882,7 @@ static int btree_perf_test_thread(void *data) ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads)); if (ret) { - bch_err(j->c, "%ps: error %i", j->fn, ret); + bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret)); j->ret = ret; } diff --git a/libbcachefs/trace.c b/libbcachefs/trace.c index 59e8dfa..7057398 100644 --- a/libbcachefs/trace.c +++ b/libbcachefs/trace.c @@ -2,11 +2,13 @@ #include "bcachefs.h" #include "alloc_types.h" #include "buckets.h" -#include "btree_types.h" +#include "btree_iter.h" +#include "btree_locking.h" #include "keylist.h" +#include "opts.h" #include -#include "keylist.h" +#include #define CREATE_TRACE_POINTS #include diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 8ef4b59..ee2c7d9 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -376,31 +376,37 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats) u64 q, last_q = 0; int i; - prt_printf(out, "count:\t\t%llu\n", + prt_printf(out, "count:\t\t%llu", stats->count); - prt_printf(out, "rate:\t\t%llu/sec\n", + prt_newline(out); + prt_printf(out, "rate:\t\t%llu/sec", freq ? 
div64_u64(NSEC_PER_SEC, freq) : 0); + prt_newline(out); prt_printf(out, "frequency:\t"); pr_time_units(out, freq); - prt_printf(out, "\navg duration:\t"); + prt_newline(out); + prt_printf(out, "avg duration:\t"); pr_time_units(out, stats->average_duration); - prt_printf(out, "\nmax duration:\t"); + prt_newline(out); + prt_printf(out, "max duration:\t"); pr_time_units(out, stats->max_duration); i = eytzinger0_first(NR_QUANTILES); u = pick_time_units(stats->quantiles.entries[i].m); - prt_printf(out, "\nquantiles (%s):\t", u->name); + prt_newline(out); + prt_printf(out, "quantiles (%s):\t", u->name); eytzinger0_for_each(i, NR_QUANTILES) { bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; q = max(stats->quantiles.entries[i].m, last_q); - prt_printf(out, "%llu%s", - div_u64(q, u->nsecs), - is_last ? "\n" : " "); + prt_printf(out, "%llu ", + div_u64(q, u->nsecs)); + if (is_last) + prt_newline(out); last_q = q; } } diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 1236127..186ffab 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -344,7 +344,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/linux/bio.c b/linux/bio.c index 5e19c8a..93a791c 100644 --- a/linux/bio.c +++ b/linux/bio.c @@ -293,7 +293,7 @@ void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf) atomic_set(&bio->__bi_remaining, 1); } -struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) +struct bio *bio_kmalloc(unsigned int nr_iovecs, gfp_t gfp_mask) { struct bio *bio; diff --git a/linux/blkdev.c b/linux/blkdev.c index bd9dc9c..9b3ea93 100644 --- a/linux/blkdev.c +++ b/linux/blkdev.c @@ -113,7 +113,7 @@ int submit_bio_wait(struct bio *bio) int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, - gfp_t gfp_mask, unsigned long flags) + gfp_t gfp_mask) { return 0; } 
diff --git a/linux/six.c b/linux/six.c index fca1208..5b2d92c 100644 --- a/linux/six.c +++ b/linux/six.c @@ -757,3 +757,23 @@ void six_lock_pcpu_alloc(struct six_lock *lock) #endif } EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc); + +/* + * Returns lock held counts, for both read and intent + */ +struct six_lock_count six_lock_counts(struct six_lock *lock) +{ + struct six_lock_count ret = { 0, lock->state.intent_lock }; + + if (!lock->readers) + ret.read += lock->state.read_lock; + else { + int cpu; + + for_each_possible_cpu(cpu) + ret.read += *per_cpu_ptr(lock->readers, cpu); + } + + return ret; +} +EXPORT_SYMBOL_GPL(six_lock_counts); -- 2.39.2