From 8642d4ae10f167a2eb850403f6d2b60757242b31 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 Jun 2023 18:10:23 -0400 Subject: [PATCH] Update bcachefs sources to 7c0fe6f104 bcachefs: Fix bch2_fsck_ask_yn() Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- Makefile | 4 + include/linux/atomic.h | 7 ++ include/linux/math.h | 5 + include/linux/mean_and_variance.h | 17 ++- include/linux/sched.h | 2 +- libbcachefs/acl.c | 29 +++-- libbcachefs/bcachefs.h | 1 - libbcachefs/btree_cache.c | 34 +++--- libbcachefs/btree_io.c | 8 +- libbcachefs/btree_iter.c | 49 ++++---- libbcachefs/btree_iter.h | 59 ++++++++- libbcachefs/btree_key_cache.c | 13 +- libbcachefs/btree_locking.c | 8 +- libbcachefs/btree_update.h | 23 ++++ libbcachefs/btree_update_interior.c | 22 ++-- libbcachefs/btree_update_leaf.c | 179 ++++++++++++++++++---------- libbcachefs/buckets.c | 83 +++++++++---- libbcachefs/chardev.c | 2 +- libbcachefs/compress.c | 12 +- libbcachefs/data_update.c | 96 ++------------- libbcachefs/debug.c | 4 +- libbcachefs/ec.c | 13 +- libbcachefs/errcode.h | 13 +- libbcachefs/error.c | 3 +- libbcachefs/fs-common.c | 2 +- libbcachefs/fs-io.c | 98 ++++++++------- libbcachefs/fs-ioctl.c | 2 +- libbcachefs/fs.c | 6 +- libbcachefs/fsck.c | 24 ++-- libbcachefs/inode.c | 2 +- libbcachefs/io.c | 26 ++-- libbcachefs/journal_io.c | 2 +- libbcachefs/journal_reclaim.c | 2 +- libbcachefs/keylist.c | 2 +- libbcachefs/move.c | 2 +- libbcachefs/movinggc.c | 2 +- libbcachefs/quota.c | 13 +- libbcachefs/recovery.c | 2 +- libbcachefs/reflink.c | 20 +--- libbcachefs/str_hash.h | 4 +- libbcachefs/subvolume.c | 21 ++-- libbcachefs/super.c | 2 +- libbcachefs/trace.h | 19 --- libbcachefs/util.c | 4 +- libbcachefs/xattr.c | 6 +- linux/six.c | 17 +-- 47 files changed, 531 insertions(+), 435 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 1d85f95..1df24ce 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -31c09369cd01b34fb8ba845fa09776576b03a1e2 
+7c0fe6f104a68065c15b069176247bf5d237b2b3 diff --git a/Makefile b/Makefile index 3fc52cf..033cdfb 100644 --- a/Makefile +++ b/Makefile @@ -208,6 +208,10 @@ update-bcachefs-sources: git add include/linux/kmemleak.h cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/ git add linux/int_sqrt.c + cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/ + git add linux/mean_and_variance.c + cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/ + git add include/linux/mean_and_variance.h cp $(LINUX_DIR)/scripts/Makefile.compiler ./ git add Makefile.compiler $(RM) libbcachefs/*.mod.c diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 79cf5aa..f4d047c 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -271,6 +271,13 @@ static inline i_type a_type##_cmpxchg(a_type##_t *v, i_type old, i_type new)\ static inline i_type a_type##_cmpxchg_acquire(a_type##_t *v, i_type old, i_type new)\ { \ return cmpxchg_acquire(&v->counter, old, new); \ +} \ + \ +static inline bool a_type##_try_cmpxchg_acquire(a_type##_t *v, i_type *old, i_type new)\ +{ \ + i_type prev = *old; \ + *old = cmpxchg_acquire(&v->counter, *old, new); \ + return prev == *old; \ } DEF_ATOMIC_OPS(atomic, int) diff --git a/include/linux/math.h b/include/linux/math.h index 3cf6726..db7cdd2 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -2,6 +2,11 @@ #ifndef _LINUX_MATH_H #define _LINUX_MATH_H +#include + +/* abs() */ +#include + /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be diff --git a/include/linux/mean_and_variance.h b/include/linux/mean_and_variance.h index 9ed79f4..6475050 100644 --- a/include/linux/mean_and_variance.h +++ b/include/linux/mean_and_variance.h @@ -3,10 +3,9 @@ #define MEAN_AND_VARIANCE_H_ #include -#include #include +#include #include -#include #define SQRT_U64_MAX 4294967295ULL @@ -178,14 +177,12 @@ static inline s64 fast_divpow2(s64 n, u8 d) * * see linked pdf equation 12. 
*/ -static inline struct mean_and_variance -mean_and_variance_update(struct mean_and_variance s, s64 v) -{ - return (struct mean_and_variance) { - .n = s.n + 1, - .sum = s.sum + v, - .sum_squares = u128_add(s.sum_squares, u128_square(abs(v))), - }; +static inline void +mean_and_variance_update(struct mean_and_variance *s, s64 v) +{ + s->n++; + s->sum += v; + s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v))); } s64 mean_and_variance_get_mean(struct mean_and_variance s); diff --git a/include/linux/sched.h b/include/linux/sched.h index fef7e32..c5c8e3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -117,7 +117,7 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -#define cond_resched() +static inline void cond_resched(void) {} #define need_resched() 0 void schedule(void); diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 2bf58aa..ce7a460 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -35,12 +35,14 @@ static inline int acl_to_xattr_type(int type) /* * Convert from filesystem to in-memory representation. 
*/ -static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size) +static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, + const void *value, size_t size) { const void *p, *end = value + size; struct posix_acl *acl; struct posix_acl_entry *out; unsigned count = 0; + int ret; if (!value) return NULL; @@ -81,9 +83,14 @@ static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size) if (!count) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = allocate_dropping_locks(trans, ret, + posix_acl_alloc(count, _gfp)); if (!acl) return ERR_PTR(-ENOMEM); + if (ret) { + kfree(acl); + return ERR_PTR(ret); + } out = acl->a_entries; @@ -234,9 +241,7 @@ retry: &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (ret) { - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - if (ret != -ENOENT) + if (!bch2_err_matches(ret, ENOENT)) acl = ERR_PTR(ret); goto out; } @@ -249,12 +254,15 @@ retry: } xattr = bkey_s_c_to_xattr(k); - acl = bch2_acl_from_disk(xattr_val(xattr.v), + acl = bch2_acl_from_disk(&trans, xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); if (!IS_ERR(acl)) set_cached_acl(&inode->v, type, acl); out: + if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart)) + goto retry; + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return acl; @@ -287,7 +295,7 @@ int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, inum, &search); } - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 0 : ret; } int bch2_set_acl(struct mnt_idmap *idmap, @@ -368,20 +376,21 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (ret) - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 
0 : ret; k = bch2_btree_iter_peek_slot(&iter); xattr = bkey_s_c_to_xattr(k); if (ret) goto err; - acl = bch2_acl_from_disk(xattr_val(xattr.v), + acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); ret = PTR_ERR_OR_ZERO(acl); if (IS_ERR_OR_NULL(acl)) goto err; - ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); + ret = allocate_dropping_locks_errcode(trans, + __posix_acl_chmod(&acl, _gfp, mode)); if (ret) goto err; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index e8ec7b8..f354c9d 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -963,7 +963,6 @@ struct bch_fs { struct bio_set ec_bioset; /* REFLINK */ - u64 reflink_hint; reflink_gc_table reflink_gc_table; size_t reflink_gc_nr; diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index f840270..661b766 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -128,9 +128,6 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) return NULL; bkey_btree_ptr_init(&b->key); -#ifdef CONFIG_DEBUG_LOCK_ALLOC - lockdep_set_no_check_recursion(&b->c.lock.dep_map); -#endif INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); b->byte_order = ilog2(btree_bytes(c)); @@ -639,9 +636,10 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea goto got_node; } - b = __btree_node_mem_alloc(c, __GFP_NOWARN); + b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); if (!b) { mutex_unlock(&bc->lock); + bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; @@ -670,8 +668,11 @@ got_node: mutex_unlock(&bc->lock); - if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) - goto err; + if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { + bch2_trans_unlock(trans); + if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) + goto err; + } mutex_lock(&bc->lock); bc->used++; @@ -864,6 +865,7 @@ static struct btree *__bch2_btree_node_get(struct 
btree_trans *trans, struct btr struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; + bool need_relock = false; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); @@ -877,6 +879,7 @@ retry: */ b = bch2_btree_node_fill(trans, path, k, path->btree_id, level, lock_type, true); + need_relock = true; /* We raced and found the btree node in the cache */ if (!b) @@ -915,6 +918,7 @@ retry: six_unlock_type(&b->c.lock, lock_type); bch2_trans_unlock(trans); + need_relock = true; bch2_btree_node_wait_on_read(b); @@ -922,19 +926,19 @@ retry: * should_be_locked is not set on this path yet, so we need to * relock it specifically: */ - if (trans) { - int ret = bch2_trans_relock(trans) ?: - bch2_btree_path_relock_intent(trans, path); - if (ret) { - BUG_ON(!trans->restarted); - return ERR_PTR(ret); - } - } - if (!six_relock_type(&b->c.lock, lock_type, seq)) goto retry; } + if (unlikely(need_relock)) { + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock_intent(trans, path); + if (ret) { + six_unlock_type(&b->c.lock, lock_type); + return ERR_PTR(ret); + } + } + prefetch(b->aux_data); for_each_bset(b, t) { diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 0a7a18e..27a2a7b 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -117,7 +117,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; - p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); + p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); } memalloc_nofs_restore(flags); return p; @@ -937,7 +937,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, /* We might get called multiple times on read retry: */ b->written = 0; - iter = mempool_alloc(&c->fill_iter, GFP_NOIO); + iter = mempool_alloc(&c->fill_iter, GFP_NOFS); sort_iter_init(iter, b); iter->size = (btree_blocks(c) + 1) * 2; @@ -1580,7 +1580,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree 
*b, bio = bio_alloc_bioset(NULL, buf_pages(b->data, btree_bytes(c)), REQ_OP_READ|REQ_SYNC|REQ_META, - GFP_NOIO, + GFP_NOFS, &c->btree_bio); rb = container_of(bio, struct btree_read_bio, bio); rb->c = c; @@ -2077,7 +2077,7 @@ do_write: wbio = container_of(bio_alloc_bioset(NULL, buf_pages(data, sectors_to_write << 9), REQ_OP_WRITE|REQ_META, - GFP_NOIO, + GFP_NOFS, &c->btree_bio), struct btree_write_bio, wbio.bio); wbio_init(&wbio->wbio.bio); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 4b9c04d..485e93c 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -41,13 +41,10 @@ static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_pa */ static inline int bch2_trans_cond_resched(struct btree_trans *trans) { - if (need_resched() || race_fault()) { - bch2_trans_unlock(trans); - schedule(); - return bch2_trans_relock(trans); - } else { + if (need_resched() || race_fault()) + return drop_locks_do(trans, (schedule(), 0)); + else return 0; - } } static inline int __btree_path_cmp(const struct btree_path *l, @@ -2793,6 +2790,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) unsigned new_top = trans->mem_top + size; size_t old_bytes = trans->mem_bytes; size_t new_bytes = roundup_pow_of_two(new_top); + int ret; void *new_mem; void *p; @@ -2800,15 +2798,27 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); - new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); - if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { - new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); - new_bytes = BTREE_TRANS_MEM_MAX; - kfree(trans->mem); - } + new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + + new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { + new_mem = 
mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); + new_bytes = BTREE_TRANS_MEM_MAX; + kfree(trans->mem); + } + + if (!new_mem) + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + + trans->mem = new_mem; + trans->mem_bytes = new_bytes; - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + } trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -2879,11 +2889,8 @@ u32 bch2_trans_begin(struct btree_trans *trans) if (!trans->restarted && (need_resched() || - local_clock() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) { - bch2_trans_unlock(trans); - cond_resched(); - bch2_trans_relock(trans); - } + local_clock() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) + drop_locks_do(trans, (cond_resched(), 0)); if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) bch2_trans_reset_srcu_lock(trans); @@ -3110,7 +3117,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) struct btree_path *path; struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; - unsigned l; + unsigned l, idx; if (!out->nr_tabstops) { printbuf_tabstop_push(out, 16); @@ -3119,7 +3126,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn); - trans_for_each_path(trans, path) { + trans_for_each_path_safe(trans, path, idx) { if (!path->nodes_locked) continue; diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 198e381..f81a115 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -89,6 +89,32 @@ __trans_next_path(struct btree_trans *trans, unsigned idx) #define trans_for_each_path(_trans, _path) \ trans_for_each_path_from(_trans, _path, 0) +static inline struct btree_path * +__trans_next_path_safe(struct btree_trans *trans, unsigned *idx) +{ + u64 l; + + if (*idx == 
BTREE_ITER_MAX) + return NULL; + + l = trans->paths_allocated >> *idx; + if (!l) + return NULL; + + *idx += __ffs64(l); + EBUG_ON(*idx >= BTREE_ITER_MAX); + return &trans->paths[*idx]; +} + +/* + * This version is intended to be safe for use on a btree_trans that is owned by + * another thread, for bch2_btree_trans_to_text(); + */ +#define trans_for_each_path_safe(_trans, _path, _idx) \ + for (_idx = 0; \ + (_path = __trans_next_path_safe((_trans), &_idx)); \ + _idx++) + static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { unsigned idx = path ? path->sorted_idx + 1 : 0; @@ -487,7 +513,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, k = bch2_btree_iter_peek_slot(iter); if (!bkey_err(k) && type && k.k->type != type) - k = bkey_s_c_err(-ENOENT); + k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch); if (unlikely(bkey_err(k))) bch2_trans_iter_exit(trans, iter); return k; @@ -825,6 +851,37 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) +#define drop_locks_do(_trans, _do) \ +({ \ + bch2_trans_unlock(_trans); \ + _do ?: bch2_trans_relock(_trans); \ +}) + +#define allocate_dropping_locks_errcode(_trans, _do) \ +({ \ + gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ + int _ret = _do; \ + \ + if (bch2_err_matches(_ret, ENOMEM)) { \ + _gfp = GFP_KERNEL; \ + _ret = drop_locks_do(trans, _do); \ + } \ + _ret; \ +}) + +#define allocate_dropping_locks(_trans, _ret, _do) \ +({ \ + gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ + typeof(_do) _p = _do; \ + \ + _ret = 0; \ + if (unlikely(!_p)) { \ + _gfp = GFP_KERNEL; \ + _ret = drop_locks_do(trans, ((_p = _do), 0)); \ + } \ + _p; \ +}) + /* new multiple iterator interface: */ void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 645fa99..aafb54d 100644 --- 
a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -265,15 +265,8 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, return ck; } - ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN); - if (likely(ck)) - goto init; - - bch2_trans_unlock(trans); - - ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL); - - ret = bch2_trans_relock(trans); + ck = allocate_dropping_locks(trans, ret, + kmem_cache_zalloc(bch2_key_cache, _gfp)); if (ret) { kmem_cache_free(bch2_key_cache, ck); return ERR_PTR(ret); @@ -281,7 +274,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, if (!ck) return NULL; -init: + INIT_LIST_HEAD(&ck->list); bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 70639a1..a17256f 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -10,6 +10,9 @@ void bch2_btree_lock_init(struct btree_bkey_cached_common *b, enum six_lock_init_flags flags) { __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_set_no_check_recursion(&b->lock.dep_map); +#endif } #ifdef CONFIG_LOCKDEP @@ -738,11 +741,8 @@ bool bch2_trans_locked(struct btree_trans *trans) int __bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex *lock) { - int ret; + int ret = drop_locks_do(trans, (mutex_lock(lock), 0)); - bch2_trans_unlock(trans); - mutex_lock(lock); - ret = bch2_trans_relock(trans); if (ret) mutex_unlock(lock); return ret; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 1ac3a81..e90cf29 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -4,6 +4,7 @@ #include "btree_iter.h" #include "journal.h" +#include "journal.h" struct bch_fs; struct btree; @@ -83,6 +84,28 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, int bch2_btree_node_update_key_get_iter(struct 
btree_trans *, struct btree *, struct bkey_i *, bool); +int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, + struct bpos, struct bpos); + +/* + * For use when splitting extents in existing snapshots: + * + * If @old_pos is an interior snapshot node, iterate over descendent snapshot + * nodes: for every descendent snapshot in which @old_pos is overwritten and + * not visible, emit a whiteout at @new_pos. + */ +static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, + enum btree_id btree, + struct bpos old_pos, + struct bpos new_pos) +{ + if (!btree_type_has_snapshots(btree) || + bkey_eq(old_pos, new_pos)) + return 0; + + return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos); +} + int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *, struct bkey_i *, enum btree_update_flags); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 1319337..66da1da 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1083,16 +1083,14 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (flags & BTREE_INSERT_GC_LOCK_HELD) lockdep_assert_held(&c->gc_lock); else if (!down_read_trylock(&c->gc_lock)) { - bch2_trans_unlock(trans); - down_read(&c->gc_lock); - ret = bch2_trans_relock(trans); + ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0)); if (ret) { up_read(&c->gc_lock); return ERR_PTR(ret); } } - as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); + as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS); memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); as->c = c; @@ -1128,23 +1126,19 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, BTREE_UPDATE_JOURNAL_RES, journal_flags|JOURNAL_RES_GET_NONBLOCK); if (ret) { - bch2_trans_unlock(trans); - if (flags & BTREE_INSERT_JOURNAL_RECLAIM) { ret = -BCH_ERR_journal_reclaim_would_deadlock; goto err; } - 
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, + ret = drop_locks_do(trans, + bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, - journal_flags); - if (ret) { + journal_flags)); + if (ret == -BCH_ERR_journal_preres_get_blocked) { trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); - goto err; } - - ret = bch2_trans_relock(trans); if (ret) goto err; } @@ -2256,9 +2250,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite if (btree_ptr_hash_val(new_key) != b->hash_val) { ret = bch2_btree_cache_cannibalize_lock(c, &cl); if (ret) { - bch2_trans_unlock(trans); - closure_sync(&cl); - ret = bch2_trans_relock(trans); + ret = drop_locks_do(trans, (closure_sync(&cl), 0)); if (ret) return ret; } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 3369346..779338e 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -316,25 +316,11 @@ static noinline int bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags, unsigned long trace_ip) { - struct bch_fs *c = trans->c; - int ret; - - bch2_trans_unlock(trans); - - ret = bch2_journal_preres_get(&c->journal, + return drop_locks_do(trans, + bch2_journal_preres_get(&trans->c->journal, &trans->journal_preres, trans->journal_preres_u64s, - (flags & JOURNAL_WATERMARK_MASK)); - if (ret) - return ret; - - ret = bch2_trans_relock(trans); - if (ret) { - trace_and_count(c, trans_restart_journal_preres_get, trans, trace_ip, 0); - return ret; - } - - return 0; + (flags & JOURNAL_WATERMARK_MASK))); } static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, @@ -961,48 +947,27 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path); break; case 
-BCH_ERR_btree_insert_need_mark_replicas: - bch2_trans_unlock(trans); - - ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas); - if (ret) - break; - - ret = bch2_trans_relock(trans); - if (ret) - trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip); + ret = drop_locks_do(trans, + bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas)); break; case -BCH_ERR_journal_res_get_blocked: - bch2_trans_unlock(trans); - if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) && !(flags & JOURNAL_WATERMARK_reserved)) { ret = -BCH_ERR_journal_reclaim_would_deadlock; break; } - ret = bch2_trans_journal_res_get(trans, + ret = drop_locks_do(trans, + bch2_trans_journal_res_get(trans, (flags & JOURNAL_WATERMARK_MASK)| - JOURNAL_RES_GET_CHECK); - if (ret) - break; - - ret = bch2_trans_relock(trans); - if (ret) - trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip); + JOURNAL_RES_GET_CHECK)); break; case -BCH_ERR_btree_insert_need_journal_reclaim: - bch2_trans_unlock(trans); - trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); - wait_event_freezable(c->journal.reclaim_wait, - (ret = journal_reclaim_wait_done(c))); - if (ret < 0) - break; - - ret = bch2_trans_relock(trans); - if (ret) - trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip); + ret = drop_locks_do(trans, + (wait_event_freezable(c->journal.reclaim_wait, + (ret = journal_reclaim_wait_done(c))), ret)); break; case -BCH_ERR_btree_insert_need_flush_buffer: { struct btree_write_buffer *wb = &c->btree_write_buffer; @@ -1010,20 +975,20 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, ret = 0; if (wb->state.nr > wb->size * 3 / 4) { - bch2_trans_reset_updates(trans); bch2_trans_unlock(trans); - mutex_lock(&wb->flush_lock); - if (wb->state.nr > wb->size * 3 / 4) + if (wb->state.nr > wb->size * 3 / 4) { + bch2_trans_begin(trans); ret = __bch2_btree_write_buffer_flush(trans, flags|BTREE_INSERT_NOCHECK_RW, true); - else + if (!ret) { + trace_and_count(c, 
trans_restart_write_buffer_flush, trans, _THIS_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); + } + } else { mutex_unlock(&wb->flush_lock); - - if (!ret) { - trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); + ret = bch2_trans_relock(trans); } } break; @@ -1053,10 +1018,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) test_bit(BCH_FS_STARTED, &c->flags)) return -BCH_ERR_erofs_trans_commit; - bch2_trans_unlock(trans); - - ret = bch2_fs_read_write_early(c) ?: - bch2_trans_relock(trans); + ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); if (ret) return ret; @@ -1343,6 +1305,97 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans, return ret; } + +static int pos_overwritten_in_snapshot(struct btree_trans *trans, enum btree_id btree, + struct bpos pos, u32 snapshot) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + for_each_btree_key_norestart(trans, iter, + btree, SPOS(pos.inode, pos.offset, snapshot), + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_NOPRESERVE, k, ret) { + if (bpos_ge(k.k->p, pos)) + break; + + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) { + ret = 1; + break; + } + } + bch2_trans_iter_exit(trans, &iter); + + return ret; +} + +int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, + enum btree_id btree, + struct bpos old_pos, + struct bpos new_pos) +{ + struct bch_fs *c = trans->c; + struct btree_iter old_iter, new_iter; + struct bkey_s_c k; + snapshot_id_list s; + struct bkey_i *update; + int ret; + + if (!bch2_snapshot_has_children(c, old_pos.snapshot)) + return 0; + + darray_init(&s); + + bch2_trans_iter_init(trans, &old_iter, btree, old_pos, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS); + while ((k = bch2_btree_iter_prev(&old_iter)).k && + !(ret = bkey_err(k)) && + 
bkey_eq(old_pos, k.k->p)) { + + if (!bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) || + snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) + continue; + + ret = pos_overwritten_in_snapshot(trans, btree, + new_pos, k.k->p.snapshot); + if (ret < 0) + break; + + if (!ret) { + struct bpos whiteout_pos = + SPOS(new_pos.inode, new_pos.offset, k.k->p.snapshot);; + + bch2_trans_iter_init(trans, &new_iter, btree, whiteout_pos, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); + ret = PTR_ERR_OR_ZERO(update); + if (ret) + break; + + bkey_init(&update->k); + update->k.p = whiteout_pos; + update->k.type = KEY_TYPE_whiteout; + + ret = bch2_btree_iter_traverse(&new_iter) ?: + bch2_trans_update(trans, &new_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch2_trans_iter_exit(trans, &new_iter); + } + + ret = snapshot_list_add(c, &s, k.k->p.snapshot); + if (ret) + break; + } + bch2_trans_iter_exit(trans, &old_iter); + darray_exit(&s); + + return ret; +} + int bch2_trans_update_extent(struct btree_trans *trans, struct btree_iter *orig_iter, struct bkey_i *insert, @@ -1396,8 +1449,10 @@ int bch2_trans_update_extent(struct btree_trans *trans, bch2_cut_back(start, update); - ret = bch2_btree_insert_nonextent(trans, btree_id, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); + ret = bch2_insert_snapshot_whiteouts(trans, btree_id, + k.k->p, update->k.p) ?: + bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); if (ret) goto err; } @@ -1411,7 +1466,9 @@ int bch2_trans_update_extent(struct btree_trans *trans, bch2_cut_front(start, update); bch2_cut_back(insert->k.p, update); - ret = bch2_btree_insert_nonextent(trans, btree_id, update, + ret = bch2_insert_snapshot_whiteouts(trans, btree_id, + k.k->p, update->k.p) ?: + bch2_btree_insert_nonextent(trans, btree_id, update, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); if (ret) goto err; diff --git 
a/libbcachefs/buckets.c b/libbcachefs/buckets.c index bd14418..fbe0cd0 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -140,7 +140,7 @@ struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) unsigned nr_replicas = READ_ONCE(c->replicas.nr); unsigned seq, i; retry: - ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_NOFS); + ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_KERNEL); if (unlikely(!ret)) return NULL; @@ -423,8 +423,8 @@ static inline int update_cached_sectors(struct bch_fs *c, return update_replicas(c, k, &r.e, sectors, journal_seq, gc); } -static struct replicas_delta_list * -replicas_deltas_realloc(struct btree_trans *trans, unsigned more) +static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more, + gfp_t gfp) { struct replicas_delta_list *d = trans->fs_usage_deltas; unsigned new_size = d ? (d->size + more) * 2 : 128; @@ -433,12 +433,16 @@ replicas_deltas_realloc(struct btree_trans *trans, unsigned more) WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX); if (!d || d->used + more > d->size) { - d = krealloc(d, alloc_size, GFP_NOIO|__GFP_ZERO); + d = krealloc(d, alloc_size, gfp|__GFP_ZERO); - BUG_ON(!d && alloc_size > REPLICAS_DELTA_LIST_MAX); + if (unlikely(!d)) { + if (alloc_size > REPLICAS_DELTA_LIST_MAX) + return -ENOMEM; + + d = mempool_alloc(&trans->c->replicas_delta_pool, gfp); + if (!d) + return -ENOMEM; - if (!d) { - d = mempool_alloc(&trans->c->replicas_delta_pool, GFP_NOIO); memset(d, 0, REPLICAS_DELTA_LIST_MAX); if (trans->fs_usage_deltas) @@ -452,39 +456,51 @@ replicas_deltas_realloc(struct btree_trans *trans, unsigned more) d->size = new_size; trans->fs_usage_deltas = d; } - return d; + + return 0; +} + +static int replicas_deltas_realloc(struct btree_trans *trans, unsigned more) +{ + return allocate_dropping_locks_errcode(trans, + __replicas_deltas_realloc(trans, more, _gfp)); } -static inline void update_replicas_list(struct btree_trans *trans, 
+static inline int update_replicas_list(struct btree_trans *trans, struct bch_replicas_entry *r, s64 sectors) { struct replicas_delta_list *d; struct replicas_delta *n; unsigned b; + int ret; if (!sectors) - return; + return 0; b = replicas_entry_bytes(r) + 8; - d = replicas_deltas_realloc(trans, b); + ret = replicas_deltas_realloc(trans, b); + if (ret) + return ret; + d = trans->fs_usage_deltas; n = (void *) d->d + d->used; n->delta = sectors; memcpy((void *) n + offsetof(struct replicas_delta, r), r, replicas_entry_bytes(r)); bch2_replicas_entry_sort(&n->r); d->used += b; + return 0; } -static inline void update_cached_sectors_list(struct btree_trans *trans, +static inline int update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 sectors) { struct bch_replicas_padded r; bch2_replicas_entry_cached(&r.e, dev); - update_replicas_list(trans, &r.e, sectors); + return update_replicas_list(trans, &r.e, sectors); } int bch2_mark_alloc(struct btree_trans *trans, @@ -1455,7 +1471,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, BTREE_ITER_WITH_UPDATES, stripe); ret = PTR_ERR_OR_ZERO(s); if (unlikely(ret)) { - bch2_trans_inconsistent_on(ret == -ENOENT, trans, + bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, "pointer to nonexistent stripe %llu", (u64) p.ec.idx); goto err; @@ -1475,7 +1491,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); r.e.data_type = data_type; - update_replicas_list(trans, &r.e, sectors); + ret = update_replicas_list(trans, &r.e, sectors); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1502,7 +1518,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans, : k.k->size; s64 dirty_sectors = 0; bool stale; - int ret; + int ret = 0; r.e.data_type = data_type; r.e.nr_devs = 0; @@ -1521,9 +1537,12 @@ int bch2_trans_mark_extent(struct btree_trans *trans, stale = ret > 0; if (p.ptr.cached) { - if (!stale) - 
update_cached_sectors_list(trans, p.ptr.dev, - disk_sectors); + if (!stale) { + ret = update_cached_sectors_list(trans, p.ptr.dev, + disk_sectors); + if (ret) + return ret; + } } else if (!p.has_ec) { dirty_sectors += disk_sectors; r.e.devs[r.e.nr_devs++] = p.ptr.dev; @@ -1538,9 +1557,9 @@ int bch2_trans_mark_extent(struct btree_trans *trans, } if (r.e.nr_devs) - update_replicas_list(trans, &r.e, dirty_sectors); + ret = update_replicas_list(trans, &r.e, dirty_sectors); - return 0; + return ret; } static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, @@ -1657,14 +1676,18 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, s64 sectors = le16_to_cpu(new_s->sectors); bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); - update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + ret = update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + if (ret) + return ret; } if (old_s) { s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); bch2_bkey_to_replicas(&r.e, old); - update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + ret = update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + if (ret) + return ret; } for (i = 0; i < nr_blocks; i++) { @@ -1701,8 +1724,12 @@ int bch2_trans_mark_inode(struct btree_trans *trans, int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); if (nr) { - struct replicas_delta_list *d = - replicas_deltas_realloc(trans, 0); + int ret = replicas_deltas_realloc(trans, 0); + struct replicas_delta_list *d = trans->fs_usage_deltas; + + if (ret) + return ret; + d->nr_inodes += nr; } @@ -1721,13 +1748,17 @@ int bch2_trans_mark_reservation(struct btree_trans *trans, unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; + int ret; if (flags & BTREE_TRIGGER_OVERWRITE) sectors = -sectors; sectors *= replicas; - d = replicas_deltas_realloc(trans, 0); + ret = replicas_deltas_realloc(trans, 0); + if (ret) + return ret; + d = 
trans->fs_usage_deltas; replicas = clamp_t(unsigned, replicas, 1, ARRAY_SIZE(d->persistent_reserved)); diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index eecc355..670f316 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -578,7 +578,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, return i; } - return -ENOENT; + return -BCH_ERR_ENOENT_dev_idx_not_found; } static long bch2_ioctl_disk_resize(struct bch_fs *c, diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 6bec384..38a3475 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -28,11 +28,11 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) BUG_ON(size > c->opts.encoded_extent_max); - b = kmalloc(size, GFP_NOIO|__GFP_NOWARN); + b = kmalloc(size, GFP_NOFS|__GFP_NOWARN); if (b) return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; - b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO); + b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS); if (b) return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; @@ -94,7 +94,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); pages = nr_pages > ARRAY_SIZE(stack_pages) - ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO) + ? 
kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS) : stack_pages; if (!pages) goto bounce; @@ -177,7 +177,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, .avail_out = dst_len, }; - workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); + workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); zlib_set_workspace(&strm, workspace); zlib_inflateInit2(&strm, -MAX_WBITS); @@ -196,7 +196,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, if (real_src_len > src_len - 4) goto err; - workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); + workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); ret = zstd_decompress_dctx(ctx, @@ -382,7 +382,7 @@ static unsigned __bio_compress(struct bch_fs *c, dst_data = bio_map_or_bounce(c, dst, WRITE); src_data = bio_map_or_bounce(c, src, READ); - workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO); + workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS); *src_len = src->bi_iter.bi_size; *dst_len = dst->bi_iter.bi_size; diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index c709538..c89ee14 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -16,81 +16,6 @@ #include "subvolume.h" #include "trace.h" -static int insert_snapshot_whiteouts(struct btree_trans *trans, - enum btree_id id, - struct bpos old_pos, - struct bpos new_pos) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter, iter2; - struct bkey_s_c k, k2; - snapshot_id_list s; - struct bkey_i *update; - int ret; - - if (!btree_type_has_snapshots(id)) - return 0; - - darray_init(&s); - - if (!bch2_snapshot_has_children(c, old_pos.snapshot)) - return 0; - - bch2_trans_iter_init(trans, &iter, id, old_pos, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); - while (1) { - k = bch2_btree_iter_prev(&iter); - ret = bkey_err(k); - if (ret) - break; - - if 
(!k.k) - break; - - if (!bkey_eq(old_pos, k.k->p)) - break; - - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) && - !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) { - struct bpos whiteout_pos = new_pos; - - whiteout_pos.snapshot = k.k->p.snapshot; - - k2 = bch2_bkey_get_iter(trans, &iter2, id, whiteout_pos, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - ret = bkey_err(k2); - - if (!ret && k2.k->type == KEY_TYPE_deleted) { - update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); - ret = PTR_ERR_OR_ZERO(update); - if (ret) - break; - - bkey_init(&update->k); - update->k.p = whiteout_pos; - update->k.type = KEY_TYPE_whiteout; - - ret = bch2_trans_update(trans, &iter2, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - } - bch2_trans_iter_exit(trans, &iter2); - - if (ret) - break; - - ret = snapshot_list_add(c, &s, k.k->p.snapshot); - if (ret) - break; - } - } - bch2_trans_iter_exit(trans, &iter); - darray_exit(&s); - - return ret; -} - static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_finish_enabled()) { @@ -327,19 +252,12 @@ restart_drop_extra_replicas: next_pos = insert->k.p; - if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { - ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, - bkey_start_pos(&insert->k)); - if (ret) - goto err; - } - - if (!bkey_eq(insert->k.p, k.k->p)) { - ret = insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p); - if (ret) - goto err; - } + ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, + k.k->p, bkey_start_pos(&insert->k)) ?: + bch2_insert_snapshot_whiteouts(trans, m->btree_id, + k.k->p, insert->k.p); + if (ret) + goto err; ret = bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: @@ -374,7 +292,7 @@ nowork: &m->ctxt->stats->sectors_raced); } - this_cpu_add(c->counters[BCH_COUNTER_move_extent_fail], new->k.size); + this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); 
bch2_btree_iter_advance(&iter); goto next; diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index d1563ca..8981acc 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -47,7 +47,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, bio = bio_alloc_bioset(ca->disk_sb.bdev, buf_pages(n_sorted, btree_bytes(c)), REQ_OP_READ|REQ_META, - GFP_NOIO, + GFP_NOFS, &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; bch2_bio_map(bio, n_sorted, btree_bytes(c)); @@ -211,7 +211,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, bio = bio_alloc_bioset(ca->disk_sb.bdev, buf_pages(n_ondisk, btree_bytes(c)), REQ_OP_READ|REQ_META, - GFP_NOIO, + GFP_NOFS, &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; bch2_bio_map(bio, n_ondisk, btree_bytes(c)); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 439fa54..dfc0a61 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -485,7 +485,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) BUG_ON(!rbio->pick.has_ec); - buf = kzalloc(sizeof(*buf), GFP_NOIO); + buf = kzalloc(sizeof(*buf), GFP_NOFS); if (!buf) return -BCH_ERR_ENOMEM_ec_read_extent; @@ -578,15 +578,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) static int ec_stripe_mem_alloc(struct btree_trans *trans, struct btree_iter *iter) { - size_t idx = iter->pos.offset; - - if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN)) - return 0; - - bch2_trans_unlock(trans); - - return __ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL) ?: - bch2_trans_relock(trans); + return allocate_dropping_locks_errcode(trans, + __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp)); } /* diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index c8ac08e..12c0c44 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -94,6 +94,17 @@ x(ENOSPC, ENOSPC_sb_crypt) \ x(ENOSPC, ENOSPC_btree_slot) \ x(ENOSPC, ENOSPC_snapshot_tree) \ + x(ENOENT, 
ENOENT_bkey_type_mismatch) \ + x(ENOENT, ENOENT_str_hash_lookup) \ + x(ENOENT, ENOENT_str_hash_set_must_replace) \ + x(ENOENT, ENOENT_inode) \ + x(ENOENT, ENOENT_not_subvol) \ + x(ENOENT, ENOENT_directory_dead) \ + x(ENOENT, ENOENT_subvolume) \ + x(ENOENT, ENOENT_snapshot_tree) \ + x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ + x(ENOENT, ENOENT_dev_not_found) \ + x(ENOENT, ENOENT_dev_idx_not_found) \ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ @@ -219,7 +230,7 @@ static inline bool _bch2_err_matches(int err, int class) #define bch2_err_matches(_err, _class) \ ({ \ BUILD_BUG_ON(!__builtin_constant_p(_class)); \ - _bch2_err_matches(_err, _class); \ + unlikely(_bch2_err_matches(_err, _class)); \ }) int __bch2_err_class(int); diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 545c55d..b08cd23 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -85,12 +85,13 @@ enum ask_yn bch2_fsck_ask_yn(void) bool ret; while (true) { - fputs(" (y,n,Y,N) ", stdout); + fputs(" (y,n, or Y,N for all errors of this type) ", stdout); fflush(stdout); if (getline(&buf, &buflen, stdin) < 0) die("error reading from standard input"); + strim(buf); if (strlen(buf) != 1) continue; diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 1f2e1fc..bb53054 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -281,7 +281,7 @@ int bch2_unlink_trans(struct btree_trans *trans, } if (deleting_snapshot && !inode_u->bi_subvol) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_not_subvol; goto err; } diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 3e104bf..f3b2e3e 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -531,7 +531,7 @@ static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) s = kzalloc(sizeof(*s) + sizeof(struct bch_folio_sector) * - folio_sectors(folio), GFP_NOFS|gfp); + folio_sectors(folio), gfp); if (!s) return NULL; @@ -558,7 +558,7 @@ static void 
__bch2_folio_set(struct folio *folio, unsigned pg_offset, unsigned pg_len, unsigned nr_ptrs, unsigned state) { - struct bch_folio *s = bch2_folio_create(folio, __GFP_NOFAIL); + struct bch_folio *s = bch2_folio(folio); unsigned i, sectors = folio_sectors(folio); BUG_ON(pg_offset >= sectors); @@ -587,11 +587,25 @@ static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bch_folio *s; u64 offset = folio_sector(folios[0]); - unsigned folio_idx = 0; + unsigned folio_idx; u32 snapshot; + bool need_set = false; int ret; + for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { + s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); + if (!s) + return -ENOMEM; + + need_set |= !s->uptodate; + } + + if (!need_set) + return 0; + + folio_idx = 0; bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -616,7 +630,7 @@ retry: BUG_ON(k.k->p.offset < folio_start); BUG_ON(bkey_start_offset(k.k) > folio_end); - if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) + if (!bch2_folio(folio)->uptodate) __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); if (k.k->p.offset < folio_end) @@ -1008,15 +1022,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); - if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { - if (bch2_folio_set(c, inode_inum(inode), &folio, 1)) { - folio_unlock(folio); - ret = VM_FAULT_SIGBUS; - goto out; - } - } - - if (bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { + if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: + bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { folio_unlock(folio); ret = VM_FAULT_SIGBUS; goto out; @@ -1097,7 +1104,7 @@ static int readpages_iter_init(struct readpages_iter *iter, darray_for_each(iter->folios, fi) { ractl->_nr_pages -= 1U << folio_order(*fi); - __bch2_folio_create(*fi, __GFP_NOFAIL); + __bch2_folio_create(*fi, 
__GFP_NOFAIL|GFP_KERNEL); folio_put(*fi); folio_put(*fi); } @@ -1129,11 +1136,15 @@ static bool extent_partial_reads_expensive(struct bkey_s_c k) return false; } -static void readpage_bio_extend(struct readpages_iter *iter, - struct bio *bio, - unsigned sectors_this_extent, - bool get_more) +static int readpage_bio_extend(struct btree_trans *trans, + struct readpages_iter *iter, + struct bio *bio, + unsigned sectors_this_extent, + bool get_more) { + /* Don't hold btree locks while allocating memory: */ + bch2_trans_unlock(trans); + while (bio_sectors(bio) < sectors_this_extent && bio->bi_vcnt < bio->bi_max_vecs) { struct folio *folio = readpage_iter_peek(iter); @@ -1155,12 +1166,12 @@ static void readpage_bio_extend(struct readpages_iter *iter, if (!folio) break; - if (!__bch2_folio_create(folio, 0)) { + if (!__bch2_folio_create(folio, GFP_KERNEL)) { folio_put(folio); break; } - ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_NOFS); + ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); if (ret) { __bch2_folio_release(folio); folio_put(folio); @@ -1174,6 +1185,8 @@ static void readpage_bio_extend(struct readpages_iter *iter, BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); } + + return bch2_trans_relock(trans); } static void bchfs_read(struct btree_trans *trans, @@ -1241,9 +1254,12 @@ retry: sectors = min(sectors, k.k->size - offset_into_extent); - if (readpages_iter) - readpage_bio_extend(readpages_iter, &rbio->bio, sectors, - extent_partial_reads_expensive(k)); + if (readpages_iter) { + ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, + extent_partial_reads_expensive(k)); + if (ret) + break; + } bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; swap(rbio->bio.bi_iter.bi_size, bytes); @@ -1310,7 +1326,7 @@ void bch2_readahead(struct readahead_control *ractl) BIO_MAX_VECS); struct bch_read_bio *rbio = rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, - GFP_NOFS, &c->bio_read), + GFP_KERNEL, 
&c->bio_read), opts); readpage_iter_advance(&readpages_iter); @@ -1321,6 +1337,7 @@ void bch2_readahead(struct readahead_control *ractl) bchfs_read(&trans, rbio, inode_inum(inode), &readpages_iter); + bch2_trans_unlock(&trans); } bch2_pagecache_add_put(inode); @@ -1362,7 +1379,7 @@ static int bch2_read_single_folio(struct folio *folio, bch2_inode_opts_get(&opts, c, &inode->ei_inode); - rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), + rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), opts); rbio->bio.bi_private = &done; rbio->bio.bi_end_io = bch2_read_single_folio_end_io; @@ -1498,7 +1515,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, REQ_OP_WRITE, - GFP_NOFS, + GFP_KERNEL, &c->writepage_bioset), struct bch_writepage_io, op.wbio.bio); @@ -1553,7 +1570,7 @@ static int __bch2_writepage(struct folio *folio, folio_size(folio)); do_io: f_sectors = folio_sectors(folio); - s = bch2_folio_create(folio, __GFP_NOFAIL); + s = bch2_folio(folio); if (f_sectors > w->tmp_sectors) { kfree(w->tmp); @@ -1735,11 +1752,9 @@ readpage: if (ret) goto err; out: - if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { - ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); - if (ret) - goto err; - } + ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); + if (ret) + goto err; ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); if (ret) { @@ -1875,19 +1890,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, } } + ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); + if (ret) + goto out; + f_pos = pos; f_offset = pos - folio_pos(darray_first(folios)); darray_for_each(folios, fi) { struct folio *f = *fi; u64 f_len = min(end, folio_end_pos(f)) - f_pos; - if (!bch2_folio_create(f, __GFP_NOFAIL)->uptodate) { - ret = bch2_folio_set(c, inode_inum(inode), fi, - folios.data + folios.nr - fi); - if (ret) - goto 
out; - } - /* * XXX: per POSIX and fstests generic/275, on -ENOSPC we're * supposed to write as much as we have disk space for. @@ -2839,11 +2851,9 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode, goto unlock; } - if (!s->uptodate) { - ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); - if (ret) - goto unlock; - } + ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); + if (ret) + goto unlock; for (i = round_up(start_offset, block_bytes(c)) >> 9; i < round_down(end_offset, block_bytes(c)) >> 9; diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index 269af93..dfa1bf7 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -382,7 +382,7 @@ retry: dir = dst_path.dentry->d_inode; if (IS_DEADDIR(dir)) { - error = -ENOENT; + error = -BCH_ERR_ENOENT_directory_dead; goto err3; } diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 56091ce..8fc980e 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -105,7 +105,7 @@ retry: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_fs_fatal_err_on(ret == -ENOENT, c, + bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, "inode %u:%llu not found when updating", inode_inum(inode).subvol, inode_inum(inode).inum); @@ -1261,14 +1261,14 @@ retry: goto err; if (k.k->type != KEY_TYPE_dirent) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; goto err; } d = bkey_s_c_to_dirent(k); ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); if (ret > 0) - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; if (ret) goto err; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 1b3ee66..dcc55cb 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -78,7 +78,7 @@ static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, snapshot, &s); if (!ret) *subvol = le32_to_cpu(s.subvol); - else if (ret == -ENOENT) + else if (bch2_err_matches(ret, ENOENT)) bch_err(trans->c, "snapshot %u not fonud", 
snapshot); return ret; @@ -119,7 +119,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, goto err; if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } @@ -148,7 +148,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) - : -ENOENT; + : -BCH_ERR_ENOENT_inode; if (!ret) *snapshot = iter.pos.snapshot; err: @@ -333,7 +333,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, ret = __lookup_dirent(trans, root_hash_info, root_inum, &lostfound_str, &inum, &d_type); - if (ret == -ENOENT) { + if (bch2_err_matches(ret, ENOENT)) { bch_notice(c, "creating lost+found"); goto create_lostfound; } @@ -1088,7 +1088,7 @@ static int inode_backpointer_exists(struct btree_trans *trans, SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); ret = bkey_err(d); if (ret) - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 
0 : ret; ret = dirent_points_to_inode(d, inode); bch2_trans_iter_exit(trans, &iter); @@ -1653,7 +1653,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __subvol_lookup(trans, target_subvol, &target_snapshot, &target_inum); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) goto err; if (fsck_err_on(ret, c, @@ -1665,7 +1665,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) goto err; if (fsck_err_on(ret, c, @@ -1846,7 +1846,7 @@ static int check_root_trans(struct btree_trans *trans) int ret; ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) return ret; if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { @@ -1873,7 +1873,7 @@ static int check_root_trans(struct btree_trans *trans) } ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) return ret; if (mustfix_fsck_err_on(ret, c, "root directory missing") || @@ -1972,15 +1972,15 @@ static int check_path(struct btree_trans *trans, PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter, SPOS(inode->bi_dir, inode->bi_dir_offset, parent_snapshot))).k)); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) break; if (!ret && !dirent_points_to_inode(d, inode)) { bch2_trans_iter_exit(trans, &dirent_iter); - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; } - if (ret == -ENOENT) { + if (bch2_err_matches(ret, ENOENT)) { if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", inode->bi_inum, snapshot, bch2_d_type_str(inode_d_type(inode)), diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index ddcd7b1..64e8d1f 100644 --- 
a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -336,7 +336,7 @@ int bch2_inode_peek(struct btree_trans *trans, if (ret) return ret; - ret = bkey_is_inode(k.k) ? 0 : -ENOENT; + ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; if (ret) goto err; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 5a04ee5..77fe49f 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -163,7 +163,7 @@ static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) struct page *page; if (likely(!*using_mempool)) { - page = alloc_page(GFP_NOIO); + page = alloc_page(GFP_NOFS); if (unlikely(!page)) { mutex_lock(&c->bio_bounce_pages_lock); *using_mempool = true; @@ -172,7 +172,7 @@ static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) } } else { pool_alloc: - page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO); + page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS); } return page; @@ -660,7 +660,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, if (to_entry(ptr + 1) < ptrs.end) { n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, - GFP_NOIO, &ca->replica_set)); + GFP_NOFS, &ca->replica_set)); n->bio.bi_end_io = wbio->bio.bi_end_io; n->bio.bi_private = wbio->bio.bi_private; @@ -976,7 +976,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c, pages = min(pages, BIO_MAX_VECS); bio = bio_alloc_bioset(NULL, pages, 0, - GFP_NOIO, &c->bio_write); + GFP_NOFS, &c->bio_write); wbio = wbio_init(bio); wbio->put_bio = true; /* copy WRITE_SYNC flag */ @@ -1314,7 +1314,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, BUG_ON(total_output != total_input); dst = bio_split(src, total_input >> 9, - GFP_NOIO, &c->bio_write); + GFP_NOFS, &c->bio_write); wbio_init(dst)->put_bio = true; /* copy WRITE_SYNC flag */ dst->bi_opf = src->bi_opf; @@ -2013,7 +2013,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) 
return NULL; - op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); + op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); if (!op) goto err; @@ -2026,7 +2026,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, */ *rbio = kzalloc(sizeof(struct bch_read_bio) + sizeof(struct bio_vec) * pages, - GFP_NOIO); + GFP_NOFS); if (!*rbio) goto err; @@ -2034,7 +2034,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, - GFP_NOIO)) + GFP_NOFS)) goto err; (*rbio)->bounce = true; @@ -2057,14 +2057,16 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, }, btree_id, k); - if (ret == -BCH_ERR_nocow_lock_blocked) { + if (ret) { + WARN_ONCE(ret != -BCH_ERR_nocow_lock_blocked, + "%s: saw unknown error %s\n", __func__, bch2_err_str(ret)); + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, bch_promote_params); BUG_ON(ret); goto err; } - BUG_ON(ret); op->write.op.end_io = promote_done; return op; @@ -2746,7 +2748,7 @@ get_bio: rbio = rbio_init(bio_alloc_bioset(NULL, DIV_ROUND_UP(sectors, PAGE_SECTORS), 0, - GFP_NOIO, + GFP_NOFS, &c->bio_read_split), orig->opts); @@ -2762,7 +2764,7 @@ get_bio: * from the whole bio, in which case we don't want to retry and * lose the error) */ - rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOIO, + rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, &c->bio_read_split), orig->opts); rbio->bio.bi_iter = iter; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index b455ef0..8dc3786 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1438,7 +1438,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) if (buf->buf_size >= new_size) return; - new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN); + 
new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN); if (!new_buf) return; diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 29d843e..2c7f8ac 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -271,7 +271,7 @@ void bch2_journal_do_discards(struct journal *j) blkdev_issue_discard(ca->disk_sb.bdev, bucket_to_sector(ca, ja->buckets[ja->discard_idx]), - ca->mi.bucket_size, GFP_NOIO); + ca->mi.bucket_size, GFP_NOFS); spin_lock(&j->lock); ja->discard_idx = (ja->discard_idx + 1) % ja->nr; diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c index cf5998e..5699cd4 100644 --- a/libbcachefs/keylist.c +++ b/libbcachefs/keylist.c @@ -18,7 +18,7 @@ int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, (old_buf && roundup_pow_of_two(oldsize) == newsize)) return 0; - new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO); + new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOFS); if (!new_keys) return -ENOMEM; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 2ec30a3..fd62913 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -427,7 +427,7 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos, goto err; if (!k.k || !bkey_eq(k.k->p, pos)) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 0d96346..6750767 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -236,7 +236,7 @@ err: darray_exit(&buckets); /* no entries in LRU btree found, or got to end: */ - if (ret == -ENOENT) + if (bch2_err_matches(ret, ENOENT)) ret = 0; if (ret < 0 && !bch2_err_matches(ret, EROFS)) diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 310eb9d..d20ec97 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -576,6 +576,13 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, le32_to_cpu(s_t.master_subvol), k.k->p.offset, }, &u); + /* + * Inode might be deleted 
in this snapshot - the easiest way to handle + * that is to just skip it here: + */ + if (bch2_err_matches(ret, ENOENT)) + goto advance; + if (ret) return ret; @@ -615,7 +622,7 @@ int bch2_fs_quota_read(struct bch_fs *c) POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, bch2_fs_quota_read_inode(&trans, &iter, k)); if (ret) - bch_err(c, "err in quota_read: %s", bch2_err_str(ret)); + bch_err(c, "%s: err %s", __func__, bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; @@ -893,7 +900,7 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, ret = -ENOENT; found: mutex_unlock(&q->lock); - return ret; + return bch2_err_class(ret); } static int bch2_set_quota_trans(struct btree_trans *trans, @@ -953,7 +960,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); - return ret; + return bch2_err_class(ret); } const struct quotactl_ops bch2_quotactl_operations = { diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index af76c02..e4983d1 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1082,7 +1082,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) if (!bkey_is_inode(k.k)) { bch_err(trans->c, "root inode not found"); - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 9430899..26f0275 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -167,24 +167,13 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (orig->k.type == KEY_TYPE_inline_data) bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); - for_each_btree_key_norestart(trans, reflink_iter, BTREE_ID_reflink, - POS(0, c->reflink_hint), - BTREE_ITER_SLOTS, k, ret) { - if (reflink_iter.pos.inode) { - bch2_btree_iter_set_pos(&reflink_iter, POS_MIN); - continue; - } - - if (bkey_deleted(k.k) && orig->k.size <= 
k.k->size) - break; - } - + bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_prev(&reflink_iter); + ret = bkey_err(k); if (ret) goto err; - /* rewind iter to start of hole, if necessary: */ - bch2_btree_iter_set_pos_to_extent_start(&reflink_iter); - r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); if (ret) @@ -226,7 +215,6 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); err: - c->reflink_hint = reflink_iter.pos.offset; bch2_trans_iter_exit(trans, &reflink_iter); return ret; diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 6178ae6..ae21a8c 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -181,7 +181,7 @@ bch2_hash_lookup(struct btree_trans *trans, } bch2_trans_iter_exit(trans, iter); - return ret ?: -ENOENT; + return ret ?: -BCH_ERR_ENOENT_str_hash_lookup; } static __always_inline int @@ -288,7 +288,7 @@ found: not_found: if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { ret = -EEXIST; } else { diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 388fa12..f07b3e2 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -37,8 +37,12 @@ int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot_tree *s) { - return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), - BTREE_ITER_WITH_UPDATES, snapshot_tree, s); + int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), + BTREE_ITER_WITH_UPDATES, snapshot_tree, s); + + if (bch2_err_matches(ret, ENOENT)) + ret = -BCH_ERR_ENOENT_snapshot_tree; + return ret; } static 
struct bkey_i_snapshot_tree * @@ -284,6 +288,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_subvolume s; + bool found = false; int ret; for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, @@ -296,14 +301,14 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, continue; if (!BCH_SUBVOLUME_SNAP(s.v)) { *subvol_id = s.k->p.offset; - goto found; + found = true; + break; } } - ret = ret ?: -ENOENT; -found: + bch2_trans_iter_exit(trans, &iter); - if (bch2_err_matches(ret, ENOENT)) { + if (!ret && !found) { struct bkey_i_subvolume *s; *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); @@ -1217,7 +1222,7 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol), BTREE_ITER_CACHED| BTREE_ITER_WITH_UPDATES); - ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT; + ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 
0 : -BCH_ERR_ENOENT_subvolume; if (likely(!ret)) *snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot); @@ -1444,7 +1449,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, BTREE_ITER_CACHED, subvolume); ret = PTR_ERR_OR_ZERO(src_subvol); if (unlikely(ret)) { - bch2_fs_inconsistent_on(ret == -ENOENT, c, + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, "subvolume %u not found", src_subvolid); goto err; } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 696ea13..a659f80 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1834,7 +1834,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) for_each_member_device_rcu(ca, c, i, NULL) if (!strcmp(name, ca->name)) goto found; - ca = ERR_PTR(-ENOENT); + ca = ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); found: rcu_read_unlock(); diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index cfb1779..a743ab4 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -905,13 +905,6 @@ DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, trans_restart_journal_res_get, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) -); - - TRACE_EVENT(trans_restart_journal_preres_get, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -935,12 +928,6 @@ TRACE_EVENT(trans_restart_journal_preres_get, __entry->flags) ); -DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) -); - DEFINE_EVENT(transaction_event, trans_restart_fault_inject, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), @@ -953,12 +940,6 @@ DEFINE_EVENT(transaction_event, trans_traverse_all, TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, trans_restart_mark_replicas, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, 
caller_ip) -); - DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 9079686..38886bf 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -350,7 +350,7 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, if (time_after64(end, start)) { duration = end - start; - stats->duration_stats = mean_and_variance_update(stats->duration_stats, duration); + mean_and_variance_update(&stats->duration_stats, duration); mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration); stats->max_duration = max(stats->max_duration, duration); stats->min_duration = min(stats->min_duration, duration); @@ -359,7 +359,7 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, if (time_after64(end, stats->last_event)) { freq = end - stats->last_event; - stats->freq_stats = mean_and_variance_update(stats->freq_stats, freq); + mean_and_variance_update(&stats->freq_stats, freq); mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); stats->max_freq = max(stats->max_freq, freq); stats->min_freq = min(stats->min_freq, freq); diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 4fc6e06..10e545c 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -163,7 +163,7 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info err2: bch2_trans_iter_exit(trans, &iter); err1: - return ret == -ENOENT ? -ENODATA : ret; + return ret < 0 && bch2_err_matches(ret, ENOENT) ? -ENODATA : ret; } int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, @@ -229,7 +229,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, hash_info, inum, &search); } - if (ret == -ENOENT) + if (bch2_err_matches(ret, ENOENT)) ret = flags & XATTR_REPLACE ? 
-ENODATA : 0; return ret; @@ -589,7 +589,7 @@ err: opt_id == Opt_background_target)) bch2_rebalance_add_work(c, inode->v.i_blocks); - return ret; + return bch2_err_class(ret); } static const struct xattr_handler bch_xattr_bcachefs_handler = { diff --git a/linux/six.c b/linux/six.c index a47cd6d..d4f43f1 100644 --- a/linux/six.c +++ b/linux/six.c @@ -118,7 +118,7 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, { const struct six_lock_vals l[] = LOCK_VALS; int ret; - u32 old, new, v; + u32 old; EBUG_ON(type == SIX_LOCK_write && lock->owner != task); EBUG_ON(type == SIX_LOCK_write && @@ -179,19 +179,14 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, ret = -1 - SIX_LOCK_read; } } else { - v = atomic_read(&lock->state); + old = atomic_read(&lock->state); do { - new = old = v; - ret = !(old & l[type].lock_fail); - if (!ret || (type == SIX_LOCK_write && !try)) { smp_mb(); break; } - - new += l[type].lock_val; - } while ((v = atomic_cmpxchg_acquire(&lock->state, old, new)) != old); + } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val)); EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask)); } @@ -681,10 +676,10 @@ EXPORT_SYMBOL_GPL(six_lock_downgrade); bool six_lock_tryupgrade(struct six_lock *lock) { const struct six_lock_vals l[] = LOCK_VALS; - u32 old, new, v = atomic_read(&lock->state); + u32 old = atomic_read(&lock->state), new; do { - new = old = v; + new = old; if (new & SIX_LOCK_HELD_intent) return false; @@ -695,7 +690,7 @@ bool six_lock_tryupgrade(struct six_lock *lock) } new |= SIX_LOCK_HELD_intent; - } while ((v = atomic_cmpxchg_acquire(&lock->state, old, new)) != old); + } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new)); if (lock->readers) this_cpu_dec(*lock->readers); -- 2.39.2