X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_key_cache.c;h=298a674dbfd6e9963a624a410fe7d5a814954a80;hb=b0c9ad15f4e5cee60973a8f5f6dc49acfeec9755;hp=b8ed25b9998547d9d5abcc319cca92980e6bf91c;hpb=494421ee6e85514f90bb316d77e1dd4f7dad3420;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index b8ed25b..298a674 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_cache.h"
@@ -11,6 +12,7 @@
 #include "journal_reclaim.h"
 
 #include <linux/sched/mm.h>
+#include <linux/seq_buf.h>
 #include <trace/events/bcachefs.h>
 
 static inline bool btree_uses_pcpu_readers(enum btree_id id)
@@ -26,8 +28,8 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
 	const struct bkey_cached *ck = obj;
 	const struct bkey_cached_key *key = arg->key;
 
-	return cmp_int(ck->key.btree_id, key->btree_id) ?:
-		bpos_cmp(ck->key.pos, key->pos);
+	return ck->key.btree_id != key->btree_id ||
+	       !bpos_eq(ck->key.pos, key->pos);
 }
 
 static const struct rhashtable_params bch2_btree_key_cache_params = {
@@ -55,13 +57,12 @@ static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
 	if (!six_trylock_intent(&ck->c.lock))
 		return false;
 
-	if (!six_trylock_write(&ck->c.lock)) {
+	if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 		six_unlock_intent(&ck->c.lock);
 		return false;
 	}
 
-	if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-		six_unlock_write(&ck->c.lock);
+	if (!six_trylock_write(&ck->c.lock)) {
 		six_unlock_intent(&ck->c.lock);
 		return false;
 	}
@@ -103,16 +104,34 @@ static void bkey_cached_free(struct btree_key_cache *bc,
 	six_unlock_intent(&ck->c.lock);
 }
 
+#ifdef __KERNEL__
+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
+						   struct bkey_cached *ck)
+{
+	struct bkey_cached *pos;
+
+	list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
+		if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
+				 pos->btree_trans_barrier_seq)) {
+			list_move(&ck->list, &pos->list);
+			return;
+		}
+	}
+
+	list_move(&ck->list, &bc->freed_nonpcpu);
+}
+#endif
+
 static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 					 struct bkey_cached *ck)
 {
-	struct btree_key_cache_freelist *f;
-	bool freed = false;
-
 	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
 
 	if (!ck->c.lock.readers) {
 #ifdef __KERNEL__
+		struct btree_key_cache_freelist *f;
+		bool freed = false;
+
 		preempt_disable();
 		f = this_cpu_ptr(bc->pcpu_freed);
 
@@ -130,11 +149,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 			while (f->nr > ARRAY_SIZE(f->objs) / 2) {
 				struct bkey_cached *ck2 = f->objs[--f->nr];
 
-				list_move_tail(&ck2->list, &bc->freed_nonpcpu);
+				__bkey_cached_move_to_freelist_ordered(bc, ck2);
 			}
 			preempt_enable();
 
-			list_move_tail(&ck->list, &bc->freed_nonpcpu);
+			__bkey_cached_move_to_freelist_ordered(bc, ck);
 			mutex_unlock(&bc->lock);
 		}
 #else
@@ -171,16 +190,19 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
 }
 
 static struct bkey_cached *
-bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
+bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
+		  bool *was_new)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_key_cache *bc = &c->btree_key_cache;
 	struct bkey_cached *ck = NULL;
-	struct btree_key_cache_freelist *f;
 	bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
+	int ret;
 
 	if (!pcpu_readers) {
 #ifdef __KERNEL__
+		struct btree_key_cache_freelist *f;
+
 		preempt_disable();
 		f = this_cpu_ptr(bc->pcpu_freed);
 		if (f->nr)
@@ -223,7 +245,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
 	if (ck) {
 		int ret;
 
-		ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
+		ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
 		if (unlikely(ret)) {
 			bkey_cached_move_to_freelist(bc, ck);
 			return ERR_PTR(ret);
@@ -243,21 +265,33 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
 		return ck;
 	}
 
-	/* GFP_NOFS because we're holding btree locks: */
-	ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
-	if (likely(ck)) {
-		INIT_LIST_HEAD(&ck->list);
-		__six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
-		if (pcpu_readers)
-			six_lock_pcpu_alloc(&ck->c.lock);
+	ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
+	if (likely(ck))
+		goto init;
 
-		ck->c.cached = true;
-		BUG_ON(!six_trylock_intent(&ck->c.lock));
-		BUG_ON(!six_trylock_write(&ck->c.lock));
-		return ck;
+	bch2_trans_unlock(trans);
+
+	ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);
+
+	ret = bch2_trans_relock(trans);
+	if (ret) {
+		kmem_cache_free(bch2_key_cache, ck);
+		return ERR_PTR(ret);
 	}
 
-	return NULL;
+	if (!ck)
+		return NULL;
+init:
+	INIT_LIST_HEAD(&ck->list);
+	bch2_btree_lock_init(&ck->c);
+	if (pcpu_readers)
+		six_lock_pcpu_alloc(&ck->c.lock);
+
+	ck->c.cached = true;
+	BUG_ON(!six_trylock_intent(&ck->c.lock));
+	BUG_ON(!six_trylock_write(&ck->c.lock));
+	*was_new = true;
+	return ck;
 }
 
 static struct bkey_cached *
@@ -292,10 +326,10 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 	struct bch_fs *c = trans->c;
 	struct btree_key_cache *bc = &c->btree_key_cache;
 	struct bkey_cached *ck;
-	bool was_new = true;
+	bool was_new = false;
 
-	ck = bkey_cached_alloc(trans, path);
-	if (unlikely(IS_ERR(ck)))
+	ck = bkey_cached_alloc(trans, path, &was_new);
+	if (IS_ERR(ck))
 		return ck;
 
 	if (unlikely(!ck)) {
@@ -307,7 +341,6 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 		}
 
 		mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
-		was_new = false;
 	} else {
 		if (path->btree_id == BTREE_ID_subvolumes)
 			six_lock_pcpu_alloc(&ck->c.lock);
@@ -328,12 +361,12 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 		if (likely(was_new)) {
 			six_unlock_write(&ck->c.lock);
 			six_unlock_intent(&ck->c.lock);
-			mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
 			kfree(ck);
 		} else {
 			bkey_cached_free_fast(bc, ck);
 		}
 
+		mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
 		return NULL;
 	}
 
@@ -348,24 +381,23 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 				struct btree_path *ck_path,
 				struct bkey_cached *ck)
 {
-	struct btree_path *path;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	unsigned new_u64s = 0;
 	struct bkey_i *new_k = NULL;
-	struct bkey u;
 	int ret;
 
-	path = bch2_path_get(trans, ck->key.btree_id,
-			     ck->key.pos, 0, 0, 0, _THIS_IP_);
-	ret = bch2_btree_path_traverse(trans, path, 0);
+	bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
+			     BTREE_ITER_KEY_CACHE_FILL|
+			     BTREE_ITER_CACHED_NOFILL);
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
-	k = bch2_btree_path_peek_slot(path, &u);
-
 	if (!bch2_btree_node_relock(trans, ck_path, 0)) {
 		trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
+		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
 		goto err;
 	}
 
@@ -384,12 +416,30 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 
 	if (new_u64s > ck->u64s) {
 		new_u64s = roundup_pow_of_two(new_u64s);
-		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
+		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
 		if (!new_k) {
-			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
-				bch2_btree_ids[ck->key.btree_id], new_u64s);
-			ret = -ENOMEM;
-			goto err;
+			bch2_trans_unlock(trans);
+
+			new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+			if (!new_k) {
+				bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+					bch2_btree_ids[ck->key.btree_id], new_u64s);
+				ret = -ENOMEM;
+				goto err;
+			}
+
+			if (!bch2_btree_node_relock(trans, ck_path, 0)) {
+				kfree(new_k);
+				trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
+				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
+				goto err;
+			}
+
+			ret = bch2_trans_relock(trans);
+			if (ret) {
+				kfree(new_k);
+				goto err;
+			}
 		}
 	}
 
@@ -410,13 +460,13 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
 
 	/* We're not likely to need this iterator again: */
-	path->preserve = false;
+	set_btree_iter_dontneed(&iter);
 err:
-	bch2_path_put(trans, path, 0);
+	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
-noinline static int
+static noinline int
 bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
 					 unsigned flags)
 {
@@ -428,7 +478,7 @@ bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
 
 	path->l[1].b = NULL;
 
-	if (bch2_btree_node_relock(trans, path, 0)) {
+	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
 		ck = (void *) path->l[0].b;
 		goto fill;
 	}
@@ -455,7 +505,7 @@ retry:
 		BUG_ON(ret);
 
 		if (ck->key.btree_id != path->btree_id ||
-		    bpos_cmp(ck->key.pos, path->pos)) {
+		    !bpos_eq(ck->key.pos, path->pos)) {
 			six_unlock_type(&ck->c.lock, lock_want);
 			goto retry;
 		}
@@ -466,7 +516,9 @@ retry:
 	path->l[0].lock_seq = ck->c.lock.state.seq;
 	path->l[0].b = (void *) ck;
 fill:
-	if (!ck->valid) {
+	path->uptodate = BTREE_ITER_UPTODATE;
+
+	if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
 		/*
 		 * Using the underscore version because we haven't set
 		 * path->uptodate yet:
@@ -481,17 +533,23 @@ fill:
 		ret = btree_key_cache_fill(trans, path, ck);
 		if (ret)
 			goto err;
+
+		ret = bch2_btree_path_relock(trans, path, _THIS_IP_);
+		if (ret)
+			goto err;
+
+		path->uptodate = BTREE_ITER_UPTODATE;
 	}
 
 	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
 		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
 
-	path->uptodate = BTREE_ITER_UPTODATE;
-	BUG_ON(!ck->valid);
 	BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
+	BUG_ON(path->uptodate);
 
 	return ret;
 err:
+	path->uptodate = BTREE_ITER_NEED_TRAVERSE;
 	if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
 		btree_node_unlock(trans, path, 0);
 		path->l[0].b = ERR_PTR(ret);
@@ -510,7 +568,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
 
 	path->l[1].b = NULL;
 
-	if (bch2_btree_node_relock(trans, path, 0)) {
+	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
 		ck = (void *) path->l[0].b;
 		goto fill;
 	}
@@ -529,7 +587,7 @@ retry:
 			return ret;
 
 		if (ck->key.btree_id != path->btree_id ||
-		    bpos_cmp(ck->key.pos, path->pos)) {
+		    !bpos_eq(ck->key.pos, path->pos)) {
 			six_unlock_type(&ck->c.lock, lock_want);
 			goto retry;
 		}
@@ -597,7 +655,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 	 * Since journal reclaim depends on us making progress here, and the
	 * allocator/copygc depend on journal reclaim making progress, we need
	 * to be using alloc reserves:
-	 * */
+	 */
 	ret = bch2_btree_iter_traverse(&b_iter) ?:
 		bch2_trans_update(trans, &b_iter, ck->k,
 				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
@@ -675,6 +733,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
 		six_unlock_read(&ck->c.lock);
 		goto unlock;
 	}
+
+	if (ck->seq != seq) {
+		bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal,
+					bch2_btree_key_cache_journal_flush);
+		six_unlock_read(&ck->c.lock);
+		goto unlock;
+	}
 	six_unlock_read(&ck->c.lock);
 
 	ret = commit_do(&trans, NULL, NULL, 0,
@@ -704,6 +769,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 }
 
 bool bch2_btree_insert_key_cached(struct btree_trans *trans,
+				  unsigned flags,
 				  struct btree_path *path,
 				  struct bkey_i *insert)
 {
@@ -711,14 +777,14 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	struct bkey_cached *ck = (void *) path->l[0].b;
 	bool kick_reclaim = false;
 
-	BUG_ON(insert->u64s > ck->u64s);
+	BUG_ON(insert->k.u64s > ck->u64s);
 
-	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+	if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
 		int difference;
 
-		BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);
+		BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
 
-		difference = jset_u64s(insert->u64s) - ck->res.u64s;
+		difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
 		if (difference > 0) {
 			trans->journal_preres.u64s -= difference;
 			ck->res.u64s += difference;
@@ -736,8 +802,9 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 			kick_reclaim = true;
 	}
 
-	bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
-				&ck->journal, bch2_btree_key_cache_journal_flush);
+	bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
+			     &ck->journal, bch2_btree_key_cache_journal_flush);
+	ck->seq = trans->journal_res.seq;
 
 	if (kick_reclaim)
 		journal_reclaim_kick(&c->journal);
@@ -881,6 +948,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	struct bucket_table *tbl;
 	struct bkey_cached *ck, *n;
 	struct rhash_head *pos;
+	LIST_HEAD(items);
 	unsigned i;
 #ifdef __KERNEL__
 	int cpu;
@@ -901,7 +969,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 		for (i = 0; i < tbl->size; i++)
 			rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
 				bkey_cached_evict(bc, ck);
-				list_add(&ck->list, &bc->freed_nonpcpu);
+				list_add(&ck->list, &items);
 			}
 		rcu_read_unlock();
 	}
@@ -913,14 +981,17 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
 		for (i = 0; i < f->nr; i++) {
 			ck = f->objs[i];
-			list_add(&ck->list, &bc->freed_nonpcpu);
+			list_add(&ck->list, &items);
 		}
 	}
 #endif
 
-	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
+	list_splice(&bc->freed_pcpu, &items);
+	list_splice(&bc->freed_nonpcpu, &items);
+
+	mutex_unlock(&bc->lock);
 
-	list_for_each_entry_safe(ck, n, &bc->freed_nonpcpu, list) {
+	list_for_each_entry_safe(ck, n, &items, list) {
 		cond_resched();
 
 		bch2_journal_pin_drop(&c->journal, &ck->journal);
@@ -942,8 +1013,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 		panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
 		      atomic_long_read(&bc->nr_keys));
 
-	mutex_unlock(&bc->lock);
-
 	if (bc->table_init_done)
 		rhashtable_destroy(&bc->table);
 
@@ -957,12 +1026,16 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
 	INIT_LIST_HEAD(&c->freed_nonpcpu);
 }
 
-static void bch2_btree_key_cache_shrinker_to_text(struct printbuf *out, struct shrinker *shrink)
+static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
 {
 	struct btree_key_cache *bc =
 		container_of(shrink, struct btree_key_cache, shrink);
+	char *cbuf;
+	size_t buflen = seq_buf_get_buf(s, &cbuf);
+	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
 
-	bch2_btree_key_cache_to_text(out, bc);
+	bch2_btree_key_cache_to_text(&out, bc);
+	seq_buf_commit(s, out.pos);
 }
 
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
@@ -982,7 +1055,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
 	bc->table_init_done = true;
 
-	bc->shrink.seeks = 1;
+	bc->shrink.seeks = 0;
 	bc->shrink.count_objects = bch2_btree_key_cache_count;
 	bc->shrink.scan_objects = bch2_btree_key_cache_scan;
 	bc->shrink.to_text = bch2_btree_key_cache_shrinker_to_text;
@@ -991,15 +1064,17 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
 {
-	prt_printf(out, "nr_freed:\t%zu\n", atomic_long_read(&c->nr_freed));
-	prt_printf(out, "nr_keys:\t%lu\n", atomic_long_read(&c->nr_keys));
-	prt_printf(out, "nr_dirty:\t%lu\n", atomic_long_read(&c->nr_dirty));
+	prt_printf(out, "nr_freed:\t%zu", atomic_long_read(&c->nr_freed));
+	prt_newline(out);
+	prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
+	prt_newline(out);
+	prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty));
+	prt_newline(out);
 }
 
 void bch2_btree_key_cache_exit(void)
 {
-	if (bch2_key_cache)
-		kmem_cache_destroy(bch2_key_cache);
+	kmem_cache_destroy(bch2_key_cache);
 }
 
 int __init bch2_btree_key_cache_init(void)
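
Both memory-allocation changes in this patch (the kmem_cache_zalloc() fallback in bkey_cached_alloc() and the kmalloc() fallback in btree_key_cache_fill()) follow the same pattern: try a non-blocking GFP_NOWAIT|__GFP_NOWARN allocation while btree locks are held, and only if that fails drop the transaction locks, allocate with GFP_KERNEL, relock, and revalidate before using the result. The sketch below is a minimal user-space analogue of that pattern, not bcachefs code: the pthread mutex, generation counter, and the cache_slot/slot_realloc() names are hypothetical stand-ins for the btree transaction locks and the relock/revalidate step.

/*
 * Minimal analogue of "allocate without blocking under a lock, else
 * unlock -> blocking allocation -> relock -> revalidate".  All names here
 * are illustrative stand-ins, not bcachefs APIs.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct cache_slot {
	char	*buf;
	size_t	len;
};

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long slot_gen;		/* bumped whenever the slot changes */

/* Stand-in for a GFP_NOWAIT allocation; may fail under memory pressure. */
static void *alloc_nowait(size_t len)
{
	return malloc(len);
}

/* Called with slot_lock held; returns 0 on success, -1 on failure/restart. */
static int slot_realloc(struct cache_slot *slot, size_t new_len)
{
	unsigned long seen_gen = slot_gen;
	char *new_buf = alloc_nowait(new_len);

	if (!new_buf) {
		/* Drop the lock before a potentially blocking allocation: */
		pthread_mutex_unlock(&slot_lock);
		new_buf = malloc(new_len);	/* the "GFP_KERNEL" path */
		pthread_mutex_lock(&slot_lock);

		if (!new_buf)
			return -1;

		/* Revalidate: did the slot change while we were unlocked? */
		if (slot_gen != seen_gen) {
			free(new_buf);
			return -1;	/* caller restarts, like a transaction restart */
		}
	}

	if (slot->buf)
		memcpy(new_buf, slot->buf, slot->len < new_len ? slot->len : new_len);
	free(slot->buf);
	slot->buf = new_buf;
	slot->len = new_len;
	slot_gen++;
	return 0;
}

int main(void)
{
	struct cache_slot slot = { 0 };

	pthread_mutex_lock(&slot_lock);
	if (!slot_realloc(&slot, 128))
		slot.buf[0] = 'x';	/* safe: lock held, slot revalidated */
	pthread_mutex_unlock(&slot_lock);

	free(slot.buf);
	return 0;
}

The property this models, taken from the patch, is that nothing that can block runs while the lock is held, and that a generation (or sequence) mismatch after relocking is treated as a restart rather than trusting possibly stale state.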