X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_key_cache.c;h=f7c001d42391faa5151f1c807514db35392f3abe;hb=ded0160563b045b61e79949f07bed903e98b6528;hp=298a674dbfd6e9963a624a410fe7d5a814954a80;hpb=b0c9ad15f4e5cee60973a8f5f6dc49acfeec9755;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 298a674..f7c001d 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -10,10 +10,10 @@
 #include "error.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "trace.h"
 
 #include <linux/sched/mm.h>
 #include <linux/seq_buf.h>
-#include <trace/events/bcachefs.h>
 
 static inline bool btree_uses_pcpu_readers(enum btree_id id)
 {
@@ -252,7 +252,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	}
 
 	path->l[0].b		= (void *) ck;
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
 
 	ret = bch2_btree_node_lock_write(trans, path, &ck->c);
@@ -265,15 +265,8 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 		return ck;
 	}
 
-	ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
-	if (likely(ck))
-		goto init;
-
-	bch2_trans_unlock(trans);
-
-	ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);
-
-	ret = bch2_trans_relock(trans);
+	ck = allocate_dropping_locks(trans, ret,
+				     kmem_cache_zalloc(bch2_key_cache, _gfp));
 	if (ret) {
 		kmem_cache_free(bch2_key_cache, ck);
 		return ERR_PTR(ret);
@@ -281,11 +274,9 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 
 	if (!ck)
 		return NULL;
-init:
+
 	INIT_LIST_HEAD(&ck->list);
-	bch2_btree_lock_init(&ck->c);
-	if (pcpu_readers)
-		six_lock_pcpu_alloc(&ck->c.lock);
+	bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
 
 	ck->c.cached = true;
 	BUG_ON(!six_trylock_intent(&ck->c.lock));
@@ -337,13 +328,10 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
 				bch2_btree_ids[path->btree_id]);
-			return ERR_PTR(-ENOMEM);
+			return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
 		}
 
 		mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
-	} else {
-		if (path->btree_id == BTREE_ID_subvolumes)
-			six_lock_pcpu_alloc(&ck->c.lock);
 	}
 
 	ck->c.level		= 0;
@@ -387,10 +375,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	struct bkey_i *new_k = NULL;
 	int ret;
 
-	bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
-			     BTREE_ITER_KEY_CACHE_FILL|
-			     BTREE_ITER_CACHED_NOFILL);
-	k = bch2_btree_iter_peek_slot(&iter);
+	k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos,
+			       BTREE_ITER_KEY_CACHE_FILL|
+			       BTREE_ITER_CACHED_NOFILL);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -424,7 +411,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 		if (!new_k) {
 			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
 				bch2_btree_ids[ck->key.btree_id], new_u64s);
-			ret = -ENOMEM;
+			ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
 			goto err;
 		}
 
@@ -513,7 +500,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	path->uptodate = BTREE_ITER_UPTODATE;
@@ -595,7 +582,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	if (!ck->valid)
@@ -664,9 +651,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 		bch2_trans_commit(trans, NULL, NULL,
 				  BTREE_INSERT_NOCHECK_RW|
 				  BTREE_INSERT_NOFAIL|
-				  BTREE_INSERT_USE_RESERVE|
 				  (ck->journal.seq == journal_last_seq(j)
-				   ? JOURNAL_WATERMARK_reserved
+				   ? BCH_WATERMARK_reclaim
 				   : 0)|
 				  commit_flags);
 
@@ -770,11 +756,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 
 bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 				  unsigned flags,
-				  struct btree_path *path,
-				  struct bkey_i *insert)
+				  struct btree_insert_entry *insert_entry)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_cached *ck = (void *) path->l[0].b;
+	struct bkey_cached *ck = (void *) insert_entry->path->l[0].b;
+	struct bkey_i *insert = insert_entry->k;
 	bool kick_reclaim = false;
 
 	BUG_ON(insert->k.u64s > ck->u64s);
@@ -795,6 +781,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	ck->valid = true;
 
 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+		EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
 		atomic_long_inc(&c->btree_key_cache.nr_dirty);
 
@@ -802,9 +789,24 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 			kick_reclaim = true;
 	}
 
+	/*
+	 * To minimize lock contention, we only add the journal pin here and
+	 * defer pin updates to the flush callback via ->seq. Be careful not to
+	 * update ->seq on nojournal commits because we don't want to update the
+	 * pin to a seq that doesn't include journal updates on disk. Otherwise
+	 * we risk losing the update after a crash.
+	 *
+	 * The only exception is if the pin is not active in the first place. We
+	 * have to add the pin because journal reclaim drives key cache
+	 * flushing. The flush callback will not proceed unless ->seq matches
+	 * the latest pin, so make sure it starts with a consistent value.
+	 */
+	if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
+	    !journal_pin_active(&ck->journal)) {
+		ck->seq = trans->journal_res.seq;
+	}
 	bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
 			     &ck->journal, bch2_btree_key_cache_journal_flush);
-	ck->seq = trans->journal_res.seq;
 
 	if (kick_reclaim)
 		journal_reclaim_kick(&c->journal);
@@ -858,7 +860,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -874,7 +876,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -954,8 +956,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	int cpu;
 #endif
 
-	if (bc->shrink.list.next)
-		unregister_shrinker(&bc->shrink);
+	unregister_shrinker(&bc->shrink);
 
 	mutex_lock(&bc->lock);
 
@@ -999,7 +1000,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
 		list_del(&ck->list);
 		kfree(ck->k);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 	}
 
@@ -1041,17 +1042,15 @@ static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shri
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 {
 	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-	int ret;
 
 #ifdef __KERNEL__
 	bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
 	if (!bc->pcpu_freed)
-		return -ENOMEM;
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 #endif
 
-	ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params);
-	if (ret)
-		return ret;
+	if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 
 	bc->table_init_done = true;
 
@@ -1059,7 +1058,9 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 	bc->shrink.count_objects	= bch2_btree_key_cache_count;
 	bc->shrink.scan_objects		= bch2_btree_key_cache_scan;
 	bc->shrink.to_text		= bch2_btree_key_cache_shrinker_to_text;
-	return register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name);
+	if (register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name))
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+	return 0;
 }
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
@@ -1079,7 +1080,7 @@ void bch2_btree_key_cache_exit(void)
 
 int __init bch2_btree_key_cache_init(void)
 {
-	bch2_key_cache = KMEM_CACHE(bkey_cached, 0);
+	bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT);
 	if (!bch2_key_cache)
 		return -ENOMEM;
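
Note on the bkey_cached_alloc() hunk above: the patch replaces an open-coded retry with the allocate_dropping_locks() helper. A minimal sketch of the pattern that helper encapsulates, reconstructed from the deleted lines (the helper itself is a macro in the bcachefs tree and may differ in detail; _gfp is the gfp_t it substitutes into the allocation expression):

	/*
	 * Sketch only: try a non-blocking allocation first; if that fails,
	 * drop btree locks so a GFP_KERNEL allocation may block safely,
	 * then relock the transaction, which can itself fail.
	 */
	ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
	if (unlikely(!ck)) {
		bch2_trans_unlock(trans);
		ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);
		ret = bch2_trans_relock(trans);	/* caller frees ck and bails on error */
	}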