X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_key_cache.c;h=f7c001d42391faa5151f1c807514db35392f3abe;hb=ded0160563b045b61e79949f07bed903e98b6528;hp=298a674dbfd6e9963a624a410fe7d5a814954a80;hpb=b0c9ad15f4e5cee60973a8f5f6dc49acfeec9755;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 298a674..f7c001d 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -10,10 +10,10 @@
 #include "error.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "trace.h"
 
 #include <linux/sched/mm.h>
 #include <linux/seq_buf.h>
-#include <trace/events/bcachefs.h>
 
 static inline bool btree_uses_pcpu_readers(enum btree_id id)
 {
@@ -252,7 +252,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	}
 
 	path->l[0].b		= (void *) ck;
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
 
 	ret = bch2_btree_node_lock_write(trans, path, &ck->c);
@@ -265,15 +265,8 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 		return ck;
 	}
 
-	ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
-	if (likely(ck))
-		goto init;
-
-	bch2_trans_unlock(trans);
-
-	ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);
-
-	ret = bch2_trans_relock(trans);
+	ck = allocate_dropping_locks(trans, ret,
+				     kmem_cache_zalloc(bch2_key_cache, _gfp));
 	if (ret) {
 		kmem_cache_free(bch2_key_cache, ck);
 		return ERR_PTR(ret);
@@ -281,11 +274,9 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 
 	if (!ck)
 		return NULL;
-init:
+
 	INIT_LIST_HEAD(&ck->list);
-	bch2_btree_lock_init(&ck->c);
-	if (pcpu_readers)
-		six_lock_pcpu_alloc(&ck->c.lock);
+	bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
 
 	ck->c.cached = true;
 	BUG_ON(!six_trylock_intent(&ck->c.lock));
@@ -337,13 +328,10 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
 				bch2_btree_ids[path->btree_id]);
-			return ERR_PTR(-ENOMEM);
+			return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
 		}
 
 		mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
-	} else {
-		if (path->btree_id == BTREE_ID_subvolumes)
-			six_lock_pcpu_alloc(&ck->c.lock);
 	}
 
 	ck->c.level		= 0;
@@ -387,10 +375,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	struct bkey_i *new_k = NULL;
 	int ret;
 
-	bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
-			     BTREE_ITER_KEY_CACHE_FILL|
-			     BTREE_ITER_CACHED_NOFILL);
-	k = bch2_btree_iter_peek_slot(&iter);
+	k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos,
+			       BTREE_ITER_KEY_CACHE_FILL|
+			       BTREE_ITER_CACHED_NOFILL);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -424,7 +411,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 		if (!new_k) {
 			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
 				bch2_btree_ids[ck->key.btree_id], new_u64s);
-			ret = -ENOMEM;
+			ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
 			goto err;
 		}
 
@@ -513,7 +500,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	path->uptodate = BTREE_ITER_UPTODATE;
@@ -595,7 +582,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	if (!ck->valid)
@@ -664,9 +651,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 		bch2_trans_commit(trans, NULL, NULL,
 				  BTREE_INSERT_NOCHECK_RW|
 				  BTREE_INSERT_NOFAIL|
-				  BTREE_INSERT_USE_RESERVE|
 				  (ck->journal.seq == journal_last_seq(j)
-				   ? JOURNAL_WATERMARK_reserved
+				   ? BCH_WATERMARK_reclaim
 				   : 0)|
 				  commit_flags);
 
@@ -770,11 +756,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 
 bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 				  unsigned flags,
-				  struct btree_path *path,
-				  struct bkey_i *insert)
+				  struct btree_insert_entry *insert_entry)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_cached *ck = (void *) path->l[0].b;
+	struct bkey_cached *ck = (void *) insert_entry->path->l[0].b;
+	struct bkey_i *insert = insert_entry->k;
 	bool kick_reclaim = false;
 
 	BUG_ON(insert->k.u64s > ck->u64s);
@@ -795,6 +781,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	ck->valid = true;
 
 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+		EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
 		atomic_long_inc(&c->btree_key_cache.nr_dirty);
 
@@ -802,9 +789,24 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 			kick_reclaim = true;
 	}
 
+	/*
+	 * To minimize lock contention, we only add the journal pin here and
+	 * defer pin updates to the flush callback via ->seq. Be careful not to
+	 * update ->seq on nojournal commits because we don't want to update the
+	 * pin to a seq that doesn't include journal updates on disk. Otherwise
+	 * we risk losing the update after a crash.
+	 *
+	 * The only exception is if the pin is not active in the first place. We
+	 * have to add the pin because journal reclaim drives key cache
+	 * flushing. The flush callback will not proceed unless ->seq matches
+	 * the latest pin, so make sure it starts with a consistent value.
+	 */
+	if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
+	    !journal_pin_active(&ck->journal)) {
+		ck->seq = trans->journal_res.seq;
+	}
 	bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
 			     &ck->journal, bch2_btree_key_cache_journal_flush);
-	ck->seq = trans->journal_res.seq;
 
 	if (kick_reclaim)
 		journal_reclaim_kick(&c->journal);
@@ -858,7 +860,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -874,7 +876,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -954,8 +956,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	int cpu;
 #endif
 
-	if (bc->shrink.list.next)
-		unregister_shrinker(&bc->shrink);
+	unregister_shrinker(&bc->shrink);
 
 	mutex_lock(&bc->lock);
 
@@ -999,7 +1000,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
 		list_del(&ck->list);
 		kfree(ck->k);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 	}
 
@@ -1041,17 +1042,15 @@ static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shri
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 {
 	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-	int ret;
 
 #ifdef __KERNEL__
 	bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
 	if (!bc->pcpu_freed)
-		return -ENOMEM;
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 #endif
 
-	ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params);
-	if (ret)
-		return ret;
+	if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 
 	bc->table_init_done = true;
 
@@ -1059,7 +1058,9 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 	bc->shrink.count_objects	= bch2_btree_key_cache_count;
 	bc->shrink.scan_objects		= bch2_btree_key_cache_scan;
 	bc->shrink.to_text		= bch2_btree_key_cache_shrinker_to_text;
-	return register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name);
+	if (register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name))
+		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+	return 0;
 }
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
@@ -1079,7 +1080,7 @@ void bch2_btree_key_cache_exit(void)
 
 int __init bch2_btree_key_cache_init(void)
 {
-	bch2_key_cache = KMEM_CACHE(bkey_cached, 0);
+	bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT);
 	if (!bch2_key_cache)
 		return -ENOMEM;
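
Note on the bkey_cached_alloc() hunk above: the patch replaces an open-coded retry with the allocate_dropping_locks() helper. A minimal sketch of the pattern that helper encapsulates, reconstructed from the deleted lines (the helper itself is a macro in the bcachefs tree and may differ in detail; _gfp is the gfp_t it substitutes into the allocation expression):

	/*
	 * Sketch only: try a non-blocking allocation first; if that fails,
	 * drop btree locks so a GFP_KERNEL allocation may block safely,
	 * then relock the transaction, which can itself fail.
	 */
	ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
	if (unlikely(!ck)) {
		bch2_trans_unlock(trans);
		ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);
		ret = bch2_trans_relock(trans);	/* caller frees ck and bails on error */
	}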