Move c_src dirs back to toplevel
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 7c6769cd17b33338a2ba03b416aa8c7950a0fe77..8e2488a4b58d00a45f78a7c64a6c1e83f4b0ff59 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -9,17 +9,11 @@
 #include "debug.h"
 #include "errcode.h"
 #include "error.h"
+#include "journal.h"
 #include "trace.h"
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
-#include <linux/seq_buf.h>
-
-#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
-do {                                            \
-       if (shrinker_counter)                    \
-               bc->not_freed_##counter++;       \
-} while (0)
 
 const char * const bch2_btree_node_flags[] = {
 #define x(f)   #f,
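
For reference, the helper deleted above expanded to a guarded per-reason
counter bump; e.g. BTREE_CACHE_NOT_FREED_INCREMENT(dirty) became:

	do {
		if (shrinker_counter)
			bc->not_freed_dirty++;
	} while (0);

The not_freed_* counters are dropped together with the seq_buf-based
shrinker to_text hook removed further down; the dynamically allocated
shrinker API this diff migrates to has no to_text callback.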
@@ -208,7 +202,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 {
        struct btree_cache *bc = &c->btree_cache;
        int ret = 0;
@@ -218,64 +212,38 @@ wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush) {
-                       if (btree_node_dirty(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
-                       else if (btree_node_read_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-                       else if (btree_node_write_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+               if (!flush)
                        return -BCH_ERR_ENOMEM_btree_node_reclaim;
-               }
 
                /* XXX: waiting on IO with btree cache lock held */
                bch2_btree_node_wait_on_read(b);
                bch2_btree_node_wait_on_write(b);
        }
 
-       if (!six_trylock_intent(&b->c.lock)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
+       if (!six_trylock_intent(&b->c.lock))
                return -BCH_ERR_ENOMEM_btree_node_reclaim;
-       }
 
-       if (!six_trylock_write(&b->c.lock)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
+       if (!six_trylock_write(&b->c.lock))
                goto out_unlock_intent;
-       }
 
        /* recheck under lock */
        if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush) {
-                       if (btree_node_read_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-                       else if (btree_node_write_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+               if (!flush)
                        goto out_unlock;
-               }
                six_unlock_write(&b->c.lock);
                six_unlock_intent(&b->c.lock);
                goto wait_on_io;
        }
 
-       if (btree_node_noevict(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
-               goto out_unlock;
-       }
-       if (btree_node_write_blocked(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
+       if (btree_node_noevict(b) ||
+           btree_node_write_blocked(b) ||
+           btree_node_will_make_reachable(b))
                goto out_unlock;
-       }
-       if (btree_node_will_make_reachable(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
-               goto out_unlock;
-       }
 
        if (btree_node_dirty(b)) {
-               if (!flush) {
-                       BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+               if (!flush)
                        goto out_unlock;
-               }
                /*
                 * Using the underscore version because we don't want to compact
                 * bsets after the write, since this node is about to be evicted
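
The reclaim path above is a trylock-then-recheck pattern: node flags are
tested optimistically without the lock, then re-tested once both the
intent and write sides of the six lock are held. A minimal sketch, with
bcachefs internals stubbed out (my_node and my_node_busy are
hypothetical; six_trylock_*() as in bcachefs's six.h):

	static int try_reclaim(struct my_node *b)
	{
		if (my_node_busy(b))		/* optimistic, unlocked */
			return -ENOMEM;

		if (!six_trylock_intent(&b->lock))
			return -ENOMEM;

		if (!six_trylock_write(&b->lock)) {
			six_unlock_intent(&b->lock);
			return -ENOMEM;
		}

		/* recheck: flags may have changed while taking locks */
		if (my_node_busy(b)) {
			six_unlock_write(&b->lock);
			six_unlock_intent(&b->lock);
			return -ENOMEM;
		}

		return 0;	/* success: intent + write held */
	}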
@@ -305,21 +273,20 @@ out_unlock_intent:
        goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, false, shrinker_counter);
+       return __btree_node_reclaim(c, b, false);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, true, false);
+       return __btree_node_reclaim(c, b, true);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                                           struct shrink_control *sc)
 {
-       struct bch_fs *c = container_of(shrink, struct bch_fs,
-                                       btree_cache.shrink);
+       struct bch_fs *c = shrink->private_data;
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b, *t;
        unsigned long nr = sc->nr_to_scan;
@@ -361,12 +328,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                if (touched >= nr)
                        goto out;
 
-               if (!btree_node_reclaim(c, b, true)) {
+               if (!btree_node_reclaim(c, b)) {
                        btree_node_data_free(c, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
                        freed++;
-                       bc->freed++;
                }
        }
 restart:
@@ -375,11 +341,9 @@ restart:
 
                if (btree_node_accessed(b)) {
                        clear_btree_node_accessed(b);
-                       bc->not_freed_access_bit++;
-               } else if (!btree_node_reclaim(c, b, true)) {
+               } else if (!btree_node_reclaim(c, b)) {
                        freed++;
                        btree_node_data_free(c, b);
-                       bc->freed++;
 
                        bch2_btree_node_hash_remove(bc, b);
                        six_unlock_write(&b->c.lock);
@@ -420,8 +384,7 @@ out_nounlock:
 static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
                                            struct shrink_control *sc)
 {
-       struct bch_fs *c = container_of(shrink, struct bch_fs,
-                                       btree_cache.shrink);
+       struct bch_fs *c = shrink->private_data;
        struct btree_cache *bc = &c->btree_cache;
 
        if (bch2_btree_shrinker_disabled)
@@ -430,25 +393,13 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
        return btree_cache_can_free(bc);
 }
 
-static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
-{
-       struct bch_fs *c = container_of(shrink, struct bch_fs,
-                                       btree_cache.shrink);
-       char *cbuf;
-       size_t buflen = seq_buf_get_buf(s, &cbuf);
-       struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
-
-       bch2_btree_cache_to_text(&out, &c->btree_cache);
-       seq_buf_commit(s, out.pos);
-}
-
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
        unsigned i, flags;
 
-       unregister_shrinker(&bc->shrink);
+       shrinker_free(bc->shrink);
 
        /* vfree() can allocate memory: */
        flags = memalloc_nofs_save();
@@ -474,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
                BUG_ON(btree_node_read_in_flight(b) ||
                       btree_node_write_in_flight(b));
 
-               if (btree_node_dirty(b))
-                       bch2_btree_complete_write(c, b, btree_current_write(b));
-               clear_btree_node_dirty_acct(c, b);
-
                btree_node_data_free(c, b);
        }
 
-       BUG_ON(atomic_read(&c->btree_cache.dirty));
+       BUG_ON(!bch2_journal_error(&c->journal) &&
+              atomic_read(&c->btree_cache.dirty));
 
        list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
 
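
(The relaxed BUG_ON above is deliberate: once the journal is in an error
state, dirty btree nodes can no longer be written back, so dirty nodes
at shutdown are expected there rather than a bug; the assertion is now
gated on !bch2_journal_error().)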
@@ -502,6 +450,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 int bch2_fs_btree_cache_init(struct bch_fs *c)
 {
        struct btree_cache *bc = &c->btree_cache;
+       struct shrinker *shrink;
        unsigned i;
        int ret = 0;
 
@@ -521,13 +470,15 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 
        mutex_init(&c->verify_lock);
 
-       bc->shrink.count_objects        = bch2_btree_cache_count;
-       bc->shrink.scan_objects         = bch2_btree_cache_scan;
-       bc->shrink.to_text              = bch2_btree_cache_shrinker_to_text;
-       bc->shrink.seeks                = 4;
-       ret = register_shrinker(&bc->shrink, "%s/btree_cache", c->name);
-       if (ret)
+       shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
+       if (!shrink)
                goto err;
+       bc->shrink = shrink;
+       shrink->count_objects   = bch2_btree_cache_count;
+       shrink->scan_objects    = bch2_btree_cache_scan;
+       shrink->seeks           = 4;
+       shrink->private_data    = c;
+       shrinker_register(shrink);
 
        return 0;
 err:
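
This hunk replaces the embedded struct shrinker and register_shrinker()
with the dynamically allocated shrinker API merged in Linux 6.7
(shrinker_alloc() / shrinker_register() / shrinker_free()); the owning
bch_fs is recovered via shrink->private_data instead of container_of().
A minimal sketch of the new API under hypothetical names (my_cache,
my_count, my_scan):

	#include <linux/shrinker.h>

	struct my_cache {
		struct shrinker	*shrink;
		unsigned long	nr_cached;
	};

	static unsigned long my_count(struct shrinker *shrink,
				      struct shrink_control *sc)
	{
		struct my_cache *cache = shrink->private_data;

		return cache->nr_cached;	/* freeable objects, or 0 */
	}

	static unsigned long my_scan(struct shrinker *shrink,
				     struct shrink_control *sc)
	{
		/* free up to sc->nr_to_scan objects, return count freed */
		return SHRINK_STOP;
	}

	static int my_cache_init(struct my_cache *cache, const char *name)
	{
		struct shrinker *shrink = shrinker_alloc(0, "%s-cache", name);

		if (!shrink)
			return -ENOMEM;

		cache->shrink		= shrink;
		shrink->count_objects	= my_count;
		shrink->scan_objects	= my_scan;
		shrink->seeks		= DEFAULT_SEEKS;
		shrink->private_data	= cache;
		shrinker_register(shrink);	/* live from here on */
		return 0;
	}

	/* on teardown, shrinker_free(cache->shrink) replaces
	 * unregister_shrinker(); it is safe on a NULL pointer */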
@@ -549,19 +500,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
  * cannibalize_bucket() will take. This means every time we unlock the root of
  * the btree, we need to release this lock if we have it held.
  */
-void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
 
        if (bc->alloc_lock == current) {
-               trace_and_count(c, btree_cache_cannibalize_unlock, c);
+               trace_and_count(c, btree_cache_cannibalize_unlock, trans);
                bc->alloc_lock = NULL;
                closure_wake_up(&bc->alloc_wait);
        }
 }
 
-int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct task_struct *old;
 
@@ -570,7 +523,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
                goto success;
 
        if (!cl) {
-               trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+               trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
                return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
        }
 
@@ -584,11 +537,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
                goto success;
        }
 
-       trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+       trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
        return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
 
 success:
-       trace_and_count(c, btree_cache_cannibalize_lock, c);
+       trace_and_count(c, btree_cache_cannibalize_lock, trans);
        return 0;
 }
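
Passing the btree_trans down means the tracepoints now carry transaction
context. The typical calling pattern, mirroring the retry loop further
down in this diff (a sketch; error handling elided):

	struct btree *b = bch2_btree_node_mem_alloc(trans, pcpu_read_locks);

	if (IS_ERR(b) &&
	    !bch2_btree_cache_cannibalize_lock(trans, NULL)) {
		/* lock held: we may now evict another cached node */
		b = bch2_btree_node_mem_alloc(trans, pcpu_read_locks);
	}

	/* ... use b ... */

	bch2_btree_cache_cannibalize_unlock(trans);	/* always release */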
 
@@ -598,7 +551,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
        struct btree *b;
 
        list_for_each_entry_reverse(b, &bc->live, list)
-               if (!btree_node_reclaim(c, b, false))
+               if (!btree_node_reclaim(c, b))
                        return b;
 
        while (1) {
@@ -634,7 +587,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
         * disk node. Check the freed list before allocating a new one:
         */
        list_for_each_entry(b, freed, list)
-               if (!btree_node_reclaim(c, b, false)) {
+               if (!btree_node_reclaim(c, b)) {
                        list_del_init(&b->list);
                        goto got_node;
                }
@@ -660,7 +613,7 @@ got_node:
         * the list. Check if there's any freed nodes there:
         */
        list_for_each_entry(b2, &bc->freeable, list)
-               if (!btree_node_reclaim(c, b2, false)) {
+               if (!btree_node_reclaim(c, b2)) {
                        swap(b->data, b2->data);
                        swap(b->aux_data, b2->aux_data);
                        btree_node_to_freedlist(bc, b2);
@@ -722,7 +675,7 @@ err:
 
                mutex_unlock(&bc->lock);
 
-               trace_and_count(c, btree_cache_cannibalize, c);
+               trace_and_count(c, btree_cache_cannibalize, trans);
                goto out;
        }
 
@@ -766,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        if (IS_ERR(b))
                return b;
 
-       /*
-        * Btree nodes read in from disk should not have the accessed bit set
-        * initially, so that linear scans don't thrash the cache:
-        */
-       clear_btree_node_accessed(b);
-
        bkey_copy(&b->key, k);
        if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
                /* raced with another fill: */
@@ -798,7 +745,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        if (path && sync)
                bch2_trans_unlock_noassert(trans);
 
-       bch2_btree_node_read(c, b, sync);
+       bch2_btree_node_read(trans, b, sync);
 
        if (!sync)
                return NULL;
@@ -832,12 +779,12 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
               "btree node header doesn't match ptr\n"
               "btree %s level %u\n"
               "ptr: ",
-              bch2_btree_ids[b->c.btree_id], b->c.level);
+              bch2_btree_id_str(b->c.btree_id), b->c.level);
        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
 
        prt_printf(&buf, "\nheader: btree %s level %llu\n"
               "min ",
-              bch2_btree_ids[BTREE_NODE_ID(b->data)],
+              bch2_btree_id_str(BTREE_NODE_ID(b->data)),
               BTREE_NODE_LEVEL(b->data));
        bch2_bpos_to_text(&buf, b->data->min_key);
 
@@ -1088,7 +1035,7 @@ retry:
                        goto retry;
 
                if (IS_ERR(b) &&
-                   !bch2_btree_cache_cannibalize_lock(c, NULL))
+                   !bch2_btree_cache_cannibalize_lock(trans, NULL))
                        goto retry;
 
                if (IS_ERR(b))
@@ -1136,7 +1083,7 @@ lock_node:
        EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
        btree_check_header(c, b);
 out:
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
        return b;
 }
 
@@ -1200,8 +1147,21 @@ wait_on_io:
        six_unlock_intent(&b->c.lock);
 }
 
-void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
-                            const struct btree *b)
+const char *bch2_btree_id_str(enum btree_id btree)
+{
+       return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
+}
+
+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+{
+       prt_printf(out, "%s level %u/%u\n  ",
+              bch2_btree_id_str(b->c.btree_id),
+              b->c.level,
+              bch2_btree_id_root(c, b->c.btree_id)->level);
+       bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+}
+
+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
 {
        struct bset_stats stats;
 
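
bch2_btree_id_str() bounds-checks the id before indexing the (renamed)
__bch2_btree_ids[] table. A hypothetical caller of the new position
helper, using the printbuf conventions seen elsewhere in bcachefs:

	struct printbuf buf = PRINTBUF;

	bch2_btree_pos_to_text(&buf, c, b);
	bch_err(c, "%s", buf.buf);
	printbuf_exit(&buf);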
@@ -1243,21 +1203,9 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
               stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
 {
-       prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
-       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
-       prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
-
-       prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
-       prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
-       prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
-       prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
-       prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
-       prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
-       prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
-       prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
-       prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
-       prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
-
+       prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+       prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
 }