]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 6ddf061e68 bcachefs: Use a genradix for reading journal...
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 8 Apr 2022 23:19:05 +0000 (19:19 -0400)
committerKent Overstreet <kent.overstreet@gmail.com>
Fri, 8 Apr 2022 23:26:30 +0000 (19:26 -0400)
15 files changed:
.bcachefs_revision
Makefile
cmd_debug.c
include/linux/generic-radix-tree.h
include/linux/kmemleak.h [new file with mode: 0644]
include/linux/types.h
libbcachefs/bcachefs.h
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_io.h
libbcachefs/recovery.c
libbcachefs/recovery.h
libbcachefs/super.c
linux/generic-radix-tree.c

index dbcc3d9135b9f2f44a07ff39a8e900b23a90150d..a7745cdfe46b9da3a3525cbac684e5ba16275d08 100644 (file)
@@ -1 +1 @@
-d3da360412f7ca4677975683d3999c038b0cacd7
+6ddf061e68560a2bb263b126af7e894a6c1afb5f
index 266f98a1ac1cbfe3c2741faf717037d6e182b787..bed43bdab229ce15a94b53c0dbc4b0f1235bad16 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -189,6 +189,12 @@ update-bcachefs-sources:
        git add include/linux/list_nulls.h
        cp $(LINUX_DIR)/include/linux/poison.h include/linux/
        git add include/linux/poison.h
+       cp $(LINUX_DIR)/include/linux/generic-radix-tree.h include/linux/
+       git add include/linux/generic-radix-tree.h
+       cp $(LINUX_DIR)/lib/generic-radix-tree.c linux/
+       git add linux/generic-radix-tree.c
+       cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/
+       git add include/linux/kmemleak.h
        cp $(LINUX_DIR)/scripts/Makefile.compiler ./
        git add Makefile.compiler
        $(RM) libbcachefs/*.mod.c
index 495c0687eada5fe800cae45889b13a8546554c64..37232481bc4f4a0edf11f48def37ebdd7bc21136 100644 (file)
@@ -624,16 +624,20 @@ int cmd_list_journal(int argc, char *argv[])
        if (IS_ERR(c))
                die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
 
-       struct journal_replay *p;
+       struct journal_replay *p, **_p;
+       struct genradix_iter iter;
        struct jset_entry *entry;
        struct printbuf buf = PRINTBUF;
 
-       list_for_each_entry(p, &c->journal_entries, list) {
+       genradix_for_each(&c->journal_entries, iter, _p) {
+               p = *_p;
+               if (!p)
+                       continue;
+
                bool blacklisted =
                        bch2_journal_seq_is_blacklisted(c,
                                        le64_to_cpu(p->j.seq), false);
 
-
                if (blacklisted)
                        printf("blacklisted ");
 
index f09689dafb008114f9199167f6d18918d0129a9d..c74b7376990d53301bfbd55acb414dbb491d8184 100644 (file)
@@ -2,7 +2,7 @@
 #define _LINUX_GENERIC_RADIX_TREE_H
 
 /**
- * DOC: Generic radix trees/sparse arrays:
+ * DOC: Generic radix trees/sparse arrays
  *
  * Very simple and minimalistic, supporting arbitrary size entries up to
  * PAGE_SIZE.
 
 #include <asm/page.h>
 #include <linux/bug.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
 #include <linux/log2.h>
+#include <linux/math.h>
+#include <linux/types.h>
 
 struct genradix_root;
 
 struct __genradix {
-       struct genradix_root __rcu      *root;
+       struct genradix_root            *root;
 };
 
 /*
@@ -115,6 +117,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
 
 #define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
 #define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
+#define __genradix_objs_per_page(_radix)                       \
+       (PAGE_SIZE / sizeof((_radix)->type[0]))
+#define __genradix_page_remainder(_radix)                      \
+       (PAGE_SIZE % sizeof((_radix)->type[0]))
+
 #define __genradix_idx_to_offset(_radix, _idx)                 \
        __idx_to_offset(_idx, __genradix_obj_size(_radix))
 
@@ -178,14 +185,30 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
 #define genradix_iter_peek(_iter, _radix)                      \
        (__genradix_cast(_radix)                                \
         __genradix_iter_peek(_iter, &(_radix)->tree,           \
-                             PAGE_SIZE / __genradix_obj_size(_radix)))
+                       __genradix_objs_per_page(_radix)))
+
+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
+                               size_t, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or below iterator's current
+ *                     position
+ * @_iter:     a genradix_iter
+ * @_radix:    genradix being iterated over
+ *
+ * If no more entries exist at or below @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek_prev(_iter, _radix)                 \
+       (__genradix_cast(_radix)                                \
+        __genradix_iter_peek_prev(_iter, &(_radix)->tree,      \
+                       __genradix_objs_per_page(_radix),       \
+                       __genradix_obj_size(_radix) +           \
+                       __genradix_page_remainder(_radix)))
 
 static inline void __genradix_iter_advance(struct genradix_iter *iter,
                                           size_t obj_size)
 {
-       size_t new_offset = iter->offset + obj_size;
-
-       if (new_offset < iter->offset) {
+       if (iter->offset + obj_size < iter->offset) {
                iter->offset    = SIZE_MAX;
                iter->pos       = SIZE_MAX;
                return;
@@ -203,6 +226,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
 #define genradix_iter_advance(_iter, _radix)                   \
        __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
 
+static inline void __genradix_iter_rewind(struct genradix_iter *iter,
+                                         size_t obj_size)
+{
+       if (iter->offset == 0 ||
+           iter->offset == SIZE_MAX) {
+               iter->offset = SIZE_MAX;
+               return;
+       }
+
+       if ((iter->offset & (PAGE_SIZE - 1)) == 0)
+               iter->offset -= PAGE_SIZE % obj_size;
+
+       iter->offset -= obj_size;
+       iter->pos--;
+}
+
+#define genradix_iter_rewind(_iter, _radix)                    \
+       __genradix_iter_rewind(_iter, __genradix_obj_size(_radix))
+
 #define genradix_for_each_from(_radix, _iter, _p, _start)      \
        for (_iter = genradix_iter_init(_radix, _start);        \
             (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
@@ -220,6 +262,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
 #define genradix_for_each(_radix, _iter, _p)                   \
        genradix_for_each_from(_radix, _iter, _p, 0)
 
+#define genradix_last_pos(_radix)                              \
+       (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
+
+/**
+ * genradix_for_each_reverse - iterate over entry in a genradix, reverse order
+ * @_radix:    genradix to iterate over
+ * @_iter:     a genradix_iter to track current position
+ * @_p:                pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each_reverse(_radix, _iter, _p)           \
+       for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\
+            (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\
+            genradix_iter_rewind(&_iter, _radix))
+
 int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
 
 /**
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
new file mode 100644 (file)
index 0000000..34684b2
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void) __init;
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+                          gfp_t gfp) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+                                 gfp_t gfp) __ref;
+extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+                            gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
+extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
+extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
+                               gfp_t gfp) __ref;
+extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref;
+extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref;
+extern void kmemleak_ignore_phys(phys_addr_t phys) __ref;
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+                                           int min_count, slab_flags_t flags,
+                                           gfp_t gfp)
+{
+       if (!(flags & SLAB_NOLEAKTRACE))
+               kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+       if (!(flags & SLAB_NOLEAKTRACE))
+               kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+       *ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+                                 gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+                                           int min_count, slab_flags_t flags,
+                                           gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+                                        gfp_t gfp)
+{
+}
+static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+                                   gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+}
+static inline void kmemleak_free_percpu(const void __percpu *ptr)
+{
+}
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+                                      int min_count, gfp_t gfp)
+{
+}
+static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
+{
+}
+static inline void kmemleak_not_leak_phys(phys_addr_t phys)
+{
+}
+static inline void kmemleak_ignore_phys(phys_addr_t phys)
+{
+}
+
+#endif /* CONFIG_DEBUG_KMEMLEAK */
+
+#endif /* __KMEMLEAK_H */
index 7eb2222f8ce265f8cffc85072e80f20b25053606..f0efcd2e437fdab46fb54caa545b8e39b4152f37 100644 (file)
@@ -79,4 +79,8 @@ typedef u64 sector_t;
 
 typedef int (*cmp_func_t)(const void *a, const void *b);
 
+typedef unsigned int __bitwise slab_flags_t;
+typedef u64 phys_addr_t;
+struct vm_struct;
+
 #endif /* _TOOLS_LINUX_TYPES_H_ */
index ab6df637ee26895678bfea8d18eea7e9da6026e4..e29a0891134bca2f9538c489d59562a9e2a7fa7e 100644 (file)
@@ -894,7 +894,8 @@ struct bch_fs {
        mempool_t               btree_bounce_pool;
 
        struct journal          journal;
-       struct list_head        journal_entries;
+       GENRADIX(struct journal_replay *) journal_entries;
+       u64                     journal_entries_base_seq;
        struct journal_keys     journal_keys;
        struct list_head        journal_iters;
 
index d01b1cd4000d48e8fad2d69e0494613f2a5d9dcc..75be0a5f708c089e496e123d7f67b4fd4879eff7 100644 (file)
@@ -1040,17 +1040,25 @@ void bch2_fs_journal_stop(struct journal *j)
        cancel_delayed_work_sync(&j->write_work);
 }
 
-int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
-                         struct list_head *journal_entries)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_entry_pin_list *p;
-       struct journal_replay *i;
+       struct journal_replay *i, **_i;
+       struct genradix_iter iter;
+       bool had_entries = false;
+       unsigned ptr;
        u64 last_seq = cur_seq, nr, seq;
 
-       if (!list_empty(journal_entries))
-               last_seq = le64_to_cpu(list_last_entry(journal_entries,
-                               struct journal_replay, list)->j.last_seq);
+       genradix_for_each_reverse(&c->journal_entries, iter, _i) {
+               i = *_i;
+
+               if (!i || i->ignore)
+                       continue;
+
+               last_seq = le64_to_cpu(i->j.last_seq);
+               break;
+       }
 
        nr = cur_seq - last_seq;
 
@@ -1072,14 +1080,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
        j->pin.back             = cur_seq;
        atomic64_set(&j->seq, cur_seq - 1);
 
-       if (list_empty(journal_entries))
-               j->last_empty_seq = cur_seq - 1;
-
        fifo_for_each_entry_ptr(p, &j->pin, seq)
                journal_pin_list_init(p, 1);
 
-       list_for_each_entry(i, journal_entries, list) {
-               unsigned ptr;
+       genradix_for_each(&c->journal_entries, iter, _i) {
+               i = *_i;
+
+               if (!i || i->ignore)
+                       continue;
 
                seq = le64_to_cpu(i->j.seq);
                BUG_ON(seq >= cur_seq);
@@ -1095,9 +1103,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
                p->devs.nr = 0;
                for (ptr = 0; ptr < i->nr_ptrs; ptr++)
                        bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
+
+               had_entries = true;
        }
 
-       if (list_empty(journal_entries))
+       if (!had_entries)
                j->last_empty_seq = cur_seq;
 
        spin_lock(&j->lock);
index e7321c327d9d137645d8e27db315a2ffc7e1b8d4..7fee0c05aa7d72069b49ff6a3872ec04d8ac33e8 100644 (file)
@@ -512,7 +512,7 @@ int bch2_dev_journal_alloc(struct bch_dev *);
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 
 void bch2_fs_journal_stop(struct journal *);
-int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+int bch2_fs_journal_start(struct journal *, u64);
 
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
index 5e08932c1f7c7d19fe2b0cc082c1094dd444a456..eb20bf05cba84e3407ac15af8f9f1c19e291632b 100644 (file)
 
 #include <trace/events/bcachefs.h>
 
-static void __journal_replay_free(struct journal_replay *i)
+static inline u32 journal_entry_radix_idx(struct bch_fs *c,
+                                         struct jset *j)
 {
-       list_del(&i->list);
+       return (le64_to_cpu(j->seq) - c->journal_entries_base_seq) & (~0U >> 1);
+}
+
+static void __journal_replay_free(struct bch_fs *c,
+                                 struct journal_replay *i)
+{
+       struct journal_replay **p =
+               genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, &i->j));
+
+       BUG_ON(*p != i);
+       *p = NULL;
        kvpfree(i, offsetof(struct journal_replay, j) +
                vstruct_bytes(&i->j));
-
 }
 
 static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
@@ -30,13 +40,12 @@ static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
        i->ignore = true;
 
        if (!c->opts.read_entire_journal)
-               __journal_replay_free(i);
+               __journal_replay_free(c, i);
 }
 
 struct journal_list {
        struct closure          cl;
        struct mutex            lock;
-       struct list_head        *head;
        int                     ret;
 };
 
@@ -52,19 +61,30 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
                             struct journal_list *jlist, struct jset *j,
                             bool bad)
 {
-       struct journal_replay *i, *pos, *dup = NULL;
+       struct genradix_iter iter;
+       struct journal_replay **_i, *i, *dup;
        struct journal_ptr *ptr;
-       struct list_head *where;
        size_t bytes = vstruct_bytes(j);
        u64 last_seq = 0;
        int ret = JOURNAL_ENTRY_ADD_OK;
 
+       /*
+        * Xarrays are indexed by a ulong, not a u64, so we can't index them by
+        * sequence number directly:
+        * Assume instead that they will all fall within the range of +-2billion
+        * of the filrst one we find.
+        */
+       if (!c->journal_entries_base_seq)
+               c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
+
+#if 0
        list_for_each_entry_reverse(i, jlist->head, list) {
                if (!JSET_NO_FLUSH(&i->j)) {
                        last_seq = le64_to_cpu(i->j.last_seq);
                        break;
                }
        }
+#endif
 
        /* Is this entry older than the range we need? */
        if (!c->opts.read_entire_journal &&
@@ -75,28 +95,20 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 
        /* Drop entries we don't need anymore */
        if (!JSET_NO_FLUSH(j)) {
-               list_for_each_entry_safe(i, pos, jlist->head, list) {
+               genradix_for_each(&c->journal_entries, iter, _i) {
+                       i = *_i;
+
+                       if (!i)
+                               continue;
+
                        if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
                                break;
                        journal_replay_free(c, i);
                }
        }
 
-       list_for_each_entry_reverse(i, jlist->head, list) {
-               if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
-                       where = &i->list;
-                       goto add;
-               }
-       }
-
-       where = jlist->head;
-add:
-       dup = where->next != jlist->head
-               ? container_of(where->next, struct journal_replay, list)
-               : NULL;
-
-       if (dup && le64_to_cpu(j->seq) != le64_to_cpu(dup->j.seq))
-               dup = NULL;
+       _i = genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, j));
+       dup = _i ? *_i : NULL;
 
        /*
         * Duplicate journal entries? If so we want the one that didn't have a
@@ -132,10 +144,19 @@ add:
        if (dup) {
                i->nr_ptrs = dup->nr_ptrs;
                memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
-               __journal_replay_free(dup);
+               __journal_replay_free(c, dup);
        }
 
-       list_add(&i->list, where);
+       _i = genradix_ptr_alloc(&c->journal_entries,
+                               journal_entry_radix_idx(c, &i->j),
+                               GFP_KERNEL);
+       if (!_i) {
+               bch_err(c, "failed to allocate c->journal_entries entry");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       *_i = i;
 found:
        for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
                if (ptr->dev == ca->dev_idx) {
@@ -914,7 +935,8 @@ static void bch2_journal_read_device(struct closure *cl)
        struct bch_fs *c = ca->fs;
        struct journal_list *jlist =
                container_of(cl->parent, struct journal_list, cl);
-       struct journal_replay *r;
+       struct journal_replay *r, **_r;
+       struct genradix_iter iter;
        struct journal_read_buf buf = { NULL, 0 };
        u64 min_seq = U64_MAX;
        unsigned i;
@@ -957,7 +979,12 @@ static void bch2_journal_read_device(struct closure *cl)
        ja->sectors_free = ca->mi.bucket_size;
 
        mutex_lock(&jlist->lock);
-       list_for_each_entry(r, jlist->head, list) {
+       genradix_for_each(&c->journal_entries, iter, _r) {
+               r = *_r;
+
+               if (!r)
+                       continue;
+
                for (i = 0; i < r->nr_ptrs; i++) {
                        if (r->ptrs[i].dev == ca->dev_idx &&
                            sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
@@ -1023,11 +1050,11 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
        }
 }
 
-int bch2_journal_read(struct bch_fs *c, struct list_head *list,
-                     u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 {
        struct journal_list jlist;
-       struct journal_replay *i, *t;
+       struct journal_replay *i, **_i, *prev = NULL;
+       struct genradix_iter radix_iter;
        struct bch_dev *ca;
        unsigned iter;
        struct printbuf buf = PRINTBUF;
@@ -1038,7 +1065,6 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 
        closure_init_stack(&jlist.cl);
        mutex_init(&jlist.lock);
-       jlist.head = list;
        jlist.ret = 0;
 
        for_each_member_device(ca, c, iter) {
@@ -1062,22 +1088,21 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
        if (jlist.ret)
                return jlist.ret;
 
-       if (list_empty(list)) {
-               bch_info(c, "journal read done, but no entries found");
-               return 0;
-       }
-
-       i = list_last_entry(list, struct journal_replay, list);
-       *start_seq = le64_to_cpu(i->j.seq) + 1;
+       *start_seq = 0;
 
        /*
         * Find most recent flush entry, and ignore newer non flush entries -
         * those entries will be blacklisted:
         */
-       list_for_each_entry_safe_reverse(i, t, list, list) {
-               if (i->ignore)
+       genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
+               i = *_i;
+
+               if (!i || i->ignore)
                        continue;
 
+               if (!*start_seq)
+                       *start_seq = le64_to_cpu(i->j.seq) + 1;
+
                if (!JSET_NO_FLUSH(&i->j)) {
                        last_seq        = le64_to_cpu(i->j.last_seq);
                        *blacklist_seq  = le64_to_cpu(i->j.seq) + 1;
@@ -1087,6 +1112,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
                journal_replay_free(c, i);
        }
 
+       if (!*start_seq) {
+               bch_info(c, "journal read done, but no entries found");
+               return 0;
+       }
+
        if (!last_seq) {
                fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
                ret = -1;
@@ -1094,8 +1124,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
        }
 
        /* Drop blacklisted entries and entries older than last_seq: */
-       list_for_each_entry_safe(i, t, list, list) {
-               if (i->ignore)
+       genradix_for_each(&c->journal_entries, radix_iter, _i) {
+               i = *_i;
+
+               if (!i || i->ignore)
                        continue;
 
                seq = le64_to_cpu(i->j.seq);
@@ -1114,8 +1146,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 
        /* Check for missing entries: */
        seq = last_seq;
-       list_for_each_entry(i, list, list) {
-               if (i->ignore)
+       genradix_for_each(&c->journal_entries, radix_iter, _i) {
+               i = *_i;
+
+               if (!i || i->ignore)
                        continue;
 
                BUG_ON(seq > le64_to_cpu(i->j.seq));
@@ -1137,11 +1171,9 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
                               !bch2_journal_seq_is_blacklisted(c, seq, false))
                                seq++;
 
-                       if (i->list.prev != list) {
-                               struct journal_replay *p = list_prev_entry(i, list);
-
-                               bch2_journal_ptrs_to_text(&buf1, c, p);
-                               pr_buf(&buf1, " size %zu", vstruct_sectors(&p->j, c->block_bits));
+                       if (prev) {
+                               bch2_journal_ptrs_to_text(&buf1, c, prev);
+                               pr_buf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
                        } else
                                pr_buf(&buf1, "(none)");
                        bch2_journal_ptrs_to_text(&buf2, c, i);
@@ -1158,10 +1190,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
                        printbuf_exit(&buf2);
                }
 
+               prev = i;
                seq++;
        }
 
-       list_for_each_entry(i, list, list) {
+       genradix_for_each(&c->journal_entries, radix_iter, _i) {
                struct jset_entry *entry;
                struct bkey_i *k, *_n;
                struct bch_replicas_padded replicas = {
@@ -1170,7 +1203,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
                };
                unsigned ptr;
 
-               if (i->ignore)
+               i = *_i;
+               if (!i || i->ignore)
                        continue;
 
                ret = jset_validate_entries(c, &i->j, READ);
index f2001835e43e215f725331f1cc591f736a29b6ef..30e995c81fc4d72e97efc1ea77977e681ec02fc7 100644 (file)
@@ -7,7 +7,6 @@
  * during cache_registration
  */
 struct journal_replay {
-       struct list_head        list;
        struct journal_ptr {
                u8              dev;
                u32             bucket;
@@ -53,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
 void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
                               struct journal_replay *);
 
-int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *);
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *);
 
 void bch2_journal_write(struct closure *);
 
index ac75f44d0dcd0cae2f714816344ba600fbe59778..9aa507c19198b8a3f21d5ef0297146beacbeb8c7 100644 (file)
@@ -198,7 +198,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
        if (keys->nr == keys->size) {
                struct journal_keys new_keys = {
                        .nr                     = keys->nr,
-                       .size                   = keys->size * 2,
+                       .size                   = max(keys->size, 8UL) * 2,
                        .journal_seq_base       = keys->journal_seq_base,
                };
 
@@ -433,16 +433,16 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
 
 /* sort and dedup all keys in the journal: */
 
-void bch2_journal_entries_free(struct list_head *list)
+void bch2_journal_entries_free(struct bch_fs *c)
 {
-
-       while (!list_empty(list)) {
-               struct journal_replay *i =
-                       list_first_entry(list, struct journal_replay, list);
-               list_del(&i->list);
-               kvpfree(i, offsetof(struct journal_replay, j) +
-                       vstruct_bytes(&i->j));
-       }
+       struct journal_replay **i;
+       struct genradix_iter iter;
+
+       genradix_for_each(&c->journal_entries, iter, i)
+               if (*i)
+                       kvpfree(*i, offsetof(struct journal_replay, j) +
+                               vstruct_bytes(&(*i)->j));
+       genradix_free(&c->journal_entries);
 }
 
 /*
@@ -474,57 +474,62 @@ void bch2_journal_keys_free(struct journal_keys *keys)
        keys->nr = keys->gap = keys->size = 0;
 }
 
-static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
+static int journal_keys_sort(struct bch_fs *c)
 {
-       struct journal_replay *i;
+       struct genradix_iter iter;
+       struct journal_replay *i, **_i;
        struct jset_entry *entry;
        struct bkey_i *k, *_n;
-       struct journal_keys keys = { NULL };
+       struct journal_keys *keys = &c->journal_keys;
        struct journal_key *src, *dst;
        size_t nr_keys = 0;
 
-       if (list_empty(journal_entries))
-               return keys;
+       genradix_for_each(&c->journal_entries, iter, _i) {
+               i = *_i;
 
-       list_for_each_entry(i, journal_entries, list) {
-               if (i->ignore)
+               if (!i || i->ignore)
                        continue;
 
-               if (!keys.journal_seq_base)
-                       keys.journal_seq_base = le64_to_cpu(i->j.seq);
+               if (!keys->journal_seq_base)
+                       keys->journal_seq_base = le64_to_cpu(i->j.seq);
 
                for_each_jset_key(k, _n, entry, &i->j)
                        nr_keys++;
        }
 
-       keys.size = roundup_pow_of_two(nr_keys);
+       if (!nr_keys)
+               return 0;
+
+       keys->size = roundup_pow_of_two(nr_keys);
 
-       keys.d = kvmalloc(sizeof(keys.d[0]) * keys.size, GFP_KERNEL);
-       if (!keys.d)
-               goto err;
+       keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL);
+       if (!keys->d)
+               return -ENOMEM;
+
+       genradix_for_each(&c->journal_entries, iter, _i) {
+               i = *_i;
 
-       list_for_each_entry(i, journal_entries, list) {
-               if (i->ignore)
+               if (!i || i->ignore)
                        continue;
 
-               BUG_ON(le64_to_cpu(i->j.seq) - keys.journal_seq_base > U32_MAX);
+               BUG_ON(le64_to_cpu(i->j.seq) - keys->journal_seq_base > U32_MAX);
 
                for_each_jset_key(k, _n, entry, &i->j)
-                       keys.d[keys.nr++] = (struct journal_key) {
+                       keys->d[keys->nr++] = (struct journal_key) {
                                .btree_id       = entry->btree_id,
                                .level          = entry->level,
                                .k              = k,
                                .journal_seq    = le64_to_cpu(i->j.seq) -
-                                       keys.journal_seq_base,
+                                       keys->journal_seq_base,
                                .journal_offset = k->_data - i->j._data,
                        };
        }
 
-       sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL);
+       sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);
 
-       src = dst = keys.d;
-       while (src < keys.d + keys.nr) {
-               while (src + 1 < keys.d + keys.nr &&
+       src = dst = keys->d;
+       while (src < keys->d + keys->nr) {
+               while (src + 1 < keys->d + keys->nr &&
                       src[0].btree_id  == src[1].btree_id &&
                       src[0].level     == src[1].level &&
                       !bpos_cmp(src[0].k->k.p, src[1].k->k.p))
@@ -533,10 +538,9 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
                *dst++ = *src++;
        }
 
-       keys.nr = dst - keys.d;
-       keys.gap = keys.nr;
-err:
-       return keys;
+       keys->nr = dst - keys->d;
+       keys->gap = keys->nr;
+       return 0;
 }
 
 /* journal replay: */
@@ -752,10 +756,8 @@ static int journal_replay_entry_early(struct bch_fs *c,
 }
 
 static int journal_replay_early(struct bch_fs *c,
-                               struct bch_sb_field_clean *clean,
-                               struct list_head *journal)
+                               struct bch_sb_field_clean *clean)
 {
-       struct journal_replay *i;
        struct jset_entry *entry;
        int ret;
 
@@ -768,8 +770,13 @@ static int journal_replay_early(struct bch_fs *c,
                                return ret;
                }
        } else {
-               list_for_each_entry(i, journal, list) {
-                       if (i->ignore)
+               struct genradix_iter iter;
+               struct journal_replay *i, **_i;
+
+               genradix_for_each(&c->journal_entries, iter, _i) {
+                       i = *_i;
+
+                       if (!i || i->ignore)
                                continue;
 
                        vstruct_for_each(&i->j, entry) {
@@ -1094,17 +1101,17 @@ int bch2_fs_recovery(struct bch_fs *c)
        }
 
        if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
-               struct journal_replay *i;
+               struct genradix_iter iter;
+               struct journal_replay **i;
 
                bch_verbose(c, "starting journal read");
-               ret = bch2_journal_read(c, &c->journal_entries,
-                                       &blacklist_seq, &journal_seq);
+               ret = bch2_journal_read(c, &blacklist_seq, &journal_seq);
                if (ret)
                        goto err;
 
-               list_for_each_entry_reverse(i, &c->journal_entries, list)
-                       if (!i->ignore) {
-                               last_journal_entry = &i->j;
+               genradix_for_each_reverse(&c->journal_entries, iter, i)
+                       if (*i && !(*i)->ignore) {
+                               last_journal_entry = &(*i)->j;
                                break;
                        }
 
@@ -1122,11 +1129,9 @@ int bch2_fs_recovery(struct bch_fs *c)
                        goto use_clean;
                }
 
-               c->journal_keys = journal_keys_sort(&c->journal_entries);
-               if (!c->journal_keys.d) {
-                       ret = -ENOMEM;
+               ret = journal_keys_sort(c);
+               if (ret)
                        goto err;
-               }
 
                if (c->sb.clean && last_journal_entry) {
                        ret = verify_superblock_clean(c, &clean,
@@ -1155,7 +1160,7 @@ use_clean:
 
        zero_out_btree_mem_ptr(&c->journal_keys);
 
-       ret = journal_replay_early(c, clean, &c->journal_entries);
+       ret = journal_replay_early(c, clean);
        if (ret)
                goto err;
 
@@ -1178,8 +1183,7 @@ use_clean:
                }
        }
 
-       ret = bch2_fs_journal_start(&c->journal, journal_seq,
-                                   &c->journal_entries);
+       ret = bch2_fs_journal_start(&c->journal, journal_seq);
        if (ret)
                goto err;
 
@@ -1383,7 +1387,7 @@ out:
 
        if (!c->opts.keep_journal) {
                bch2_journal_keys_free(&c->journal_keys);
-               bch2_journal_entries_free(&c->journal_entries);
+               bch2_journal_entries_free(c);
        }
        kfree(clean);
        if (ret)
@@ -1404,7 +1408,6 @@ int bch2_fs_initialize(struct bch_fs *c)
        struct qstr lostfound = QSTR("lost+found");
        const char *err = "cannot allocate memory";
        struct bch_dev *ca;
-       LIST_HEAD(journal);
        unsigned i;
        int ret;
 
@@ -1444,7 +1447,7 @@ int bch2_fs_initialize(struct bch_fs *c)
         * journal_res_get() will crash if called before this has
         * set up the journal.pin FIFO and journal.cur pointer:
         */
-       bch2_fs_journal_start(&c->journal, 1, &journal);
+       bch2_fs_journal_start(&c->journal, 1);
        bch2_journal_set_replay_done(&c->journal);
 
        err = "error going read-write";
index 30580a8984a13b2de02f9ef8ace9d3999f28da02..ab8b116ac7db0cb486d2e0e475ee4856e5f9974d 100644 (file)
@@ -55,7 +55,7 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
                                                struct btree *);
 
 void bch2_journal_keys_free(struct journal_keys *);
-void bch2_journal_entries_free(struct list_head *);
+void bch2_journal_entries_free(struct bch_fs *);
 
 int bch2_fs_recovery(struct bch_fs *);
 int bch2_fs_initialize(struct bch_fs *);
index aee3206cd0a0c67e55b53b79ab6dc624513ac958..e03c03ffe493a592ed7e03c2efa981a24508f84c 100644 (file)
@@ -450,7 +450,7 @@ static void __bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
        bch2_journal_keys_free(&c->journal_keys);
-       bch2_journal_entries_free(&c->journal_entries);
+       bch2_journal_entries_free(c);
        percpu_free_rwsem(&c->mark_lock);
 
        if (c->btree_paths_bufs)
@@ -668,7 +668,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        INIT_WORK(&c->journal_seq_blacklist_gc_work,
                  bch2_blacklist_entries_gc);
 
-       INIT_LIST_HEAD(&c->journal_entries);
        INIT_LIST_HEAD(&c->journal_iters);
 
        INIT_LIST_HEAD(&c->fsck_errors);
index 7857017c1d48a11c6ceab7e4069af4908b46a415..41f1bcdc44886c1cef8f25c080c006d22a45339d 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/export.h>
 #include <linux/generic-radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/kmemleak.h>
 
 #define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
 #define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
@@ -37,12 +38,12 @@ static inline size_t genradix_depth_size(unsigned depth)
 #define GENRADIX_DEPTH_MASK                            \
        ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
 
-unsigned genradix_root_to_depth(struct genradix_root *r)
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
 {
        return (unsigned long) r & GENRADIX_DEPTH_MASK;
 }
 
-struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
 {
        return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
 }
@@ -76,6 +77,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
 }
 EXPORT_SYMBOL(__genradix_ptr);
 
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+       struct genradix_node *node;
+
+       node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+       /*
+        * We're using pages (not slab allocations) directly for kernel data
+        * structures, so we need to explicitly inform kmemleak of them in order
+        * to avoid false positive memory leak reports.
+        */
+       kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+       return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+       kmemleak_free(node);
+       free_page((unsigned long)node);
+}
+
 /*
  * Returns pointer to the specified byte @offset within @radix, allocating it if
  * necessary - newly allocated slots are always zeroed out:
@@ -98,8 +120,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                        break;
 
                if (!new_node) {
-                       new_node = (void *)
-                               __get_free_page(gfp_mask|__GFP_ZERO);
+                       new_node = genradix_alloc_node(gfp_mask);
                        if (!new_node)
                                return NULL;
                }
@@ -122,8 +143,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                n = READ_ONCE(*p);
                if (!n) {
                        if (!new_node) {
-                               new_node = (void *)
-                                       __get_free_page(gfp_mask|__GFP_ZERO);
+                               new_node = genradix_alloc_node(gfp_mask);
                                if (!new_node)
                                        return NULL;
                        }
@@ -134,7 +154,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
        }
 
        if (new_node)
-               free_page((unsigned long) new_node);
+               genradix_free_node(new_node);
 
        return &n->data[offset];
 }
@@ -193,6 +213,64 @@ restart:
 }
 EXPORT_SYMBOL(__genradix_iter_peek);
 
+void *__genradix_iter_peek_prev(struct genradix_iter *iter,
+                               struct __genradix *radix,
+                               size_t objs_per_page,
+                               size_t obj_size_plus_page_remainder)
+{
+       struct genradix_root *r;
+       struct genradix_node *n;
+       unsigned level, i;
+
+       if (iter->offset == SIZE_MAX)
+               return NULL;
+
+restart:
+       r = READ_ONCE(radix->root);
+       if (!r)
+               return NULL;
+
+       n       = genradix_root_to_node(r);
+       level   = genradix_root_to_depth(r);
+
+       if (ilog2(iter->offset) >= genradix_depth_shift(level)) {
+               iter->offset = genradix_depth_size(level);
+               iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+               iter->offset -= obj_size_plus_page_remainder;
+               iter->pos--;
+       }
+
+       while (level) {
+               level--;
+
+               i = (iter->offset >> genradix_depth_shift(level)) &
+                       (GENRADIX_ARY - 1);
+
+               while (!n->children[i]) {
+                       size_t objs_per_ptr = genradix_depth_size(level);
+
+                       iter->offset = round_down(iter->offset, objs_per_ptr);
+                       iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+                       if (!iter->offset)
+                               return NULL;
+
+                       iter->offset -= obj_size_plus_page_remainder;
+                       iter->pos--;
+
+                       if (!i)
+                               goto restart;
+                       --i;
+               }
+
+               n = n->children[i];
+       }
+
+       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek_prev);
+
 static void genradix_free_recurse(struct genradix_node *n, unsigned level)
 {
        if (level) {
@@ -203,7 +281,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
                                genradix_free_recurse(n->children[i], level - 1);
        }
 
-       free_page((unsigned long) n);
+       genradix_free_node(n);
 }
 
 int __genradix_prealloc(struct __genradix *radix, size_t size,