From 986533d8d5b21c8eb512bbb3f0496d3d2a087c5d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Apr 2022 19:19:05 -0400 Subject: [PATCH] Update bcachefs sources to 6ddf061e68 bcachefs: Use a genradix for reading journal entries --- .bcachefs_revision | 2 +- Makefile | 6 ++ cmd_debug.c | 10 ++- include/linux/generic-radix-tree.h | 73 ++++++++++++++-- include/linux/kmemleak.h | 125 ++++++++++++++++++++++++++ include/linux/types.h | 4 + libbcachefs/bcachefs.h | 3 +- libbcachefs/journal.c | 34 +++++--- libbcachefs/journal.h | 2 +- libbcachefs/journal_io.c | 136 ++++++++++++++++++----------- libbcachefs/journal_io.h | 3 +- libbcachefs/recovery.c | 117 +++++++++++++------------ libbcachefs/recovery.h | 2 +- libbcachefs/super.c | 3 +- linux/generic-radix-tree.c | 94 ++++++++++++++++++-- 15 files changed, 468 insertions(+), 146 deletions(-) create mode 100644 include/linux/kmemleak.h diff --git a/.bcachefs_revision b/.bcachefs_revision index dbcc3d9..a7745cd 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -d3da360412f7ca4677975683d3999c038b0cacd7 +6ddf061e68560a2bb263b126af7e894a6c1afb5f diff --git a/Makefile b/Makefile index 266f98a..bed43bd 100644 --- a/Makefile +++ b/Makefile @@ -189,6 +189,12 @@ update-bcachefs-sources: git add include/linux/list_nulls.h cp $(LINUX_DIR)/include/linux/poison.h include/linux/ git add include/linux/poison.h + cp $(LINUX_DIR)/include/linux/generic-radix-tree.h include/linux/ + git add include/linux/generic-radix-tree.h + cp $(LINUX_DIR)/lib/generic-radix-tree.c linux/ + git add linux/generic-radix-tree.c + cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/ + git add include/linux/kmemleak.h cp $(LINUX_DIR)/scripts/Makefile.compiler ./ git add Makefile.compiler $(RM) libbcachefs/*.mod.c diff --git a/cmd_debug.c b/cmd_debug.c index 495c068..3723248 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -624,16 +624,20 @@ int cmd_list_journal(int argc, char *argv[]) if (IS_ERR(c)) die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c))); - struct journal_replay *p; + struct journal_replay *p, **_p; + struct genradix_iter iter; struct jset_entry *entry; struct printbuf buf = PRINTBUF; - list_for_each_entry(p, &c->journal_entries, list) { + genradix_for_each(&c->journal_entries, iter, _p) { + p = *_p; + if (!p) + continue; + bool blacklisted = bch2_journal_seq_is_blacklisted(c, le64_to_cpu(p->j.seq), false); - if (blacklisted) printf("blacklisted "); diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index f09689d..c74b737 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -2,7 +2,7 @@ #define _LINUX_GENERIC_RADIX_TREE_H /** - * DOC: Generic radix trees/sparse arrays: + * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to * PAGE_SIZE. 
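A genradix is a sparse array backed by a radix tree of pages. As a minimal usage sketch of the API this patch builds on (the struct foo / foos names are illustrative only, not part of the patch):

/* Illustrative sketch, not part of the patch: a genradix keyed by a
 * small integer index; slots are allocated on demand and zero-filled. */
struct foo {
	u64	seq;
};

static GENRADIX(struct foo) foos;

static int foo_example(void)
{
	struct genradix_iter iter;
	struct foo *f;

	f = genradix_ptr_alloc(&foos, 42, GFP_KERNEL);	/* allocate slot 42 */
	if (!f)
		return -ENOMEM;
	f->seq = 1;

	genradix_for_each(&foos, iter, f)		/* lowest index first */
		pr_info("pos %zu seq %llu\n", iter.pos, f->seq);

	genradix_free(&foos);
	return 0;
}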
@@ -38,13 +38,15 @@
 #include <asm/page.h>
 #include <linux/bug.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
 #include <linux/log2.h>
+#include <linux/math.h>
+#include <linux/types.h>
 
 struct genradix_root;
 
 struct __genradix {
-	struct genradix_root __rcu	*root;
+	struct genradix_root		*root;
 };
 
 /*
@@ -115,6 +117,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
 #define __genradix_cast(_radix)		(typeof((_radix)->type[0]) *)
 #define __genradix_obj_size(_radix)	sizeof((_radix)->type[0])
+#define __genradix_objs_per_page(_radix)			\
+	(PAGE_SIZE / sizeof((_radix)->type[0]))
+#define __genradix_page_remainder(_radix)			\
+	(PAGE_SIZE % sizeof((_radix)->type[0]))
+
 #define __genradix_idx_to_offset(_radix, _idx)			\
 	__idx_to_offset(_idx, __genradix_obj_size(_radix))
 
@@ -178,14 +185,30 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
 #define genradix_iter_peek(_iter, _radix)			\
 	(__genradix_cast(_radix)				\
 	 __genradix_iter_peek(_iter, &(_radix)->tree,		\
-			      PAGE_SIZE / __genradix_obj_size(_radix)))
+			      __genradix_objs_per_page(_radix)))
+
+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
+				size_t, size_t);
+
+/**
+ * genradix_iter_peek_prev - get first entry at or below iterator's current
+ *			     position
+ * @_iter:	a genradix_iter
+ * @_radix:	genradix being iterated over
+ *
+ * If no more entries exist at or below @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek_prev(_iter, _radix)			\
+	(__genradix_cast(_radix)				\
+	 __genradix_iter_peek_prev(_iter, &(_radix)->tree,	\
+			__genradix_objs_per_page(_radix),	\
+			__genradix_obj_size(_radix) +		\
+			__genradix_page_remainder(_radix)))
 
 static inline void __genradix_iter_advance(struct genradix_iter *iter,
 					   size_t obj_size)
 {
-	size_t new_offset = iter->offset + obj_size;
-
-	if (new_offset < iter->offset) {
+	if (iter->offset + obj_size < iter->offset) {
 		iter->offset = SIZE_MAX;
 		iter->pos = SIZE_MAX;
 		return;
@@ -203,6 +226,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
 #define genradix_iter_advance(_iter, _radix)			\
 	__genradix_iter_advance(_iter, __genradix_obj_size(_radix))
 
+static inline void __genradix_iter_rewind(struct genradix_iter *iter,
+					  size_t obj_size)
+{
+	if (iter->offset == 0 ||
+	    iter->offset == SIZE_MAX) {
+		iter->offset = SIZE_MAX;
+		return;
+	}
+
+	if ((iter->offset & (PAGE_SIZE - 1)) == 0)
+		iter->offset -= PAGE_SIZE % obj_size;
+
+	iter->offset -= obj_size;
+	iter->pos--;
+}
+
+#define genradix_iter_rewind(_iter, _radix)			\
+	__genradix_iter_rewind(_iter, __genradix_obj_size(_radix))
+
 #define genradix_for_each_from(_radix, _iter, _p, _start)	\
 	for (_iter = genradix_iter_init(_radix, _start);	\
 	     (_p = genradix_iter_peek(&_iter, _radix)) != NULL;	\
@@ -220,6 +262,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
 #define genradix_for_each(_radix, _iter, _p)			\
 	genradix_for_each_from(_radix, _iter, _p, 0)
 
+#define genradix_last_pos(_radix)				\
+	(SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
+
+/**
+ * genradix_for_each_reverse - iterate over entries in a genradix, in reverse order
+ * @_radix:	genradix to iterate over
+ * @_iter:	a genradix_iter to track current position
+ * @_p:		pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each_reverse(_radix, _iter, _p)		\
+	for (_iter = genradix_iter_init(_radix,	genradix_last_pos(_radix));\
+	     (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\
+	     genradix_iter_rewind(&_iter, _radix))
+
 int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
 
 /**
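The reverse-iteration macros added above are what the journal code later in this patch relies on to visit the newest entries first. Continuing the illustrative foos sketch from earlier (again an assumption, not code from the patch):

/* Walk from genradix_last_pos() back toward index 0; rewinding skips
 * the unused tail of each page, and pages that were never allocated
 * are skipped inside __genradix_iter_peek_prev(). */
static u64 foo_last_nonzero_seq(void)
{
	struct genradix_iter iter;
	struct foo *f;

	genradix_for_each_reverse(&foos, iter, f)
		if (f->seq)
			return f->seq;

	return 0;
}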
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
new file mode 100644
index 0000000..34684b2
--- /dev/null
+++ b/include/linux/kmemleak.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void) __init;
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+			   gfp_t gfp) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+				  gfp_t gfp) __ref;
+extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+			     gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
+extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
+extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
+				gfp_t gfp) __ref;
+extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref;
+extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref;
+extern void kmemleak_ignore_phys(phys_addr_t phys) __ref;
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, slab_flags_t flags,
+					    gfp_t gfp)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+	*ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+				  gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, slab_flags_t flags,
+					    gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+					 gfp_t gfp)
+{
+}
+static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+				    gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+}
+static inline void kmemleak_free_percpu(const void __percpu *ptr)
+{
+}
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+				       int min_count, gfp_t gfp)
+{
+}
+static inline void
kmemleak_free_part_phys(phys_addr_t phys, size_t size) +{ +} +static inline void kmemleak_not_leak_phys(phys_addr_t phys) +{ +} +static inline void kmemleak_ignore_phys(phys_addr_t phys) +{ +} + +#endif /* CONFIG_DEBUG_KMEMLEAK */ + +#endif /* __KMEMLEAK_H */ diff --git a/include/linux/types.h b/include/linux/types.h index 7eb2222..f0efcd2 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -79,4 +79,8 @@ typedef u64 sector_t; typedef int (*cmp_func_t)(const void *a, const void *b); +typedef unsigned int __bitwise slab_flags_t; +typedef u64 phys_addr_t; +struct vm_struct; + #endif /* _TOOLS_LINUX_TYPES_H_ */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index ab6df63..e29a089 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -894,7 +894,8 @@ struct bch_fs { mempool_t btree_bounce_pool; struct journal journal; - struct list_head journal_entries; + GENRADIX(struct journal_replay *) journal_entries; + u64 journal_entries_base_seq; struct journal_keys journal_keys; struct list_head journal_iters; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d01b1cd..75be0a5 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1040,17 +1040,25 @@ void bch2_fs_journal_stop(struct journal *j) cancel_delayed_work_sync(&j->write_work); } -int bch2_fs_journal_start(struct journal *j, u64 cur_seq, - struct list_head *journal_entries) +int bch2_fs_journal_start(struct journal *j, u64 cur_seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_entry_pin_list *p; - struct journal_replay *i; + struct journal_replay *i, **_i; + struct genradix_iter iter; + bool had_entries = false; + unsigned ptr; u64 last_seq = cur_seq, nr, seq; - if (!list_empty(journal_entries)) - last_seq = le64_to_cpu(list_last_entry(journal_entries, - struct journal_replay, list)->j.last_seq); + genradix_for_each_reverse(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) + continue; + + last_seq = le64_to_cpu(i->j.last_seq); + break; + } nr = cur_seq - last_seq; @@ -1072,14 +1080,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq, j->pin.back = cur_seq; atomic64_set(&j->seq, cur_seq - 1); - if (list_empty(journal_entries)) - j->last_empty_seq = cur_seq - 1; - fifo_for_each_entry_ptr(p, &j->pin, seq) journal_pin_list_init(p, 1); - list_for_each_entry(i, journal_entries, list) { - unsigned ptr; + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) + continue; seq = le64_to_cpu(i->j.seq); BUG_ON(seq >= cur_seq); @@ -1095,9 +1103,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq, p->devs.nr = 0; for (ptr = 0; ptr < i->nr_ptrs; ptr++) bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev); + + had_entries = true; } - if (list_empty(journal_entries)) + if (!had_entries) j->last_empty_seq = cur_seq; spin_lock(&j->lock); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index e7321c3..7fee0c0 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -512,7 +512,7 @@ int bch2_dev_journal_alloc(struct bch_dev *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); -int bch2_fs_journal_start(struct journal *, u64, struct list_head *); +int bch2_fs_journal_start(struct journal *, u64); void bch2_dev_journal_exit(struct bch_dev *); int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 5e08932..eb20bf0 100644 --- 
a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -17,12 +17,22 @@
 #include <trace/events/bcachefs.h>
 
-static void __journal_replay_free(struct journal_replay *i)
+static inline u32 journal_entry_radix_idx(struct bch_fs *c,
+					  struct jset *j)
 {
-	list_del(&i->list);
+	return (le64_to_cpu(j->seq) - c->journal_entries_base_seq) & (~0U >> 1);
+}
+
+static void __journal_replay_free(struct bch_fs *c,
+				  struct journal_replay *i)
+{
+	struct journal_replay **p =
+		genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, &i->j));
+
+	BUG_ON(*p != i);
+	*p = NULL;
 	kvpfree(i, offsetof(struct journal_replay, j) +
 		vstruct_bytes(&i->j));
-
 }
 
 static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
@@ -30,13 +40,12 @@ static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
 	i->ignore = true;
 
 	if (!c->opts.read_entire_journal)
-		__journal_replay_free(i);
+		__journal_replay_free(c, i);
 }
 
 struct journal_list {
 	struct closure		cl;
 	struct mutex		lock;
-	struct list_head	*head;
 	int			ret;
 };
 
@@ -52,19 +61,30 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 			     struct journal_list *jlist, struct jset *j,
 			     bool bad)
 {
-	struct journal_replay *i, *pos, *dup = NULL;
+	struct genradix_iter iter;
+	struct journal_replay **_i, *i, *dup;
 	struct journal_ptr *ptr;
-	struct list_head *where;
 	size_t bytes = vstruct_bytes(j);
 	u64 last_seq = 0;
 	int ret = JOURNAL_ENTRY_ADD_OK;
 
+	/*
+	 * Genradixes are indexed by a ulong, not a u64, so we can't index them
+	 * by sequence number directly:
+	 * Assume instead that they will all fall within the range of +-2 billion
+	 * of the first one we find.
+	 */
+	if (!c->journal_entries_base_seq)
+		c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
+
+#if 0
 	list_for_each_entry_reverse(i, jlist->head, list) {
 		if (!JSET_NO_FLUSH(&i->j)) {
 			last_seq = le64_to_cpu(i->j.last_seq);
 			break;
 		}
 	}
+#endif
 
 	/* Is this entry older than the range we need? */
 	if (!c->opts.read_entire_journal &&
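To make the windowing above concrete, here is a worked example with made-up numbers (a sketch, not code from the patch):

/* Suppose the first journal entry read has seq 10000. Then
 *
 *	journal_entries_base_seq = max_t(s64, 1, 10000 - S32_MAX) = 1
 *
 * and journal_entry_radix_idx() maps each entry to
 *
 *	idx = (seq - 1) & (~0U >> 1)
 *
 * so seq 10000 lands at index 9999. Any entry within roughly +-2
 * billion of the first one seen gets a distinct index that fits in the
 * ulong the genradix is keyed by, which is all the journal needs. */
static inline u32 example_radix_idx(u64 seq, u64 base_seq)
{
	return (seq - base_seq) & (~0U >> 1);	/* mask to 31 bits */
}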
@@ -75,28 +95,20 @@
 
 	/* Drop entries we don't need anymore */
 	if (!JSET_NO_FLUSH(j)) {
-		list_for_each_entry_safe(i, pos, jlist->head, list) {
+		genradix_for_each(&c->journal_entries, iter, _i) {
+			i = *_i;
+
+			if (!i)
+				continue;
+
 			if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
 				break;
 			journal_replay_free(c, i);
 		}
 	}
 
-	list_for_each_entry_reverse(i, jlist->head, list) {
-		if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
-			where = &i->list;
-			goto add;
-		}
-	}
-
-	where = jlist->head;
-add:
-	dup = where->next != jlist->head
-		? container_of(where->next, struct journal_replay, list)
-		: NULL;
-
-	if (dup && le64_to_cpu(j->seq) != le64_to_cpu(dup->j.seq))
-		dup = NULL;
+	_i = genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, j));
+	dup = _i ? *_i : NULL;
 
 	/*
 	 * Duplicate journal entries? If so we want the one that didn't have a
@@ -132,10 +144,19 @@ add:
 	if (dup) {
 		i->nr_ptrs = dup->nr_ptrs;
 		memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
-		__journal_replay_free(dup);
+		__journal_replay_free(c, dup);
 	}
 
-	list_add(&i->list, where);
+	_i = genradix_ptr_alloc(&c->journal_entries,
+				journal_entry_radix_idx(c, &i->j),
+				GFP_KERNEL);
+	if (!_i) {
+		bch_err(c, "failed to allocate c->journal_entries entry");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	*_i = i;
 found:
 	for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
 		if (ptr->dev == ca->dev_idx) {
@@ -914,7 +935,8 @@ static void bch2_journal_read_device(struct closure *cl)
 	struct bch_fs *c = ca->fs;
 	struct journal_list *jlist =
 		container_of(cl->parent, struct journal_list, cl);
-	struct journal_replay *r;
+	struct journal_replay *r, **_r;
+	struct genradix_iter iter;
 	struct journal_read_buf buf = { NULL, 0 };
 	u64 min_seq = U64_MAX;
 	unsigned i;
@@ -957,7 +979,12 @@ static void bch2_journal_read_device(struct closure *cl)
 	ja->sectors_free = ca->mi.bucket_size;
 
 	mutex_lock(&jlist->lock);
-	list_for_each_entry(r, jlist->head, list) {
+	genradix_for_each(&c->journal_entries, iter, _r) {
+		r = *_r;
+
+		if (!r)
+			continue;
+
 		for (i = 0; i < r->nr_ptrs; i++) {
 			if (r->ptrs[i].dev == ca->dev_idx &&
 			    sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
@@ -1023,11 +1050,11 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 	}
 }
 
-int bch2_journal_read(struct bch_fs *c, struct list_head *list,
-		      u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 {
 	struct journal_list jlist;
-	struct journal_replay *i, *t;
+	struct journal_replay *i, **_i, *prev = NULL;
+	struct genradix_iter radix_iter;
 	struct bch_dev *ca;
 	unsigned iter;
 	struct printbuf buf = PRINTBUF;
@@ -1038,7 +1065,6 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 
 	closure_init_stack(&jlist.cl);
 	mutex_init(&jlist.lock);
-	jlist.head = list;
 	jlist.ret = 0;
 
 	for_each_member_device(ca, c, iter) {
@@ -1062,22 +1088,21 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 	if (jlist.ret)
 		return jlist.ret;
 
-	if (list_empty(list)) {
-		bch_info(c, "journal read done, but no entries found");
-		return 0;
-	}
-
-	i = list_last_entry(list, struct journal_replay, list);
-	*start_seq = le64_to_cpu(i->j.seq) + 1;
+	*start_seq = 0;
 
 	/*
 	 * Find most recent flush entry, and ignore newer non flush entries -
 	 * those entries will be blacklisted:
 	 */
-	list_for_each_entry_safe_reverse(i, t, list, list) {
-		if (i->ignore)
+	genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
+		i = *_i;
+
+		if (!i || i->ignore)
 			continue;
 
+		if (!*start_seq)
+			*start_seq = le64_to_cpu(i->j.seq) + 1;
+
 		if (!JSET_NO_FLUSH(&i->j)) {
 			last_seq = le64_to_cpu(i->j.last_seq);
 			*blacklist_seq = le64_to_cpu(i->j.seq) + 1;
@@ -1087,6 +1112,11 @@
 		journal_replay_free(c, i);
 	}
 
+	if (!*start_seq) {
+		bch_info(c, "journal read done, but no entries found");
+		return 0;
+	}
+
 	if (!last_seq) {
 		fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
 		ret = -1;
@@ -1094,8 +1124,10 @@
 	}
 
 	/* Drop blacklisted entries and entries older than last_seq: */
-	list_for_each_entry_safe(i, t, list, list) {
-		if (i->ignore)
+	genradix_for_each(&c->journal_entries, radix_iter, _i) {
+		i = *_i;
+
+		if (!i || i->ignore)
 			continue;
 
 		seq = le64_to_cpu(i->j.seq);
@@ -1114,8 +1146,10 @@
int bch2_journal_read(struct bch_fs *c, struct list_head *list, /* Check for missing entries: */ seq = last_seq; - list_for_each_entry(i, list, list) { - if (i->ignore) + genradix_for_each(&c->journal_entries, radix_iter, _i) { + i = *_i; + + if (!i || i->ignore) continue; BUG_ON(seq > le64_to_cpu(i->j.seq)); @@ -1137,11 +1171,9 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, !bch2_journal_seq_is_blacklisted(c, seq, false)) seq++; - if (i->list.prev != list) { - struct journal_replay *p = list_prev_entry(i, list); - - bch2_journal_ptrs_to_text(&buf1, c, p); - pr_buf(&buf1, " size %zu", vstruct_sectors(&p->j, c->block_bits)); + if (prev) { + bch2_journal_ptrs_to_text(&buf1, c, prev); + pr_buf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits)); } else pr_buf(&buf1, "(none)"); bch2_journal_ptrs_to_text(&buf2, c, i); @@ -1158,10 +1190,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, printbuf_exit(&buf2); } + prev = i; seq++; } - list_for_each_entry(i, list, list) { + genradix_for_each(&c->journal_entries, radix_iter, _i) { struct jset_entry *entry; struct bkey_i *k, *_n; struct bch_replicas_padded replicas = { @@ -1170,7 +1203,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, }; unsigned ptr; - if (i->ignore) + i = *_i; + if (!i || i->ignore) continue; ret = jset_validate_entries(c, &i->j, READ); diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h index f200183..30e995c 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal_io.h @@ -7,7 +7,6 @@ * during cache_registration */ struct journal_replay { - struct list_head list; struct journal_ptr { u8 dev; u32 bucket; @@ -53,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, struct journal_replay *); -int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *); +int bch2_journal_read(struct bch_fs *, u64 *, u64 *); void bch2_journal_write(struct closure *); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index ac75f44..9aa507c 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -198,7 +198,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (keys->nr == keys->size) { struct journal_keys new_keys = { .nr = keys->nr, - .size = keys->size * 2, + .size = max(keys->size, 8UL) * 2, .journal_seq_base = keys->journal_seq_base, }; @@ -433,16 +433,16 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i /* sort and dedup all keys in the journal: */ -void bch2_journal_entries_free(struct list_head *list) +void bch2_journal_entries_free(struct bch_fs *c) { - - while (!list_empty(list)) { - struct journal_replay *i = - list_first_entry(list, struct journal_replay, list); - list_del(&i->list); - kvpfree(i, offsetof(struct journal_replay, j) + - vstruct_bytes(&i->j)); - } + struct journal_replay **i; + struct genradix_iter iter; + + genradix_for_each(&c->journal_entries, iter, i) + if (*i) + kvpfree(*i, offsetof(struct journal_replay, j) + + vstruct_bytes(&(*i)->j)); + genradix_free(&c->journal_entries); } /* @@ -474,57 +474,62 @@ void bch2_journal_keys_free(struct journal_keys *keys) keys->nr = keys->gap = keys->size = 0; } -static struct journal_keys journal_keys_sort(struct list_head *journal_entries) +static int journal_keys_sort(struct bch_fs *c) { - struct journal_replay *i; + struct genradix_iter iter; + struct journal_replay *i, **_i; struct jset_entry *entry; struct bkey_i 
*k, *_n; - struct journal_keys keys = { NULL }; + struct journal_keys *keys = &c->journal_keys; struct journal_key *src, *dst; size_t nr_keys = 0; - if (list_empty(journal_entries)) - return keys; + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; - list_for_each_entry(i, journal_entries, list) { - if (i->ignore) + if (!i || i->ignore) continue; - if (!keys.journal_seq_base) - keys.journal_seq_base = le64_to_cpu(i->j.seq); + if (!keys->journal_seq_base) + keys->journal_seq_base = le64_to_cpu(i->j.seq); for_each_jset_key(k, _n, entry, &i->j) nr_keys++; } - keys.size = roundup_pow_of_two(nr_keys); + if (!nr_keys) + return 0; + + keys->size = roundup_pow_of_two(nr_keys); - keys.d = kvmalloc(sizeof(keys.d[0]) * keys.size, GFP_KERNEL); - if (!keys.d) - goto err; + keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL); + if (!keys->d) + return -ENOMEM; + + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; - list_for_each_entry(i, journal_entries, list) { - if (i->ignore) + if (!i || i->ignore) continue; - BUG_ON(le64_to_cpu(i->j.seq) - keys.journal_seq_base > U32_MAX); + BUG_ON(le64_to_cpu(i->j.seq) - keys->journal_seq_base > U32_MAX); for_each_jset_key(k, _n, entry, &i->j) - keys.d[keys.nr++] = (struct journal_key) { + keys->d[keys->nr++] = (struct journal_key) { .btree_id = entry->btree_id, .level = entry->level, .k = k, .journal_seq = le64_to_cpu(i->j.seq) - - keys.journal_seq_base, + keys->journal_seq_base, .journal_offset = k->_data - i->j._data, }; } - sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL); + sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL); - src = dst = keys.d; - while (src < keys.d + keys.nr) { - while (src + 1 < keys.d + keys.nr && + src = dst = keys->d; + while (src < keys->d + keys->nr) { + while (src + 1 < keys->d + keys->nr && src[0].btree_id == src[1].btree_id && src[0].level == src[1].level && !bpos_cmp(src[0].k->k.p, src[1].k->k.p)) @@ -533,10 +538,9 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) *dst++ = *src++; } - keys.nr = dst - keys.d; - keys.gap = keys.nr; -err: - return keys; + keys->nr = dst - keys->d; + keys->gap = keys->nr; + return 0; } /* journal replay: */ @@ -752,10 +756,8 @@ static int journal_replay_entry_early(struct bch_fs *c, } static int journal_replay_early(struct bch_fs *c, - struct bch_sb_field_clean *clean, - struct list_head *journal) + struct bch_sb_field_clean *clean) { - struct journal_replay *i; struct jset_entry *entry; int ret; @@ -768,8 +770,13 @@ static int journal_replay_early(struct bch_fs *c, return ret; } } else { - list_for_each_entry(i, journal, list) { - if (i->ignore) + struct genradix_iter iter; + struct journal_replay *i, **_i; + + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) continue; vstruct_for_each(&i->j, entry) { @@ -1094,17 +1101,17 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { - struct journal_replay *i; + struct genradix_iter iter; + struct journal_replay **i; bch_verbose(c, "starting journal read"); - ret = bch2_journal_read(c, &c->journal_entries, - &blacklist_seq, &journal_seq); + ret = bch2_journal_read(c, &blacklist_seq, &journal_seq); if (ret) goto err; - list_for_each_entry_reverse(i, &c->journal_entries, list) - if (!i->ignore) { - last_journal_entry = &i->j; + genradix_for_each_reverse(&c->journal_entries, iter, i) + if (*i && !(*i)->ignore) { + last_journal_entry = &(*i)->j; break; } @@ -1122,11 
+1129,9 @@ int bch2_fs_recovery(struct bch_fs *c)
 			goto use_clean;
 		}
 
-	c->journal_keys = journal_keys_sort(&c->journal_entries);
-	if (!c->journal_keys.d) {
-		ret = -ENOMEM;
+	ret = journal_keys_sort(c);
+	if (ret)
 		goto err;
-	}
 
 	if (c->sb.clean && last_journal_entry) {
 		ret = verify_superblock_clean(c, &clean,
@@ -1155,7 +1160,7 @@ use_clean:
 
 	zero_out_btree_mem_ptr(&c->journal_keys);
 
-	ret = journal_replay_early(c, clean, &c->journal_entries);
+	ret = journal_replay_early(c, clean);
 	if (ret)
 		goto err;
 
@@ -1178,8 +1183,7 @@ use_clean:
 		}
 	}
 
-	ret = bch2_fs_journal_start(&c->journal, journal_seq,
-				    &c->journal_entries);
+	ret = bch2_fs_journal_start(&c->journal, journal_seq);
 	if (ret)
 		goto err;
 
@@ -1383,7 +1387,7 @@ out:
 	if (!c->opts.keep_journal) {
 		bch2_journal_keys_free(&c->journal_keys);
-		bch2_journal_entries_free(&c->journal_entries);
+		bch2_journal_entries_free(c);
 	}
 	kfree(clean);
 	if (ret)
@@ -1404,7 +1408,6 @@ int bch2_fs_initialize(struct bch_fs *c)
 	struct qstr lostfound = QSTR("lost+found");
 	const char *err = "cannot allocate memory";
 	struct bch_dev *ca;
-	LIST_HEAD(journal);
 	unsigned i;
 	int ret;
 
@@ -1444,7 +1447,7 @@ int bch2_fs_initialize(struct bch_fs *c)
 	 * journal_res_get() will crash if called before this has
 	 * set up the journal.pin FIFO and journal.cur pointer:
 	 */
-	bch2_fs_journal_start(&c->journal, 1, &journal);
+	bch2_fs_journal_start(&c->journal, 1);
 	bch2_journal_set_replay_done(&c->journal);
 
 	err = "error going read-write";
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
index 30580a8..ab8b116 100644
--- a/libbcachefs/recovery.h
+++ b/libbcachefs/recovery.h
@@ -55,7 +55,7 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
 						struct btree *);
 
 void bch2_journal_keys_free(struct journal_keys *);
-void bch2_journal_entries_free(struct list_head *);
+void bch2_journal_entries_free(struct bch_fs *);
 
 int bch2_fs_recovery(struct bch_fs *);
 int bch2_fs_initialize(struct bch_fs *);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index aee3206..e03c03f 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -450,7 +450,7 @@ static void __bch2_fs_free(struct bch_fs *c)
 	bch2_io_clock_exit(&c->io_clock[READ]);
 	bch2_fs_compress_exit(c);
 	bch2_journal_keys_free(&c->journal_keys);
-	bch2_journal_entries_free(&c->journal_entries);
+	bch2_journal_entries_free(c);
 	percpu_free_rwsem(&c->mark_lock);
 
 	if (c->btree_paths_bufs)
@@ -668,7 +668,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	INIT_WORK(&c->journal_seq_blacklist_gc_work,
 		  bch2_blacklist_entries_gc);
 
-	INIT_LIST_HEAD(&c->journal_entries);
 	INIT_LIST_HEAD(&c->journal_iters);
 	INIT_LIST_HEAD(&c->fsck_errors);
 
diff --git a/linux/generic-radix-tree.c b/linux/generic-radix-tree.c
index 7857017..41f1bcd 100644
--- a/linux/generic-radix-tree.c
+++ b/linux/generic-radix-tree.c
@@ -3,6 +3,7 @@
 #include <linux/export.h>
 #include <linux/generic-radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/kmemleak.h>
 
 #define GENRADIX_ARY		(PAGE_SIZE / sizeof(struct genradix_node *))
 #define GENRADIX_ARY_SHIFT	ilog2(GENRADIX_ARY)
@@ -37,12 +38,12 @@ static inline size_t genradix_depth_size(unsigned depth)
 #define GENRADIX_DEPTH_MASK				\
 	((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
 
-unsigned genradix_root_to_depth(struct genradix_root *r)
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
 {
 	return (unsigned long) r & GENRADIX_DEPTH_MASK;
 }
 
-struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
 {
 	return (void *)
((unsigned long) r & ~GENRADIX_DEPTH_MASK); } @@ -76,6 +77,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset) } EXPORT_SYMBOL(__genradix_ptr); +static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) +{ + struct genradix_node *node; + + node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO); + + /* + * We're using pages (not slab allocations) directly for kernel data + * structures, so we need to explicitly inform kmemleak of them in order + * to avoid false positive memory leak reports. + */ + kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask); + return node; +} + +static inline void genradix_free_node(struct genradix_node *node) +{ + kmemleak_free(node); + free_page((unsigned long)node); +} + /* * Returns pointer to the specified byte @offset within @radix, allocating it if * necessary - newly allocated slots are always zeroed out: @@ -98,8 +120,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, break; if (!new_node) { - new_node = (void *) - __get_free_page(gfp_mask|__GFP_ZERO); + new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } @@ -122,8 +143,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, n = READ_ONCE(*p); if (!n) { if (!new_node) { - new_node = (void *) - __get_free_page(gfp_mask|__GFP_ZERO); + new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } @@ -134,7 +154,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, } if (new_node) - free_page((unsigned long) new_node); + genradix_free_node(new_node); return &n->data[offset]; } @@ -193,6 +213,64 @@ restart: } EXPORT_SYMBOL(__genradix_iter_peek); +void *__genradix_iter_peek_prev(struct genradix_iter *iter, + struct __genradix *radix, + size_t objs_per_page, + size_t obj_size_plus_page_remainder) +{ + struct genradix_root *r; + struct genradix_node *n; + unsigned level, i; + + if (iter->offset == SIZE_MAX) + return NULL; + +restart: + r = READ_ONCE(radix->root); + if (!r) + return NULL; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (ilog2(iter->offset) >= genradix_depth_shift(level)) { + iter->offset = genradix_depth_size(level); + iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; + + iter->offset -= obj_size_plus_page_remainder; + iter->pos--; + } + + while (level) { + level--; + + i = (iter->offset >> genradix_depth_shift(level)) & + (GENRADIX_ARY - 1); + + while (!n->children[i]) { + size_t objs_per_ptr = genradix_depth_size(level); + + iter->offset = round_down(iter->offset, objs_per_ptr); + iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; + + if (!iter->offset) + return NULL; + + iter->offset -= obj_size_plus_page_remainder; + iter->pos--; + + if (!i) + goto restart; + --i; + } + + n = n->children[i]; + } + + return &n->data[iter->offset & (PAGE_SIZE - 1)]; +} +EXPORT_SYMBOL(__genradix_iter_peek_prev); + static void genradix_free_recurse(struct genradix_node *n, unsigned level) { if (level) { @@ -203,7 +281,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level) genradix_free_recurse(n->children[i], level - 1); } - free_page((unsigned long) n); + genradix_free_node(n); } int __genradix_prealloc(struct __genradix *radix, size_t size, -- 2.39.2
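One detail of the reverse iteration deserves a worked example: objects never straddle page boundaries, so each page may end with dead space, and stepping backwards across a page boundary has to skip it. That is why genradix_iter_peek_prev() is passed __genradix_obj_size() + __genradix_page_remainder(), and why __genradix_iter_rewind() subtracts PAGE_SIZE % obj_size at page boundaries. A sketch with assumed numbers, not taken from the patch:

/* With 24-byte objects and 4096-byte pages:
 *
 *	objs_per_page  = 4096 / 24 = 170
 *	page_remainder = 4096 % 24 = 16
 *
 * The last object on page 0 starts at offset 169 * 24 = 4056 and ends
 * at 4080, leaving 16 dead bytes before offset 4096. Rewinding from
 * offset 4096 (the first object of page 1) must therefore step back
 * 24 + 16 = 40 bytes to land on 4056 -- exactly obj_size plus
 * page_remainder, matching both __genradix_iter_rewind() and the
 * obj_size_plus_page_remainder argument to __genradix_iter_peek_prev().
 */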