From 0206d42daf4c4bd3bbcfa15a2bef34319524db49 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 Mar 2023 02:35:56 -0500 Subject: [PATCH] Update bcachefs sources to 3856459b1b bcachefs: bch2_btree_iter_peek_node_and_restart() Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- Makefile.compiler | 4 +- include/linux/slab.h | 9 ++- libbcachefs/acl.c | 13 ++-- libbcachefs/acl.h | 4 +- libbcachefs/alloc_background.h | 17 +++-- libbcachefs/alloc_foreground.c | 21 ++++-- libbcachefs/bkey.h | 4 +- libbcachefs/bset.c | 4 +- libbcachefs/bset.h | 4 +- libbcachefs/btree_cache.c | 4 +- libbcachefs/btree_cache.h | 4 +- libbcachefs/btree_io.c | 8 ++- libbcachefs/btree_iter.c | 11 +++ libbcachefs/btree_iter.h | 15 +--- libbcachefs/clock.c | 4 +- libbcachefs/debug.c | 119 ++++++++++++++++++++++++++++++ libbcachefs/debug.h | 2 + libbcachefs/ec.c | 88 ++++++++++++----------- libbcachefs/ec.h | 16 ++++- libbcachefs/errcode.h | 2 +- libbcachefs/fs-io.c | 4 +- libbcachefs/io.c | 4 ++ libbcachefs/move.c | 40 +++++++---- libbcachefs/move.h | 1 + libbcachefs/move_types.h | 6 ++ libbcachefs/movinggc.c | 128 ++++++++++++++++++--------------- libbcachefs/opts.h | 28 ++++---- libbcachefs/reflink.c | 6 ++ libbcachefs/util.c | 4 +- libbcachefs/util.h | 23 +++--- 31 files changed, 404 insertions(+), 195 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 48ce699..2845be6 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -171da96d76d03a12872c8c9e2d02602c3ddfcb5f +3856459b1b9f37cebee2bca3c9edcafaf393aa98 diff --git a/Makefile.compiler b/Makefile.compiler index 20d353d..3d8adfd 100644 --- a/Makefile.compiler +++ b/Makefile.compiler @@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\ # gcc-min-version # Usage: cflags-$(call gcc-min-version, 70100) += -foo -gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y) +gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1) # clang-min-version # Usage: cflags-$(call clang-min-version, 110000) += -foo -clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y) +clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1) # ld-option # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) diff --git a/include/linux/slab.h b/include/linux/slab.h index ff122ff..144e333 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -15,10 +15,12 @@ #include #include +#define alloc_hooks(_do, ...) _do + #define ARCH_KMALLOC_MINALIGN 16 #define KMALLOC_MAX_SIZE SIZE_MAX -static inline void *kmalloc(size_t size, gfp_t flags) +static inline void *_kmalloc(size_t size, gfp_t flags) { unsigned i; void *p; @@ -44,6 +46,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) return p; } +#define kmalloc _kmalloc static inline void *krealloc(void *old, size_t size, gfp_t flags) { @@ -94,7 +97,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t #define kvzalloc(size, flags) kzalloc(size, flags) #define kvfree(p) kfree(p) -static inline struct page *alloc_pages(gfp_t flags, unsigned int order) +static inline struct page *_alloc_pages(gfp_t flags, unsigned int order) { size_t size = PAGE_SIZE << order; unsigned i; @@ -114,9 +117,11 @@ static inline struct page *alloc_pages(gfp_t flags, unsigned int order) return p; } +#define alloc_pages _alloc_pages #define alloc_page(gfp) alloc_pages(gfp, 0) +#define _get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) #define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) #define __get_free_page(gfp) __get_free_pages(gfp, 0) diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 9592541..5cb06ac 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -212,9 +212,10 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) +struct posix_acl *bch2_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) { - struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; @@ -224,9 +225,6 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) struct bkey_s_c k; int ret; - if (rcu) - return ERR_PTR(-ECHILD); - bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -293,9 +291,10 @@ int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, } int bch2_set_acl(struct user_namespace *mnt_userns, - struct inode *vinode, struct posix_acl *_acl, int type) + struct dentry *dentry, + struct posix_acl *_acl, int type) { - struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; struct btree_iter inode_iter = { NULL }; diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h index 2d76a48..ac206f6 100644 --- a/libbcachefs/acl.h +++ b/libbcachefs/acl.h @@ -26,12 +26,12 @@ typedef struct { __le32 a_version; } bch_acl_header; -struct posix_acl *bch2_get_acl(struct inode *, int, bool); +struct posix_acl *bch2_get_acl(struct user_namespace *, struct dentry *, int); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct posix_acl *, int); -int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int); +int bch2_set_acl(struct user_namespace *, struct dentry *, struct posix_acl *, int); int bch2_acl_chmod(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, umode_t, struct posix_acl **); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 1aa7c7a..c9ff590 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -74,14 +74,21 @@ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; } +#define DATA_TYPES_MOVABLE \ + ((1U << BCH_DATA_btree)| \ + (1U << BCH_DATA_user)| \ + (1U << BCH_DATA_stripe)) + +static inline bool data_type_movable(enum bch_data_type type) +{ + return (1U << type) & DATA_TYPES_MOVABLE; +} + static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, struct bch_dev *ca) { - if (a.data_type != BCH_DATA_btree && - a.data_type != BCH_DATA_user) - return 0; - - if (a.dirty_sectors >= ca->mi.bucket_size) + if (!data_type_movable(a.data_type) || + a.dirty_sectors >= ca->mi.bucket_size) return 0; return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index b2755c1..3a67ac0 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -97,7 +97,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); if (ob->ec) { - bch2_ec_bucket_written(c, ob); + ec_stripe_new_put(c, ob->ec); return; } @@ -796,11 +796,11 @@ got_bucket: ob->ec_idx = ec_idx; ob->ec = h->s; + ec_stripe_new_get(h->s); ret = add_new_bucket(c, ptrs, devs_may_alloc, nr_replicas, nr_effective, have_cache, flags, ob); - atomic_inc(&h->s->pin); out_put_head: bch2_ec_stripe_head_put(c, h); return ret; @@ -1383,19 +1383,24 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str unsigned data_type = ob->data_type; barrier(); /* READ_ONCE() doesn't work on bitfields */ - prt_printf(out, "%zu ref %u %s%s%s %u:%llu gen %u\n", + prt_printf(out, "%zu ref %u %s %u:%llu gen %u", ob - c->open_buckets, atomic_read(&ob->pin), data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type", - ob->ec ? " ec" : "", - ob->on_partial_list ? " partial" : "", ob->dev, ob->bucket, ob->gen); + if (ob->ec) + prt_printf(out, " ec idx %llu", ob->ec->idx); + if (ob->on_partial_list) + prt_str(out, " partial"); + prt_newline(out); } void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) { struct open_bucket *ob; + out->atomic++; + for (ob = c->open_buckets; ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { @@ -1404,17 +1409,23 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } + + --out->atomic; } void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) { unsigned i; + out->atomic++; spin_lock(&c->freelist_lock); + for (i = 0; i < c->open_buckets_partial_nr; i++) bch2_open_bucket_to_text(out, c, c->open_buckets + c->open_buckets_partial[i]); + spin_unlock(&c->freelist_lock); + --out->atomic; } static const char * const bch2_write_point_states[] = { diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 983572e..2650bd6 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -497,7 +497,7 @@ static inline struct bpos bkey_unpack_pos(const struct btree *b, /* Disassembled bkeys */ -static inline struct bkey_s_c bkey_disassemble(struct btree *b, +static inline struct bkey_s_c bkey_disassemble(const struct btree *b, const struct bkey_packed *k, struct bkey *u) { @@ -507,7 +507,7 @@ static inline struct bkey_s_c bkey_disassemble(struct btree *b, } /* non const version: */ -static inline struct bkey_s __bkey_disassemble(struct btree *b, +static inline struct bkey_s __bkey_disassemble(const struct btree *b, struct bkey_packed *k, struct bkey *u) { diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index 3bd50f1..0216ad9 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -1536,9 +1536,9 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, /* Mergesort */ -void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats) +void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats) { - struct bset_tree *t; + const struct bset_tree *t; for_each_bset(b, t) { enum bset_aux_tree_type type = bset_aux_tree_type(t); diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h index 2105e78..632c2b8 100644 --- a/libbcachefs/bset.h +++ b/libbcachefs/bset.h @@ -213,7 +213,7 @@ static inline size_t btree_aux_data_u64s(const struct btree *b) _k != btree_bkey_last(_b, _t); \ _k = bkey_p_next(_k)) -static inline bool bset_has_ro_aux_tree(struct bset_tree *t) +static inline bool bset_has_ro_aux_tree(const struct bset_tree *t) { return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; } @@ -504,7 +504,7 @@ struct bset_stats { size_t failed; }; -void bch2_btree_keys_stats(struct btree *, struct bset_stats *); +void bch2_btree_keys_stats(const struct btree *, struct bset_stats *); void bch2_bfloat_to_text(struct printbuf *, struct btree *, struct bkey_packed *); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index a26331d..e8530cc 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -1202,7 +1202,7 @@ wait_on_io: } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, - struct btree *b) + const struct btree *b) { const struct bkey_format *f = &b->format; struct bset_stats stats; @@ -1247,7 +1247,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, stats.failed); } -void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc) +void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc) { prt_printf(out, "nr nodes:\t\t%u\n", bc->used); prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty)); diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index a0b9231..4900ed4 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -100,7 +100,7 @@ static inline unsigned btree_blocks(struct bch_fs *c) #define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b) void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, - struct btree *); -void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *); + const struct btree *); +void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 29163b4..7a9cc37 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -105,8 +105,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size, vpfree(p, size); } -static void *btree_bounce_alloc(struct bch_fs *c, size_t size, - bool *used_mempool) +static void *_btree_bounce_alloc(struct bch_fs *c, size_t size, + bool *used_mempool) { unsigned flags = memalloc_nofs_save(); void *p; @@ -114,7 +114,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, BUG_ON(size > btree_bytes(c)); *used_mempool = false; - p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); + p = _vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); @@ -122,6 +122,8 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, memalloc_nofs_restore(flags); return p; } +#define btree_bounce_alloc(_c, _size, _used_mempool) \ + alloc_hooks(_btree_bounce_alloc(_c, _size, _used_mempool), void *, NULL) static void sort_bkey_ptrs(const struct btree *bt, struct bkey_packed **ptrs, unsigned nr) diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index fdb267d..2d34499 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1723,6 +1723,17 @@ err: goto out; } +struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) +{ + struct btree *b; + + while (b = bch2_btree_iter_peek_node(iter), + bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) + bch2_trans_begin(iter->trans); + + return b; +} + struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 448be08..6b7cef1 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -295,6 +295,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); int __must_check bch2_btree_iter_traverse(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *); +struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); @@ -521,18 +522,6 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, u32 bch2_trans_begin(struct btree_trans *); -static inline struct btree * -__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter) -{ - struct btree *b; - - while (b = bch2_btree_iter_peek_node(iter), - bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) - bch2_trans_begin(trans); - - return b; -} - /* * XXX * this does not handle transaction restarts from bch2_btree_iter_next_node() @@ -542,7 +531,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter _locks_want, _depth, _flags, _b, _ret) \ for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ _start, _locks_want, _depth, _flags); \ - (_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\ + (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ (_b) = bch2_btree_iter_next_node(&(_iter))) diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c index f3ffdbc..00d0e67 100644 --- a/libbcachefs/clock.c +++ b/libbcachefs/clock.c @@ -122,7 +122,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, } __set_current_state(TASK_RUNNING); - del_singleshot_timer_sync(&wait.cpu_timer); + del_timer_sync(&wait.cpu_timer); destroy_timer_on_stack(&wait.cpu_timer); bch2_io_timer_del(clock, &wait.io_timer); } @@ -157,6 +157,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) unsigned long now; unsigned i; + out->atomic++; spin_lock(&clock->timer_lock); now = atomic64_read(&clock->now); @@ -165,6 +166,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) clock->timers.data[i]->fn, clock->timers.data[i]->expire - now); spin_unlock(&clock->timer_lock); + --out->atomic; } void bch2_io_clock_exit(struct io_clock *clock) diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 0035fe8..d1563ca 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -181,6 +181,125 @@ out: bch2_btree_node_io_unlock(b); } +void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, + const struct btree *b) +{ + struct btree_node *n_ondisk = NULL; + struct extent_ptr_decoded pick; + struct bch_dev *ca; + struct bio *bio = NULL; + unsigned offset = 0; + int ret; + + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) { + prt_printf(out, "error getting device to read from: invalid device\n"); + return; + } + + ca = bch_dev_bkey_exists(c, pick.ptr.dev); + if (!bch2_dev_get_ioref(ca, READ)) { + prt_printf(out, "error getting device to read from: not online\n"); + return; + } + + n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); + if (!n_ondisk) { + prt_printf(out, "memory allocation failure\n"); + goto out; + } + + bio = bio_alloc_bioset(ca->disk_sb.bdev, + buf_pages(n_ondisk, btree_bytes(c)), + REQ_OP_READ|REQ_META, + GFP_NOIO, + &c->btree_bio); + bio->bi_iter.bi_sector = pick.ptr.offset; + bch2_bio_map(bio, n_ondisk, btree_bytes(c)); + + ret = submit_bio_wait(bio); + if (ret) { + prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret)); + goto out; + } + + while (offset < btree_sectors(c)) { + struct bset *i; + struct nonce nonce; + struct bch_csum csum; + struct bkey_packed *k; + unsigned sectors; + + if (!offset) { + i = &n_ondisk->keys; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk); + + if (bch2_crc_cmp(csum, n_ondisk->csum)) { + prt_printf(out, "invalid checksum\n"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(n_ondisk, c->block_bits); + } else { + struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9); + + i = &bne->keys; + + if (i->seq != n_ondisk->keys.seq) + break; + + if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { + prt_printf(out, "unknown checksum type at offset %u: %llu\n", + offset, BSET_CSUM_TYPE(i)); + goto out; + } + + nonce = btree_nonce(i, offset << 9); + csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); + + if (bch2_crc_cmp(csum, bne->csum)) { + prt_printf(out, "invalid checksum"); + goto out; + } + + bset_encrypt(c, i, offset << 9); + + sectors = vstruct_sectors(bne, c->block_bits); + } + + prt_printf(out, " offset %u version %u, journal seq %llu\n", + offset, + le16_to_cpu(i->version), + le64_to_cpu(i->journal_seq)); + offset += sectors; + + printbuf_indent_add(out, 4); + + for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { + struct bkey u; + + bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); + prt_newline(out); + } + + printbuf_indent_sub(out, 4); + } +out: + if (bio) + bio_put(bio); + kvpfree(n_ondisk, btree_bytes(c)); + percpu_ref_put(&ca->io_ref); +} + #ifdef CONFIG_DEBUG_FS /* XXX: bch_fs refcounting */ diff --git a/libbcachefs/debug.h b/libbcachefs/debug.h index 0b86736..2c37143 100644 --- a/libbcachefs/debug.h +++ b/libbcachefs/debug.h @@ -9,6 +9,8 @@ struct btree; struct bch_fs; void __bch2_btree_verify(struct bch_fs *, struct btree *); +void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *, + const struct btree *); static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) { diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index c0342e6..7d43fd4 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -213,8 +213,9 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf) } } +/* XXX: this is a non-mempoolified memory allocation: */ static int ec_stripe_buf_init(struct ec_stripe_buf *buf, - unsigned offset, unsigned size) + unsigned offset, unsigned size) { struct bch_stripe *v = &buf->key.v; unsigned csum_granularity = 1U << v->csum_granularity_bits; @@ -241,7 +242,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf, return 0; err: ec_stripe_buf_exit(buf); - return -ENOMEM; + return -BCH_ERR_ENOMEM_stripe_buf; } /* Checksumming: */ @@ -914,6 +915,9 @@ static int ec_stripe_update_extent(struct btree_trans *trans, b = bch2_backpointer_get_node(trans, &node_iter, bucket, *bp_offset, bp); bch2_trans_iter_exit(trans, &node_iter); + if (!b) + return 0; + prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); bch2_backpointer_to_text(&buf, &bp); @@ -1099,6 +1103,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) } BUG_ON(!s->allocated); + BUG_ON(!s->idx); ec_generate_ec(&s->new_stripe); @@ -1143,7 +1148,12 @@ err: } } - bch2_stripe_close(c, s); + mutex_lock(&c->ec_stripe_new_lock); + list_del(&s->list); + mutex_unlock(&c->ec_stripe_new_lock); + + if (s->idx) + bch2_stripe_close(c, s); ec_stripe_buf_exit(&s->existing_stripe); ec_stripe_buf_exit(&s->new_stripe); @@ -1157,10 +1167,8 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c) mutex_lock(&c->ec_stripe_new_lock); list_for_each_entry(s, &c->ec_stripe_new_list, list) - if (!atomic_read(&s->pin)) { - list_del(&s->list); + if (!atomic_read(&s->pin)) goto out; - } s = NULL; out: mutex_unlock(&c->ec_stripe_new_lock); @@ -1188,14 +1196,6 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); } -static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s) -{ - BUG_ON(atomic_read(&s->pin) <= 0); - - if (atomic_dec_and_test(&s->pin)) - bch2_ec_do_stripe_creates(c); -} - static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s = h->s; @@ -1212,14 +1212,6 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_new_put(c, s); } -/* have a full bucket - hand it off to be erasure coded: */ -void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob) -{ - struct ec_stripe_new *s = ob->ec; - - ec_stripe_new_put(c, s); -} - void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) { struct ec_stripe_new *s = ob->ec; @@ -1236,6 +1228,8 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) if (!ob) return NULL; + BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]); + ca = bch_dev_bkey_exists(c, ob->dev); offset = ca->mi.bucket_size - ob->sectors_free; @@ -1436,6 +1430,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ bool have_cache = true; int ret = 0; + BUG_ON(h->s->new_stripe.key.v.nr_blocks != h->s->nr_data + h->s->nr_parity); + BUG_ON(h->s->new_stripe.key.v.nr_redundant != h->s->nr_parity); + for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) { __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d); if (i < h->s->nr_data) @@ -1546,9 +1543,13 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri s64 idx; int ret; + /* + * If we can't allocate a new stripe, and there's no stripes with empty + * blocks for us to reuse, that means we have to wait on copygc: + */ idx = get_existing_stripe(c, h); if (idx < 0) - return -BCH_ERR_ENOSPC_stripe_reuse; + return -BCH_ERR_stripe_alloc_blocked; ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); if (ret) { @@ -1558,12 +1559,14 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri return ret; } - if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { - /* - * this is a problem: we have deleted from the - * stripes heap already - */ - BUG(); + BUG_ON(h->s->existing_stripe.key.v.nr_redundant != h->s->nr_parity); + h->s->nr_data = h->s->existing_stripe.key.v.nr_blocks - + h->s->existing_stripe.key.v.nr_redundant; + + ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); + if (ret) { + bch2_stripe_close(c, h->s); + return ret; } BUG_ON(h->s->existing_stripe.size != h->blocksize); @@ -1675,9 +1678,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, bch_err(c, "failed to allocate new stripe"); goto err; } - - if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) - BUG(); } if (h->s->allocated) @@ -1690,7 +1690,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?: __bch2_ec_stripe_head_reserve(trans, h); if (!ret) - goto allocated; + goto allocate_buf; if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || bch2_err_matches(ret, ENOMEM)) goto err; @@ -1703,8 +1703,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, ret = __bch2_ec_stripe_head_reuse(trans, h); if (!ret) break; - if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl) - ret = -BCH_ERR_stripe_alloc_blocked; if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; @@ -1723,10 +1721,16 @@ alloc_existing: ret = new_stripe_alloc_buckets(trans, h, reserve, cl); if (ret) goto err; -allocated: + +allocate_buf: + ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); + if (ret) + goto err; + h->s->allocated = true; +allocated: BUG_ON(!h->s->idx); - + BUG_ON(!h->s->new_stripe.data[0]); BUG_ON(trans->restarted); return h; err: @@ -1839,8 +1843,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) h->target, h->algo, h->redundancy); if (h->s) - prt_printf(out, "\tpending: blocks %u+%u allocated %u\n", - h->s->nr_data, h->s->nr_parity, + prt_printf(out, "\tpending: idx %llu blocks %u+%u allocated %u\n", + h->s->idx, h->s->nr_data, h->s->nr_parity, bitmap_weight(h->s->blocks_allocated, h->s->nr_data)); } @@ -1848,9 +1852,9 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) mutex_lock(&c->ec_stripe_new_lock); list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tin flight: blocks %u+%u pin %u\n", - s->nr_data, s->nr_parity, - atomic_read(&s->pin)); + prt_printf(out, "\tin flight: idx %llu blocks %u+%u pin %u\n", + s->idx, s->nr_data, s->nr_parity, + atomic_read(&s->pin)); } mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 56d1b5e..d112aea 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -198,7 +198,6 @@ int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *); void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); @@ -213,6 +212,21 @@ void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); void bch2_do_stripe_deletes(struct bch_fs *); +void bch2_ec_do_stripe_creates(struct bch_fs *); + +static inline void ec_stripe_new_get(struct ec_stripe_new *s) +{ + atomic_inc(&s->pin); +} + +static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s) +{ + BUG_ON(atomic_read(&s->pin) <= 0); + BUG_ON(!s->err && !s->idx); + + if (atomic_dec_and_test(&s->pin)) + bch2_ec_do_stripe_creates(c); +} void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 283303d..162e315 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -3,11 +3,11 @@ #define _BCACHEFS_ERRCODE_H #define BCH_ERRCODES() \ + x(ENOMEM, ENOMEM_stripe_buf) \ x(ENOSPC, ENOSPC_disk_reservation) \ x(ENOSPC, ENOSPC_bucket_alloc) \ x(ENOSPC, ENOSPC_disk_label_add) \ x(ENOSPC, ENOSPC_stripe_create) \ - x(ENOSPC, ENOSPC_stripe_reuse) \ x(ENOSPC, ENOSPC_inode_create) \ x(ENOSPC, ENOSPC_str_hash_create) \ x(ENOSPC, ENOSPC_snapshot_create) \ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index e088bbc..b511735 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1217,7 +1217,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, bch2_page_state_create(page, __GFP_NOFAIL); - bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); + rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; rbio->bio.bi_iter.bi_sector = (sector_t) page->index << PAGE_SECTORS_SHIFT; BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); @@ -2017,7 +2017,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) &c->bio_read); bio->bi_end_io = bch2_direct_IO_read_split_endio; start: - bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC); + bio->bi_opf = REQ_OP_READ|REQ_SYNC; bio->bi_iter.bi_sector = offset >> 9; bio->bi_private = dio; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index de30dac..ea0fd63 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -835,6 +835,10 @@ static void bch2_write_index(struct closure *cl) struct write_point *wp = op->wp; struct workqueue_struct *wq = index_update_wq(op); + if ((op->flags & BCH_WRITE_DONE) && + (op->flags & BCH_WRITE_MOVE)) + bch2_bio_free_pages_pool(op->c, &op->wbio.bio); + barrier(); /* diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 11ea109..5e952d6 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -41,18 +41,19 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) } struct moving_io { - struct list_head list; - struct closure cl; - bool read_completed; + struct list_head list; + struct move_bucket_in_flight *b; + struct closure cl; + bool read_completed; - unsigned read_sectors; - unsigned write_sectors; + unsigned read_sectors; + unsigned write_sectors; - struct bch_read_bio rbio; + struct bch_read_bio rbio; - struct data_update write; + struct data_update write; /* Must be last since it is variable size */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[0]; }; static void move_free(struct moving_io *io) @@ -60,6 +61,9 @@ static void move_free(struct moving_io *io) struct moving_context *ctxt = io->write.ctxt; struct bch_fs *c = ctxt->c; + if (io->b) + atomic_dec(&io->b->count); + bch2_data_update_exit(&io->write); wake_up(&ctxt->wait); bch2_write_ref_put(c, BCH_WRITE_REF_move); @@ -235,6 +239,7 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, static int bch2_move_extent(struct btree_trans *trans, struct btree_iter *iter, struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, struct bch_io_opts io_opts, enum btree_id btree_id, struct bkey_s_c k, @@ -295,7 +300,7 @@ static int bch2_move_extent(struct btree_trans *trans, bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); io->rbio.bio.bi_iter.bi_size = sectors << 9; - bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0); + io->rbio.bio.bi_opf = REQ_OP_READ; io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; @@ -320,6 +325,11 @@ static int bch2_move_extent(struct btree_trans *trans, atomic64_add(k.k->size, &ctxt->stats->sectors_moved); } + if (bucket_in_flight) { + io->b = bucket_in_flight; + atomic_inc(&io->b->count); + } + this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); trace_move_extent_read(k.k); @@ -522,8 +532,8 @@ static int __bch2_move_data(struct moving_context *ctxt, k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); - ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts, - btree_id, k, data_opts); + ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL, + io_opts, btree_id, k, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -665,6 +675,7 @@ failed_to_evacuate: int __bch2_evacuate_bucket(struct btree_trans *trans, struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) { @@ -753,8 +764,9 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, i++; } - ret = bch2_move_extent(trans, &iter, ctxt, io_opts, - bp.btree_id, k, data_opts); + ret = bch2_move_extent(trans, &iter, ctxt, + bucket_in_flight, + io_opts, bp.btree_id, k, data_opts); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -834,7 +846,7 @@ int bch2_evacuate_bucket(struct bch_fs *c, bch2_trans_init(&trans, c, 0, 0); bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); bch2_trans_exit(&trans); diff --git a/libbcachefs/move.h b/libbcachefs/move.h index 3b283af..4c00138 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -70,6 +70,7 @@ int bch2_move_data(struct bch_fs *, int __bch2_evacuate_bucket(struct btree_trans *, struct moving_context *, + struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h index 9df6d18..285ffdb 100644 --- a/libbcachefs/move_types.h +++ b/libbcachefs/move_types.h @@ -16,4 +16,10 @@ struct bch_move_stats { atomic64_t sectors_raced; }; +struct move_bucket_in_flight { + struct bpos bucket; + u8 gen; + atomic_t count; +}; + #endif /* _BCACHEFS_MOVE_TYPES_H */ diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 80f9227..79aaa45 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -56,8 +56,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &_a); *gen = a->gen; - ret = (a->data_type == BCH_DATA_btree || - a->data_type == BCH_DATA_user) && + ret = data_type_movable(a->data_type) && a->fragmentation_lru && a->fragmentation_lru <= time; @@ -72,47 +71,44 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, return ret; } -struct copygc_bucket_in_flight { - struct bpos bucket; - u8 gen; - struct moving_context ctxt; -}; - -typedef FIFO(struct copygc_bucket_in_flight) copygc_buckets_in_flight; +typedef FIFO(struct move_bucket_in_flight) move_buckets_in_flight; -struct copygc_bucket { +struct move_bucket { struct bpos bucket; u8 gen; }; -typedef DARRAY(struct copygc_bucket) copygc_buckets; +typedef DARRAY(struct move_bucket) move_buckets; -static int copygc_bucket_cmp(const void *_l, const void *_r) +static int move_bucket_cmp(const void *_l, const void *_r) { - const struct copygc_bucket *l = _l; - const struct copygc_bucket *r = _r; + const struct move_bucket *l = _l; + const struct move_bucket *r = _r; return bpos_cmp(l->bucket, r->bucket) ?: cmp_int(l->gen, r->gen); } -static bool bucket_in_flight(copygc_buckets *buckets_sorted, struct copygc_bucket b) +static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b) { return bsearch(&b, buckets_sorted->data, buckets_sorted->nr, sizeof(buckets_sorted->data[0]), - copygc_bucket_cmp) != NULL; + move_bucket_cmp) != NULL; } -static void copygc_buckets_wait(struct btree_trans *trans, - copygc_buckets_in_flight *buckets_in_flight, - size_t nr, bool verify_evacuated) +static void move_buckets_wait(struct btree_trans *trans, + struct moving_context *ctxt, + move_buckets_in_flight *buckets_in_flight, + size_t nr, bool verify_evacuated) { while (!fifo_empty(buckets_in_flight)) { - struct copygc_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight); + struct move_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight); - if (fifo_used(buckets_in_flight) <= nr && - closure_nr_remaining(&i->ctxt.cl) != 1) + if (fifo_used(buckets_in_flight) > nr) + move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count)); + + if (atomic_read(&i->count)) break; /* @@ -120,31 +116,34 @@ static void copygc_buckets_wait(struct btree_trans *trans, * reads, which inits another btree_trans; this one must be * unlocked: */ - bch2_trans_unlock(trans); - bch2_moving_ctxt_exit(&i->ctxt); if (verify_evacuated) bch2_verify_bucket_evacuated(trans, i->bucket, i->gen); buckets_in_flight->front++; } + + bch2_trans_unlock(trans); } static int bch2_copygc_get_buckets(struct btree_trans *trans, - copygc_buckets_in_flight *buckets_in_flight, - copygc_buckets *buckets) + struct moving_context *ctxt, + move_buckets_in_flight *buckets_in_flight, + move_buckets *buckets) { struct btree_iter iter; - copygc_buckets buckets_sorted = { 0 }; - struct copygc_bucket_in_flight *i; + move_buckets buckets_sorted = { 0 }; + struct move_bucket_in_flight *i; struct bkey_s_c k; - size_t fifo_iter; + size_t fifo_iter, nr_to_get; int ret; - copygc_buckets_wait(trans, buckets_in_flight, buckets_in_flight->size / 2, true); + move_buckets_wait(trans, ctxt, buckets_in_flight, buckets_in_flight->size / 2, true); + + nr_to_get = max(16UL, fifo_used(buckets_in_flight) / 4); fifo_for_each_entry_ptr(i, buckets_in_flight, fifo_iter) { - ret = darray_push(&buckets_sorted, ((struct copygc_bucket) {i->bucket, i->gen})); + ret = darray_push(&buckets_sorted, ((struct move_bucket) {i->bucket, i->gen})); if (ret) { - bch_err(trans->c, "error allocating copygc_buckets_sorted"); + bch_err(trans->c, "error allocating move_buckets_sorted"); goto err; } } @@ -152,19 +151,19 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, sort(buckets_sorted.data, buckets_sorted.nr, sizeof(buckets_sorted.data[0]), - copygc_bucket_cmp, + move_bucket_cmp, NULL); ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), 0, k, ({ - struct copygc_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) }; + struct move_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) }; int ret = 0; if (!bucket_in_flight(&buckets_sorted, b) && bch2_bucket_is_movable(trans, b.bucket, lru_pos_time(k.k->p), &b.gen)) - ret = darray_push(buckets, b) ?: buckets->nr >= fifo_free(buckets_in_flight); + ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get; ret; })); @@ -175,16 +174,17 @@ err: } static int bch2_copygc(struct btree_trans *trans, - copygc_buckets_in_flight *buckets_in_flight, - struct bch_move_stats *stats) + struct moving_context *ctxt, + move_buckets_in_flight *buckets_in_flight) { struct bch_fs *c = trans->c; + struct bch_move_stats move_stats; struct data_update_opts data_opts = { .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc, }; - copygc_buckets buckets = { 0 }; - struct copygc_bucket_in_flight *f; - struct copygc_bucket *i; + move_buckets buckets = { 0 }; + struct move_bucket_in_flight *f; + struct move_bucket *i; int ret = 0; ret = bch2_btree_write_buffer_flush(trans); @@ -192,7 +192,10 @@ static int bch2_copygc(struct btree_trans *trans, __func__, bch2_err_str(ret))) return ret; - ret = bch2_copygc_get_buckets(trans, buckets_in_flight, &buckets); + bch2_move_stats_init(&move_stats, "copygc"); + ctxt->stats = &move_stats; + + ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets); if (ret) goto err; @@ -203,11 +206,9 @@ static int bch2_copygc(struct btree_trans *trans, f = fifo_push_ref(buckets_in_flight); f->bucket = i->bucket; f->gen = i->gen; - bch2_moving_ctxt_init(&f->ctxt, c, NULL, NULL, //stats, - writepoint_ptr(&c->copygc_write_point), - false); + atomic_set(&f->count, 0); - ret = __bch2_evacuate_bucket(trans, &f->ctxt, f->bucket, f->gen, data_opts); + ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket, f->gen, data_opts); if (ret) goto err; } @@ -221,7 +222,8 @@ err: if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); - trace_and_count(c, copygc, c, atomic64_read(&stats->sectors_moved), 0, 0, 0); + trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); + ctxt->stats = NULL; return ret; } @@ -244,13 +246,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) struct bch_dev *ca; unsigned dev_idx; s64 wait = S64_MAX, fragmented_allowed, fragmented; + unsigned i; for_each_rw_member(ca, c, dev_idx) { struct bch_dev_usage usage = bch2_dev_usage_read(ca); fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_stripe) * ca->mi.bucket_size) >> 1); - fragmented = usage.d[BCH_DATA_user].fragmented; + fragmented = 0; + + for (i = 0; i < BCH_DATA_NR; i++) + if (data_type_movable(i)) + fragmented += usage.d[i].fragmented; wait = min(wait, max(0LL, fragmented_allowed - fragmented)); } @@ -274,32 +281,34 @@ static int bch2_copygc_thread(void *arg) { struct bch_fs *c = arg; struct btree_trans trans; - struct bch_move_stats move_stats; + struct moving_context ctxt; struct io_clock *clock = &c->io_clock[WRITE]; - copygc_buckets_in_flight copygc_buckets; + move_buckets_in_flight move_buckets; u64 last, wait; int ret = 0; - if (!init_fifo(©gc_buckets, 1 << 14, GFP_KERNEL)) { + if (!init_fifo(&move_buckets, 1 << 14, GFP_KERNEL)) { bch_err(c, "error allocating copygc buckets in flight"); return -ENOMEM; } set_freezable(); - bch2_move_stats_init(&move_stats, "copygc"); bch2_trans_init(&trans, c, 0, 0); + bch2_moving_ctxt_init(&ctxt, c, NULL, NULL, + writepoint_ptr(&c->copygc_write_point), + false); while (!ret && !kthread_should_stop()) { bch2_trans_unlock(&trans); - - try_to_freeze(); cond_resched(); - kthread_wait(freezing(current) || c->copy_gc_enabled); + if (!c->copy_gc_enabled) { + move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true); + kthread_wait_freezable(c->copy_gc_enabled); + } if (unlikely(freezing(current))) { - copygc_buckets_wait(&trans, ©gc_buckets, 0, true); - bch2_trans_unlock(&trans); + move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true); __refrigerator(false); continue; } @@ -308,6 +317,7 @@ static int bch2_copygc_thread(void *arg) wait = bch2_copygc_wait_amount(c); if (wait > clock->max_slop) { + move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true); trace_and_count(c, copygc_wait, c, wait, last + wait); c->copygc_wait = last + wait; bch2_kthread_io_clock_wait(clock, last + wait, @@ -318,15 +328,15 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(&trans, ©gc_buckets, &move_stats); + ret = bch2_copygc(&trans, &ctxt, &move_buckets); c->copygc_running = false; wake_up(&c->copygc_running_wq); } - copygc_buckets_wait(&trans, ©gc_buckets, 0, !ret); - free_fifo(©gc_buckets); + bch2_moving_ctxt_exit(&ctxt); bch2_trans_exit(&trans); + free_fifo(&move_buckets); return 0; } diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 76c2691..afbf82d 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -329,22 +329,22 @@ enum opt_type { x(norecovery, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't replay the journal") \ x(keep_journal, u8, \ 0, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't free journal entries/keys after startup")\ x(read_entire_journal, u8, \ 0, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Read all journal entries, not just dirty ones")\ x(read_journal_only, u8, \ 0, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Only read the journal, skip the rest of recovery")\ x(journal_transaction_names, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ @@ -354,7 +354,7 @@ enum opt_type { x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't open device in exclusive mode") \ x(direct_io, u8, \ OPT_FS|OPT_MOUNT, \ @@ -364,38 +364,38 @@ enum opt_type { x(sb, u64, \ OPT_MOUNT, \ OPT_UINT(0, S64_MAX), \ - BCH2_NO_SB_OPT, BCH_SB_SECTOR, \ + BCH2_NO_SB_OPT, BCH_SB_SECTOR, \ "offset", "Sector offset of superblock") \ x(read_only, u8, \ OPT_FS, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, NULL) \ x(nostart, u8, \ 0, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don\'t start filesystem, only open devices") \ x(reconstruct_alloc, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Reconstruct alloc btree") \ x(version_upgrade, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Set superblock to latest version,\n" \ "allowing any new features to be used") \ x(buckets_nouse, u8, \ 0, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Allocate the buckets_nouse bitmap") \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, NULL) \ x(nocow, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ @@ -411,9 +411,9 @@ enum opt_type { NULL, "Enable nocow mode: enables runtime locking in\n"\ "data move path needed if nocow will ever be in use\n")\ x(no_data_io, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_MOUNT, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Skip submit_bio() for data reads and writes, " \ "for performance testing purposes") \ x(fs_size, u64, \ diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 87446f7..d2e6adc 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -233,7 +233,13 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, orig->k.type = KEY_TYPE_reflink_p; r_p = bkey_i_to_reflink_p(orig); set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); + + /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */ +#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) + __underlying_memset(&r_p->v, 0, sizeof(r_p->v)); +#else memset(&r_p->v, 0, sizeof(r_p->v)); +#endif r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); diff --git a/libbcachefs/util.c b/libbcachefs/util.c index bf5ffb4..56c21c6 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -761,10 +761,10 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size) } } -int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) +int _bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) { while (size) { - struct page *page = alloc_page(gfp_mask); + struct page *page = _alloc_pages(gfp_mask, 0); unsigned len = min_t(size_t, PAGE_SIZE, size); if (!page) diff --git a/libbcachefs/util.h b/libbcachefs/util.h index d994c15..ecfe540 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -60,12 +60,14 @@ static inline void vpfree(void *p, size_t size) free_pages((unsigned long) p, get_order(size)); } -static inline void *vpmalloc(size_t size, gfp_t gfp_mask) +static inline void *_vpmalloc(size_t size, gfp_t gfp_mask) { - return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, + return (void *) _get_free_pages(gfp_mask|__GFP_NOWARN, get_order(size)) ?: __vmalloc(size, gfp_mask); } +#define vpmalloc(_size, _gfp) \ + alloc_hooks(_vpmalloc(_size, _gfp), void *, NULL) static inline void kvpfree(void *p, size_t size) { @@ -75,12 +77,14 @@ static inline void kvpfree(void *p, size_t size) vpfree(p, size); } -static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) +static inline void *_kvpmalloc(size_t size, gfp_t gfp_mask) { return size < PAGE_SIZE - ? kmalloc(size, gfp_mask) - : vpmalloc(size, gfp_mask); + ? _kmalloc(size, gfp_mask) + : _vpmalloc(size, gfp_mask); } +#define kvpmalloc(_size, _gfp) \ + alloc_hooks(_kvpmalloc(_size, _gfp), void *, NULL) int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); @@ -530,7 +534,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) } void bch2_bio_map(struct bio *bio, void *base, size_t); -int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); +int _bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); +#define bch2_bio_alloc_pages(_bio, _size, _gfp) \ + alloc_hooks(_bch2_bio_alloc_pages(_bio, _size, _gfp), int, -ENOMEM) static inline sector_t bdev_sectors(struct block_device *bdev) { @@ -566,11 +572,9 @@ do { \ #define kthread_wait_freezable(cond) \ ({ \ int _ret = 0; \ - bool frozen; \ - \ while (1) { \ set_current_state(TASK_INTERRUPTIBLE); \ - if (kthread_freezable_should_stop(&frozen)) { \ + if (kthread_should_stop()) { \ _ret = -1; \ break; \ } \ @@ -579,6 +583,7 @@ do { \ break; \ \ schedule(); \ + try_to_freeze(); \ } \ set_current_state(TASK_RUNNING); \ _ret; \ -- 2.39.2