git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 3856459b1b bcachefs: bch2_btree_iter_peek_node_and_restart()
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 6 Mar 2023 07:35:56 +0000 (02:35 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 6 Mar 2023 09:11:50 +0000 (04:11 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
31 files changed:
.bcachefs_revision
Makefile.compiler
include/linux/slab.h
libbcachefs/acl.c
libbcachefs/acl.h
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/bkey.h
libbcachefs/bset.c
libbcachefs/bset.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/clock.c
libbcachefs/debug.c
libbcachefs/debug.h
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/errcode.h
libbcachefs/fs-io.c
libbcachefs/io.c
libbcachefs/move.c
libbcachefs/move.h
libbcachefs/move_types.h
libbcachefs/movinggc.c
libbcachefs/opts.h
libbcachefs/reflink.c
libbcachefs/util.c
libbcachefs/util.h

index 48ce6994bc10cf8905141a42cd0bcb8335580859..2845be6830c7c935f92ba55579d82278f04c41df 100644 (file)
@@ -1 +1 @@
-171da96d76d03a12872c8c9e2d02602c3ddfcb5f
+3856459b1b9f37cebee2bca3c9edcafaf393aa98
index 20d353dcabfbc50b0419936724c57a4a6f2f22e3..3d8adfd34af1baf9e9155af660cdafa708000612 100644 (file)
@@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\
 
 # gcc-min-version
 # Usage: cflags-$(call gcc-min-version, 70100) += -foo
-gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y)
+gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1)
 
 # clang-min-version
 # Usage: cflags-$(call clang-min-version, 110000) += -foo
-clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y)
+clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
 
 # ld-option
 # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
index ff122ff9abda259e6ad8574b7544c5491e875f45..144e333e0a0b39dfe7746aa2e494f6807eb7a2f3 100644 (file)
 #include <stdlib.h>
 #include <sys/mman.h>
 
+#define alloc_hooks(_do, ...)          _do
+
 #define ARCH_KMALLOC_MINALIGN          16
 #define KMALLOC_MAX_SIZE               SIZE_MAX
 
-static inline void *kmalloc(size_t size, gfp_t flags)
+static inline void *_kmalloc(size_t size, gfp_t flags)
 {
        unsigned i;
        void *p;
@@ -44,6 +46,7 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 
        return p;
 }
+#define kmalloc                _kmalloc
 
 static inline void *krealloc(void *old, size_t size, gfp_t flags)
 {
@@ -94,7 +97,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
 #define kvzalloc(size, flags)          kzalloc(size, flags)
 #define kvfree(p)                      kfree(p)
 
-static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
+static inline struct page *_alloc_pages(gfp_t flags, unsigned int order)
 {
        size_t size = PAGE_SIZE << order;
        unsigned i;
@@ -114,9 +117,11 @@ static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
 
        return p;
 }
+#define alloc_pages                    _alloc_pages
 
 #define alloc_page(gfp)                        alloc_pages(gfp, 0)
 
+#define _get_free_pages(gfp, order)    ((unsigned long) alloc_pages(gfp, order))
 #define __get_free_pages(gfp, order)   ((unsigned long) alloc_pages(gfp, order))
 #define __get_free_page(gfp)           __get_free_pages(gfp, 0)
 
index 9592541f7b5ce8159f77f70052a905e330125e78..5cb06ac589602da4abf45fa5506788290607bf16 100644 (file)
@@ -212,9 +212,10 @@ bch2_acl_to_xattr(struct btree_trans *trans,
        return xattr;
 }
 
-struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
+struct posix_acl *bch2_get_acl(struct user_namespace *mnt_userns,
+                              struct dentry *dentry, int type)
 {
-       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
        struct btree_trans trans;
@@ -224,9 +225,6 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
        struct bkey_s_c k;
        int ret;
 
-       if (rcu)
-               return ERR_PTR(-ECHILD);
-
        bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
@@ -293,9 +291,10 @@ int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
 }
 
 int bch2_set_acl(struct user_namespace *mnt_userns,
-                struct inode *vinode, struct posix_acl *_acl, int type)
+                struct dentry *dentry,
+                struct posix_acl *_acl, int type)
 {
-       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans trans;
        struct btree_iter inode_iter = { NULL };
index 2d76a4897ba89ae3d4ef43135ecea243af9e1a7e..ac206f6584e9856828f061d3bbe6343251f1c901 100644 (file)
@@ -26,12 +26,12 @@ typedef struct {
        __le32          a_version;
 } bch_acl_header;
 
-struct posix_acl *bch2_get_acl(struct inode *, int, bool);
+struct posix_acl *bch2_get_acl(struct user_namespace *, struct dentry *, int);
 
 int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
                       struct bch_inode_unpacked *,
                       struct posix_acl *, int);
-int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int);
+int bch2_set_acl(struct user_namespace *, struct dentry *, struct posix_acl *, int);
 int bch2_acl_chmod(struct btree_trans *, subvol_inum,
                   struct bch_inode_unpacked *,
                   umode_t, struct posix_acl **);
index 1aa7c7a023b3d5f7839adc2759bc6d649adb028f..c9ff590ef978f7f17445c90055cb7f349ebf6501 100644 (file)
@@ -74,14 +74,21 @@ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
        return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
 }
 
+#define DATA_TYPES_MOVABLE             \
+       ((1U << BCH_DATA_btree)|        \
+        (1U << BCH_DATA_user)|         \
+        (1U << BCH_DATA_stripe))
+
+static inline bool data_type_movable(enum bch_data_type type)
+{
+       return (1U << type) & DATA_TYPES_MOVABLE;
+}
+
 static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
                                              struct bch_dev *ca)
 {
-       if (a.data_type != BCH_DATA_btree &&
-           a.data_type != BCH_DATA_user)
-               return 0;
-
-       if (a.dirty_sectors >= ca->mi.bucket_size)
+       if (!data_type_movable(a.data_type) ||
+           a.dirty_sectors >= ca->mi.bucket_size)
                return 0;
 
        return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
index b2755c1e688955a4a326b78ed80394a043ba2d82..3a67ac0d913512f89e2ff61d2934af27155076cd 100644 (file)
@@ -97,7 +97,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
        struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
 
        if (ob->ec) {
-               bch2_ec_bucket_written(c, ob);
+               ec_stripe_new_put(c, ob->ec);
                return;
        }
 
@@ -796,11 +796,11 @@ got_bucket:
 
        ob->ec_idx      = ec_idx;
        ob->ec          = h->s;
+       ec_stripe_new_get(h->s);
 
        ret = add_new_bucket(c, ptrs, devs_may_alloc,
                             nr_replicas, nr_effective,
                             have_cache, flags, ob);
-       atomic_inc(&h->s->pin);
 out_put_head:
        bch2_ec_stripe_head_put(c, h);
        return ret;
@@ -1383,19 +1383,24 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str
        unsigned data_type = ob->data_type;
        barrier(); /* READ_ONCE() doesn't work on bitfields */
 
-       prt_printf(out, "%zu ref %u %s%s%s %u:%llu gen %u\n",
+       prt_printf(out, "%zu ref %u %s %u:%llu gen %u",
                   ob - c->open_buckets,
                   atomic_read(&ob->pin),
                   data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
-                  ob->ec ? " ec" : "",
-                  ob->on_partial_list ? " partial" : "",
                   ob->dev, ob->bucket, ob->gen);
+       if (ob->ec)
+               prt_printf(out, " ec idx %llu", ob->ec->idx);
+       if (ob->on_partial_list)
+               prt_str(out, " partial");
+       prt_newline(out);
 }
 
 void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
 {
        struct open_bucket *ob;
 
+       out->atomic++;
+
        for (ob = c->open_buckets;
             ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
             ob++) {
@@ -1404,17 +1409,23 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
                        bch2_open_bucket_to_text(out, c, ob);
                spin_unlock(&ob->lock);
        }
+
+       --out->atomic;
 }
 
 void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
 {
        unsigned i;
 
+       out->atomic++;
        spin_lock(&c->freelist_lock);
+
        for (i = 0; i < c->open_buckets_partial_nr; i++)
                bch2_open_bucket_to_text(out, c,
                                c->open_buckets + c->open_buckets_partial[i]);
+
        spin_unlock(&c->freelist_lock);
+       --out->atomic;
 }
 
 static const char * const bch2_write_point_states[] = {
index 983572efd0814688d6fb1f6a184c1d247d70739d..2650bd639b55998c9b6a31f6ce2312520d687a22 100644 (file)
@@ -497,7 +497,7 @@ static inline struct bpos bkey_unpack_pos(const struct btree *b,
 
 /* Disassembled bkeys */
 
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+static inline struct bkey_s_c bkey_disassemble(const struct btree *b,
                                               const struct bkey_packed *k,
                                               struct bkey *u)
 {
@@ -507,7 +507,7 @@ static inline struct bkey_s_c bkey_disassemble(struct btree *b,
 }
 
 /* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
+static inline struct bkey_s __bkey_disassemble(const struct btree *b,
                                               struct bkey_packed *k,
                                               struct bkey *u)
 {
index 3bd50f12f5a4ce2306fcc666cddc6f27abc486ee..0216ad96777a370808d328634a2fe65cc44b54b4 100644 (file)
@@ -1536,9 +1536,9 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
 
 /* Mergesort */
 
-void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats)
+void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats)
 {
-       struct bset_tree *t;
+       const struct bset_tree *t;
 
        for_each_bset(b, t) {
                enum bset_aux_tree_type type = bset_aux_tree_type(t);
index 2105e7836557c854c12070f309f5116b8562eaf7..632c2b8c54609b4be37f11e18868e4c41dcb736b 100644 (file)
@@ -213,7 +213,7 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
             _k != btree_bkey_last(_b, _t);                             \
             _k = bkey_p_next(_k))
 
-static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t)
 {
        return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
 }
@@ -504,7 +504,7 @@ struct bset_stats {
        size_t failed;
 };
 
-void bch2_btree_keys_stats(struct btree *, struct bset_stats *);
+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *);
 void bch2_bfloat_to_text(struct printbuf *, struct btree *,
                         struct bkey_packed *);
 
index a26331dbdb79ee494527d8c9ddee7455803b6c6a..e8530cceacf46e87e02e64317165938db1242cf3 100644 (file)
@@ -1202,7 +1202,7 @@ wait_on_io:
 }
 
 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
-                            struct btree *b)
+                            const struct btree *b)
 {
        const struct bkey_format *f = &b->format;
        struct bset_stats stats;
@@ -1247,7 +1247,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
               stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
 {
        prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
        prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
index a0b9231b9d2cdef229a9fc34c1db6a4a0e84070b..4900ed45422eae2db978437fc8f1e32db7bd044e 100644 (file)
@@ -100,7 +100,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
 #define btree_node_root(_c, _b)        ((_c)->btree_roots[(_b)->c.btree_id].b)
 
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
-                            struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
+                            const struct btree *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
index 29163b46ba5980033fdbc9da5a74db9019c5c063..7a9cc3787442dc4377c35a0bd3b6334a911e8bde 100644 (file)
@@ -105,8 +105,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
                vpfree(p, size);
 }
 
-static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
-                               bool *used_mempool)
+static void *_btree_bounce_alloc(struct bch_fs *c, size_t size,
+                                bool *used_mempool)
 {
        unsigned flags = memalloc_nofs_save();
        void *p;
@@ -114,7 +114,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
        BUG_ON(size > btree_bytes(c));
 
        *used_mempool = false;
-       p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+       p = _vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
        if (!p) {
                *used_mempool = true;
                p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
@@ -122,6 +122,8 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
        memalloc_nofs_restore(flags);
        return p;
 }
+#define btree_bounce_alloc(_c, _size, _used_mempool)           \
+       alloc_hooks(_btree_bounce_alloc(_c, _size, _used_mempool), void *, NULL)
 
 static void sort_bkey_ptrs(const struct btree *bt,
                           struct bkey_packed **ptrs, unsigned nr)
index fdb267dd483b37ed31e3e7206922dd8cd711007b..2d344993674980851625796832382aeb417fe751 100644 (file)
@@ -1723,6 +1723,17 @@ err:
        goto out;
 }
 
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
+{
+       struct btree *b;
+
+       while (b = bch2_btree_iter_peek_node(iter),
+              bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
+               bch2_trans_begin(iter->trans);
+
+       return b;
+}
+
 struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 {
        struct btree_trans *trans = iter->trans;
index 448be089956968f1ad2440764ff768f3ce493c55..6b7cef145cedd72185381592ca90b22a7b33e23d 100644 (file)
@@ -295,6 +295,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
 int __must_check bch2_btree_iter_traverse(struct btree_iter *);
 
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
 struct btree *bch2_btree_iter_next_node(struct btree_iter *);
 
 struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
@@ -521,18 +522,6 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
 
 u32 bch2_trans_begin(struct btree_trans *);
 
-static inline struct btree *
-__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
-{
-       struct btree *b;
-
-       while (b = bch2_btree_iter_peek_node(iter),
-              bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
-               bch2_trans_begin(trans);
-
-       return b;
-}
-
 /*
  * XXX
  * this does not handle transaction restarts from bch2_btree_iter_next_node()
@@ -542,7 +531,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter
                              _locks_want, _depth, _flags, _b, _ret)    \
        for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
                                _start, _locks_want, _depth, _flags);   \
-            (_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\
+            (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)),    \
             !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b);                   \
             (_b) = bch2_btree_iter_next_node(&(_iter)))
 
index f3ffdbc38485baacb66cdd39097e4018440b0fd0..00d0e6725910155f1adbef4ebe375db493ff0de1 100644 (file)
@@ -122,7 +122,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
        }
 
        __set_current_state(TASK_RUNNING);
-       del_singleshot_timer_sync(&wait.cpu_timer);
+       del_timer_sync(&wait.cpu_timer);
        destroy_timer_on_stack(&wait.cpu_timer);
        bch2_io_timer_del(clock, &wait.io_timer);
 }
@@ -157,6 +157,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
        unsigned long now;
        unsigned i;
 
+       out->atomic++;
        spin_lock(&clock->timer_lock);
        now = atomic64_read(&clock->now);
 
@@ -165,6 +166,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
                       clock->timers.data[i]->fn,
                       clock->timers.data[i]->expire - now);
        spin_unlock(&clock->timer_lock);
+       --out->atomic;
 }
 
 void bch2_io_clock_exit(struct io_clock *clock)
index 0035fe875a478d8b14a1c6ea0472083f22c5725a..d1563caf7fb7bbe31d438116a8d2312e63af6c0e 100644 (file)
@@ -181,6 +181,125 @@ out:
        bch2_btree_node_io_unlock(b);
 }
 
+void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
+                                   const struct btree *b)
+{
+       struct btree_node *n_ondisk = NULL;
+       struct extent_ptr_decoded pick;
+       struct bch_dev *ca;
+       struct bio *bio = NULL;
+       unsigned offset = 0;
+       int ret;
+
+       if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
+               prt_printf(out, "error getting device to read from: invalid device\n");
+               return;
+       }
+
+       ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+       if (!bch2_dev_get_ioref(ca, READ)) {
+               prt_printf(out, "error getting device to read from: not online\n");
+               return;
+       }
+
+       n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+       if (!n_ondisk) {
+               prt_printf(out, "memory allocation failure\n");
+               goto out;
+       }
+
+       bio = bio_alloc_bioset(ca->disk_sb.bdev,
+                              buf_pages(n_ondisk, btree_bytes(c)),
+                              REQ_OP_READ|REQ_META,
+                              GFP_NOIO,
+                              &c->btree_bio);
+       bio->bi_iter.bi_sector  = pick.ptr.offset;
+       bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+
+       ret = submit_bio_wait(bio);
+       if (ret) {
+               prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
+               goto out;
+       }
+
+       while (offset < btree_sectors(c)) {
+               struct bset *i;
+               struct nonce nonce;
+               struct bch_csum csum;
+               struct bkey_packed *k;
+               unsigned sectors;
+
+               if (!offset) {
+                       i = &n_ondisk->keys;
+
+                       if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+                               prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+                                          offset, BSET_CSUM_TYPE(i));
+                               goto out;
+                       }
+
+                       nonce = btree_nonce(i, offset << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);
+
+                       if (bch2_crc_cmp(csum, n_ondisk->csum)) {
+                               prt_printf(out, "invalid checksum\n");
+                               goto out;
+                       }
+
+                       bset_encrypt(c, i, offset << 9);
+
+                       sectors = vstruct_sectors(n_ondisk, c->block_bits);
+               } else {
+                       struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);
+
+                       i = &bne->keys;
+
+                       if (i->seq != n_ondisk->keys.seq)
+                               break;
+
+                       if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+                               prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+                                          offset, BSET_CSUM_TYPE(i));
+                               goto out;
+                       }
+
+                       nonce = btree_nonce(i, offset << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+                       if (bch2_crc_cmp(csum, bne->csum)) {
+                               prt_printf(out, "invalid checksum");
+                               goto out;
+                       }
+
+                       bset_encrypt(c, i, offset << 9);
+
+                       sectors = vstruct_sectors(bne, c->block_bits);
+               }
+
+               prt_printf(out, "  offset %u version %u, journal seq %llu\n",
+                          offset,
+                          le16_to_cpu(i->version),
+                          le64_to_cpu(i->journal_seq));
+               offset += sectors;
+
+               printbuf_indent_add(out, 4);
+
+               for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
+                       struct bkey u;
+
+                       bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
+                       prt_newline(out);
+               }
+
+               printbuf_indent_sub(out, 4);
+       }
+out:
+       if (bio)
+               bio_put(bio);
+       kvpfree(n_ondisk, btree_bytes(c));
+       percpu_ref_put(&ca->io_ref);
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 /* XXX: bch_fs refcounting */
index 0b86736e5e1bea3ae631f1484a6e83fc7e5deba8..2c37143b5fd1803e20eeddead3ad5160711d7a27 100644 (file)
@@ -9,6 +9,8 @@ struct btree;
 struct bch_fs;
 
 void __bch2_btree_verify(struct bch_fs *, struct btree *);
+void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *,
+                                   const struct btree *);
 
 static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
 {
index c0342e6094a92bc04fd1f43f40056ccf9bd9cb75..7d43fd4a6bb79a2727ba35c4883ba4b278f5a0cd 100644 (file)
@@ -213,8 +213,9 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
        }
 }
 
+/* XXX: this is a non-mempoolified memory allocation: */
 static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
-                              unsigned offset, unsigned size)
+                             unsigned offset, unsigned size)
 {
        struct bch_stripe *v = &buf->key.v;
        unsigned csum_granularity = 1U << v->csum_granularity_bits;
@@ -241,7 +242,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
        return 0;
 err:
        ec_stripe_buf_exit(buf);
-       return -ENOMEM;
+       return -BCH_ERR_ENOMEM_stripe_buf;
 }
 
 /* Checksumming: */
@@ -914,6 +915,9 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
                b = bch2_backpointer_get_node(trans, &node_iter, bucket, *bp_offset, bp);
                bch2_trans_iter_exit(trans, &node_iter);
 
+               if (!b)
+                       return 0;
+
                prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
                bch2_backpointer_to_text(&buf, &bp);
 
@@ -1099,6 +1103,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        }
 
        BUG_ON(!s->allocated);
+       BUG_ON(!s->idx);
 
        ec_generate_ec(&s->new_stripe);
 
@@ -1143,7 +1148,12 @@ err:
                        }
                }
 
-       bch2_stripe_close(c, s);
+       mutex_lock(&c->ec_stripe_new_lock);
+       list_del(&s->list);
+       mutex_unlock(&c->ec_stripe_new_lock);
+
+       if (s->idx)
+               bch2_stripe_close(c, s);
 
        ec_stripe_buf_exit(&s->existing_stripe);
        ec_stripe_buf_exit(&s->new_stripe);
@@ -1157,10 +1167,8 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
 
        mutex_lock(&c->ec_stripe_new_lock);
        list_for_each_entry(s, &c->ec_stripe_new_list, list)
-               if (!atomic_read(&s->pin)) {
-                       list_del(&s->list);
+               if (!atomic_read(&s->pin))
                        goto out;
-               }
        s = NULL;
 out:
        mutex_unlock(&c->ec_stripe_new_lock);
@@ -1188,14 +1196,6 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
                bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
 }
 
-static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
-{
-       BUG_ON(atomic_read(&s->pin) <= 0);
-
-       if (atomic_dec_and_test(&s->pin))
-               bch2_ec_do_stripe_creates(c);
-}
-
 static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
 {
        struct ec_stripe_new *s = h->s;
@@ -1212,14 +1212,6 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
        ec_stripe_new_put(c, s);
 }
 
-/* have a full bucket - hand it off to be erasure coded: */
-void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
-{
-       struct ec_stripe_new *s = ob->ec;
-
-       ec_stripe_new_put(c, s);
-}
-
 void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
 {
        struct ec_stripe_new *s = ob->ec;
@@ -1236,6 +1228,8 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
        if (!ob)
                return NULL;
 
+       BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]);
+
        ca      = bch_dev_bkey_exists(c, ob->dev);
        offset  = ca->mi.bucket_size - ob->sectors_free;
 
@@ -1436,6 +1430,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
        bool have_cache = true;
        int ret = 0;
 
+       BUG_ON(h->s->new_stripe.key.v.nr_blocks         != h->s->nr_data + h->s->nr_parity);
+       BUG_ON(h->s->new_stripe.key.v.nr_redundant      != h->s->nr_parity);
+
        for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
                __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
                if (i < h->s->nr_data)
@@ -1546,9 +1543,13 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
        s64 idx;
        int ret;
 
+       /*
+        * If we can't allocate a new stripe, and there's no stripes with empty
+        * blocks for us to reuse, that means we have to wait on copygc:
+        */
        idx = get_existing_stripe(c, h);
        if (idx < 0)
-               return -BCH_ERR_ENOSPC_stripe_reuse;
+               return -BCH_ERR_stripe_alloc_blocked;
 
        ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
        if (ret) {
@@ -1558,12 +1559,14 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
                return ret;
        }
 
-       if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
-               /*
-                * this is a problem: we have deleted from the
-                * stripes heap already
-                */
-               BUG();
+       BUG_ON(h->s->existing_stripe.key.v.nr_redundant != h->s->nr_parity);
+       h->s->nr_data = h->s->existing_stripe.key.v.nr_blocks -
+               h->s->existing_stripe.key.v.nr_redundant;
+
+       ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+       if (ret) {
+               bch2_stripe_close(c, h->s);
+               return ret;
        }
 
        BUG_ON(h->s->existing_stripe.size != h->blocksize);
@@ -1675,9 +1678,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
                        bch_err(c, "failed to allocate new stripe");
                        goto err;
                }
-
-               if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
-                       BUG();
        }
 
        if (h->s->allocated)
@@ -1690,7 +1690,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
        ret =   new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
                __bch2_ec_stripe_head_reserve(trans, h);
        if (!ret)
-               goto allocated;
+               goto allocate_buf;
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
            bch2_err_matches(ret, ENOMEM))
                goto err;
@@ -1703,8 +1703,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
                ret = __bch2_ec_stripe_head_reuse(trans, h);
                if (!ret)
                        break;
-               if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
-                       ret = -BCH_ERR_stripe_alloc_blocked;
                if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
                        goto err;
 
@@ -1723,10 +1721,16 @@ alloc_existing:
        ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
        if (ret)
                goto err;
-allocated:
+
+allocate_buf:
+       ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
+       if (ret)
+               goto err;
+
        h->s->allocated = true;
+allocated:
        BUG_ON(!h->s->idx);
-
+       BUG_ON(!h->s->new_stripe.data[0]);
        BUG_ON(trans->restarted);
        return h;
 err:
@@ -1839,8 +1843,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
                       h->target, h->algo, h->redundancy);
 
                if (h->s)
-                       prt_printf(out, "\tpending: blocks %u+%u allocated %u\n",
-                              h->s->nr_data, h->s->nr_parity,
+                       prt_printf(out, "\tpending: idx %llu blocks %u+%u allocated %u\n",
+                              h->s->idx, h->s->nr_data, h->s->nr_parity,
                               bitmap_weight(h->s->blocks_allocated,
                                             h->s->nr_data));
        }
@@ -1848,9 +1852,9 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
 
        mutex_lock(&c->ec_stripe_new_lock);
        list_for_each_entry(s, &c->ec_stripe_new_list, list) {
-               prt_printf(out, "\tin flight: blocks %u+%u pin %u\n",
-                      s->nr_data, s->nr_parity,
-                      atomic_read(&s->pin));
+               prt_printf(out, "\tin flight: idx %llu blocks %u+%u pin %u\n",
+                          s->idx, s->nr_data, s->nr_parity,
+                          atomic_read(&s->pin));
        }
        mutex_unlock(&c->ec_stripe_new_lock);
 }
index 56d1b5e7d79733ada4b36192de92146fd8530da3..d112aea9ec5632f0339532fd39c3e2407fceedc2 100644 (file)
@@ -198,7 +198,6 @@ int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *);
 
 void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
 
-void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *);
 void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
 
 int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
@@ -213,6 +212,21 @@ void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
 void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
 
 void bch2_do_stripe_deletes(struct bch_fs *);
+void bch2_ec_do_stripe_creates(struct bch_fs *);
+
+static inline void ec_stripe_new_get(struct ec_stripe_new *s)
+{
+       atomic_inc(&s->pin);
+}
+
+static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
+{
+       BUG_ON(atomic_read(&s->pin) <= 0);
+       BUG_ON(!s->err && !s->idx);
+
+       if (atomic_dec_and_test(&s->pin))
+               bch2_ec_do_stripe_creates(c);
+}
 
 void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
 
index 283303db7dfda74919cc5c0d532972c30c6c47f0..162e315601f9de495700f2076ed1f59c4dcaf9f2 100644 (file)
@@ -3,11 +3,11 @@
 #define _BCACHEFS_ERRCODE_H
 
 #define BCH_ERRCODES()                                                         \
+       x(ENOMEM,                       ENOMEM_stripe_buf)                      \
        x(ENOSPC,                       ENOSPC_disk_reservation)                \
        x(ENOSPC,                       ENOSPC_bucket_alloc)                    \
        x(ENOSPC,                       ENOSPC_disk_label_add)                  \
        x(ENOSPC,                       ENOSPC_stripe_create)                   \
-       x(ENOSPC,                       ENOSPC_stripe_reuse)                    \
        x(ENOSPC,                       ENOSPC_inode_create)                    \
        x(ENOSPC,                       ENOSPC_str_hash_create)                 \
        x(ENOSPC,                       ENOSPC_snapshot_create)                 \
index e088bbcccc8d90f67693463ae3316cac91f7b798..b511735b377204f42cfd5bfa2ae50231fc593700 100644 (file)
@@ -1217,7 +1217,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
 
        bch2_page_state_create(page, __GFP_NOFAIL);
 
-       bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
+       rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
        rbio->bio.bi_iter.bi_sector =
                (sector_t) page->index << PAGE_SECTORS_SHIFT;
        BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
@@ -2017,7 +2017,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
                                       &c->bio_read);
                bio->bi_end_io          = bch2_direct_IO_read_split_endio;
 start:
-               bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+               bio->bi_opf             = REQ_OP_READ|REQ_SYNC;
                bio->bi_iter.bi_sector  = offset >> 9;
                bio->bi_private         = dio;
 
index de30daca4277b5883a6ef40c31c8682657bbfc7d..ea0fd6310b6e09353fd0eef999abffaa90889cf5 100644 (file)
@@ -835,6 +835,10 @@ static void bch2_write_index(struct closure *cl)
        struct write_point *wp = op->wp;
        struct workqueue_struct *wq = index_update_wq(op);
 
+       if ((op->flags & BCH_WRITE_DONE) &&
+           (op->flags & BCH_WRITE_MOVE))
+               bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
+
        barrier();
 
        /*
index 11ea109fefec5cf23b55b3655b5a3cea2417828b..5e952d6c0944a193a5fa70ce5df6240c206d01a2 100644 (file)
@@ -41,18 +41,19 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
 }
 
 struct moving_io {
-       struct list_head        list;
-       struct closure          cl;
-       bool                    read_completed;
+       struct list_head                list;
+       struct move_bucket_in_flight    *b;
+       struct closure                  cl;
+       bool                            read_completed;
 
-       unsigned                read_sectors;
-       unsigned                write_sectors;
+       unsigned                        read_sectors;
+       unsigned                        write_sectors;
 
-       struct bch_read_bio     rbio;
+       struct bch_read_bio             rbio;
 
-       struct data_update      write;
+       struct data_update              write;
        /* Must be last since it is variable size */
-       struct bio_vec          bi_inline_vecs[0];
+       struct bio_vec                  bi_inline_vecs[0];
 };
 
 static void move_free(struct moving_io *io)
@@ -60,6 +61,9 @@ static void move_free(struct moving_io *io)
        struct moving_context *ctxt = io->write.ctxt;
        struct bch_fs *c = ctxt->c;
 
+       if (io->b)
+               atomic_dec(&io->b->count);
+
        bch2_data_update_exit(&io->write);
        wake_up(&ctxt->wait);
        bch2_write_ref_put(c, BCH_WRITE_REF_move);
@@ -235,6 +239,7 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
 static int bch2_move_extent(struct btree_trans *trans,
                            struct btree_iter *iter,
                            struct moving_context *ctxt,
+                           struct move_bucket_in_flight *bucket_in_flight,
                            struct bch_io_opts io_opts,
                            enum btree_id btree_id,
                            struct bkey_s_c k,
@@ -295,7 +300,7 @@ static int bch2_move_extent(struct btree_trans *trans,
        bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
        io->rbio.bio.bi_iter.bi_size = sectors << 9;
 
-       bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
+       io->rbio.bio.bi_opf             = REQ_OP_READ;
        io->rbio.bio.bi_iter.bi_sector  = bkey_start_offset(k.k);
        io->rbio.bio.bi_end_io          = move_read_endio;
 
@@ -320,6 +325,11 @@ static int bch2_move_extent(struct btree_trans *trans,
                atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
        }
 
+       if (bucket_in_flight) {
+               io->b = bucket_in_flight;
+               atomic_inc(&io->b->count);
+       }
+
        this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
        this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
        trace_move_extent_read(k.k);
@@ -522,8 +532,8 @@ static int __bch2_move_data(struct moving_context *ctxt,
                k = bkey_i_to_s_c(sk.k);
                bch2_trans_unlock(&trans);
 
-               ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
-                                       btree_id, k, data_opts);
+               ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
+                                       io_opts, btree_id, k, data_opts);
                if (ret2) {
                        if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
                                continue;
@@ -665,6 +675,7 @@ failed_to_evacuate:
 
 int __bch2_evacuate_bucket(struct btree_trans *trans,
                           struct moving_context *ctxt,
+                          struct move_bucket_in_flight *bucket_in_flight,
                           struct bpos bucket, int gen,
                           struct data_update_opts _data_opts)
 {
@@ -753,8 +764,9 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
                                i++;
                        }
 
-                       ret = bch2_move_extent(trans, &iter, ctxt, io_opts,
-                                              bp.btree_id, k, data_opts);
+                       ret = bch2_move_extent(trans, &iter, ctxt,
+                                       bucket_in_flight,
+                                       io_opts, bp.btree_id, k, data_opts);
                        bch2_trans_iter_exit(trans, &iter);
 
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -834,7 +846,7 @@ int bch2_evacuate_bucket(struct bch_fs *c,
 
        bch2_trans_init(&trans, c, 0, 0);
        bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
-       ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts);
+       ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
        bch2_moving_ctxt_exit(&ctxt);
        bch2_trans_exit(&trans);
 
index 3b283af3bdb642339df220eb229cbeeed5de4f5e..4c0013872347ff4d767998e223110e1d88d3c85a 100644 (file)
@@ -70,6 +70,7 @@ int bch2_move_data(struct bch_fs *,
 
 int __bch2_evacuate_bucket(struct btree_trans *,
                           struct moving_context *,
+                          struct move_bucket_in_flight *,
                           struct bpos, int,
                           struct data_update_opts);
 int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
index 9df6d18137a5e02655d6c34f10730b896f9d48d5..285ffdb762aca17ab9769ca2ab36b7e404c2dd3e 100644 (file)
@@ -16,4 +16,10 @@ struct bch_move_stats {
        atomic64_t              sectors_raced;
 };
 
+struct move_bucket_in_flight {
+       struct bpos             bucket;
+       u8                      gen;
+       atomic_t                count;
+};
+
 #endif /* _BCACHEFS_MOVE_TYPES_H */
index 80f922767da94d20998be44f6b54b1e6a4c9e9b7..79aaa45f5348663b0bd7177ef7676fd0667c4bc9 100644 (file)
@@ -56,8 +56,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
 
        a = bch2_alloc_to_v4(k, &_a);
        *gen = a->gen;
-       ret = (a->data_type == BCH_DATA_btree ||
-              a->data_type == BCH_DATA_user) &&
+       ret = data_type_movable(a->data_type) &&
                a->fragmentation_lru &&
                a->fragmentation_lru <= time;
 
@@ -72,47 +71,44 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
        return ret;
 }
 
-struct copygc_bucket_in_flight {
-       struct bpos             bucket;
-       u8                      gen;
-       struct moving_context   ctxt;
-};
-
-typedef FIFO(struct copygc_bucket_in_flight) copygc_buckets_in_flight;
+typedef FIFO(struct move_bucket_in_flight) move_buckets_in_flight;
 
-struct copygc_bucket {
+struct move_bucket {
        struct bpos             bucket;
        u8                      gen;
 };
 
-typedef DARRAY(struct copygc_bucket) copygc_buckets;
+typedef DARRAY(struct move_bucket) move_buckets;
 
-static int copygc_bucket_cmp(const void *_l, const void *_r)
+static int move_bucket_cmp(const void *_l, const void *_r)
 {
-       const struct copygc_bucket *l = _l;
-       const struct copygc_bucket *r = _r;
+       const struct move_bucket *l = _l;
+       const struct move_bucket *r = _r;
 
        return bpos_cmp(l->bucket, r->bucket) ?: cmp_int(l->gen, r->gen);
 }
 
-static bool bucket_in_flight(copygc_buckets *buckets_sorted, struct copygc_bucket b)
+static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)
 {
        return bsearch(&b,
                       buckets_sorted->data,
                       buckets_sorted->nr,
                       sizeof(buckets_sorted->data[0]),
-                      copygc_bucket_cmp) != NULL;
+                      move_bucket_cmp) != NULL;
 }
 
-static void copygc_buckets_wait(struct btree_trans *trans,
-                               copygc_buckets_in_flight *buckets_in_flight,
-                               size_t nr, bool verify_evacuated)
+static void move_buckets_wait(struct btree_trans *trans,
+                             struct moving_context *ctxt,
+                             move_buckets_in_flight *buckets_in_flight,
+                             size_t nr, bool verify_evacuated)
 {
        while (!fifo_empty(buckets_in_flight)) {
-               struct copygc_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);
+               struct move_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);
 
-               if (fifo_used(buckets_in_flight) <= nr &&
-                   closure_nr_remaining(&i->ctxt.cl) != 1)
+               if (fifo_used(buckets_in_flight) > nr)
+                       move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));
+
+               if (atomic_read(&i->count))
                        break;
 
                /*
@@ -120,31 +116,34 @@ static void copygc_buckets_wait(struct btree_trans *trans,
                 * reads, which inits another btree_trans; this one must be
                 * unlocked:
                 */
-               bch2_trans_unlock(trans);
-               bch2_moving_ctxt_exit(&i->ctxt);
                if (verify_evacuated)
                        bch2_verify_bucket_evacuated(trans, i->bucket, i->gen);
                buckets_in_flight->front++;
        }
+
+       bch2_trans_unlock(trans);
 }
 
 static int bch2_copygc_get_buckets(struct btree_trans *trans,
-                       copygc_buckets_in_flight *buckets_in_flight,
-                       copygc_buckets *buckets)
+                       struct moving_context *ctxt,
+                       move_buckets_in_flight *buckets_in_flight,
+                       move_buckets *buckets)
 {
        struct btree_iter iter;
-       copygc_buckets buckets_sorted = { 0 };
-       struct copygc_bucket_in_flight *i;
+       move_buckets buckets_sorted = { 0 };
+       struct move_bucket_in_flight *i;
        struct bkey_s_c k;
-       size_t fifo_iter;
+       size_t fifo_iter, nr_to_get;
        int ret;
 
-       copygc_buckets_wait(trans, buckets_in_flight, buckets_in_flight->size / 2, true);
+       move_buckets_wait(trans, ctxt, buckets_in_flight, buckets_in_flight->size / 2, true);
+
+       nr_to_get = max(16UL, fifo_used(buckets_in_flight) / 4);
 
        fifo_for_each_entry_ptr(i, buckets_in_flight, fifo_iter) {
-               ret = darray_push(&buckets_sorted, ((struct copygc_bucket) {i->bucket, i->gen}));
+               ret = darray_push(&buckets_sorted, ((struct move_bucket) {i->bucket, i->gen}));
                if (ret) {
-                       bch_err(trans->c, "error allocating copygc_buckets_sorted");
+                       bch_err(trans->c, "error allocating move_buckets_sorted");
                        goto err;
                }
        }
@@ -152,19 +151,19 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
        sort(buckets_sorted.data,
             buckets_sorted.nr,
             sizeof(buckets_sorted.data[0]),
-            copygc_bucket_cmp,
+            move_bucket_cmp,
             NULL);
 
        ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
                                  lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
                                  lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
                                  0, k, ({
-               struct copygc_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
+               struct move_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
                int ret = 0;
 
                if (!bucket_in_flight(&buckets_sorted, b) &&
                    bch2_bucket_is_movable(trans, b.bucket, lru_pos_time(k.k->p), &b.gen))
-                       ret = darray_push(buckets, b) ?: buckets->nr >= fifo_free(buckets_in_flight);
+                       ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
 
                ret;
        }));
@@ -175,16 +174,17 @@ err:
 }
 
 static int bch2_copygc(struct btree_trans *trans,
-                      copygc_buckets_in_flight *buckets_in_flight,
-                      struct bch_move_stats *stats)
+                      struct moving_context *ctxt,
+                      move_buckets_in_flight *buckets_in_flight)
 {
        struct bch_fs *c = trans->c;
+       struct bch_move_stats move_stats;
        struct data_update_opts data_opts = {
                .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
        };
-       copygc_buckets buckets = { 0 };
-       struct copygc_bucket_in_flight *f;
-       struct copygc_bucket *i;
+       move_buckets buckets = { 0 };
+       struct move_bucket_in_flight *f;
+       struct move_bucket *i;
        int ret = 0;
 
        ret = bch2_btree_write_buffer_flush(trans);
@@ -192,7 +192,10 @@ static int bch2_copygc(struct btree_trans *trans,
                                 __func__, bch2_err_str(ret)))
                return ret;
 
-       ret = bch2_copygc_get_buckets(trans, buckets_in_flight, &buckets);
+       bch2_move_stats_init(&move_stats, "copygc");
+       ctxt->stats = &move_stats;
+
+       ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
        if (ret)
                goto err;
 
@@ -203,11 +206,9 @@ static int bch2_copygc(struct btree_trans *trans,
                f = fifo_push_ref(buckets_in_flight);
                f->bucket       = i->bucket;
                f->gen          = i->gen;
-               bch2_moving_ctxt_init(&f->ctxt, c, NULL, NULL, //stats,
-                                     writepoint_ptr(&c->copygc_write_point),
-                                     false);
+               atomic_set(&f->count, 0);
 
-               ret = __bch2_evacuate_bucket(trans, &f->ctxt, f->bucket, f->gen, data_opts);
+               ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket, f->gen, data_opts);
                if (ret)
                        goto err;
        }
@@ -221,7 +222,8 @@ err:
        if (ret < 0 && !bch2_err_matches(ret, EROFS))
                bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
 
-       trace_and_count(c, copygc, c, atomic64_read(&stats->sectors_moved), 0, 0, 0);
+       trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
+       ctxt->stats = NULL;
        return ret;
 }
 
@@ -244,13 +246,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
        struct bch_dev *ca;
        unsigned dev_idx;
        s64 wait = S64_MAX, fragmented_allowed, fragmented;
+       unsigned i;
 
        for_each_rw_member(ca, c, dev_idx) {
                struct bch_dev_usage usage = bch2_dev_usage_read(ca);
 
                fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_stripe) *
                                       ca->mi.bucket_size) >> 1);
-               fragmented = usage.d[BCH_DATA_user].fragmented;
+               fragmented = 0;
+
+               for (i = 0; i < BCH_DATA_NR; i++)
+                       if (data_type_movable(i))
+                               fragmented += usage.d[i].fragmented;
 
                wait = min(wait, max(0LL, fragmented_allowed - fragmented));
        }
@@ -274,32 +281,34 @@ static int bch2_copygc_thread(void *arg)
 {
        struct bch_fs *c = arg;
        struct btree_trans trans;
-       struct bch_move_stats move_stats;
+       struct moving_context ctxt;
        struct io_clock *clock = &c->io_clock[WRITE];
-       copygc_buckets_in_flight copygc_buckets;
+       move_buckets_in_flight move_buckets;
        u64 last, wait;
        int ret = 0;
 
-       if (!init_fifo(&copygc_buckets, 1 << 14, GFP_KERNEL)) {
+       if (!init_fifo(&move_buckets, 1 << 14, GFP_KERNEL)) {
                bch_err(c, "error allocating copygc buckets in flight");
                return -ENOMEM;
        }
 
        set_freezable();
-       bch2_move_stats_init(&move_stats, "copygc");
        bch2_trans_init(&trans, c, 0, 0);
+       bch2_moving_ctxt_init(&ctxt, c, NULL, NULL,
+                             writepoint_ptr(&c->copygc_write_point),
+                             false);
 
        while (!ret && !kthread_should_stop()) {
                bch2_trans_unlock(&trans);
-
-               try_to_freeze();
                cond_resched();
 
-               kthread_wait(freezing(current) || c->copy_gc_enabled);
+               if (!c->copy_gc_enabled) {
+                       move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
+                       kthread_wait_freezable(c->copy_gc_enabled);
+               }
 
                if (unlikely(freezing(current))) {
-                       copygc_buckets_wait(&trans, &copygc_buckets, 0, true);
-                       bch2_trans_unlock(&trans);
+                       move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
                        __refrigerator(false);
                        continue;
                }
@@ -308,6 +317,7 @@ static int bch2_copygc_thread(void *arg)
                wait = bch2_copygc_wait_amount(c);
 
                if (wait > clock->max_slop) {
+                       move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
                        trace_and_count(c, copygc_wait, c, wait, last + wait);
                        c->copygc_wait = last + wait;
                        bch2_kthread_io_clock_wait(clock, last + wait,
@@ -318,15 +328,15 @@ static int bch2_copygc_thread(void *arg)
                c->copygc_wait = 0;
 
                c->copygc_running = true;
-               ret = bch2_copygc(&trans, &copygc_buckets, &move_stats);
+               ret = bch2_copygc(&trans, &ctxt, &move_buckets);
                c->copygc_running = false;
 
                wake_up(&c->copygc_running_wq);
        }
 
-       copygc_buckets_wait(&trans, &copygc_buckets, 0, !ret);
-       free_fifo(&copygc_buckets);
+       bch2_moving_ctxt_exit(&ctxt);
        bch2_trans_exit(&trans);
+       free_fifo(&move_buckets);
 
        return 0;
 }
index 76c2691aa8560ea0757a5b68df2747cb4b384d3b..afbf82d629779c4bd1013a12072e7c9761552da8 100644 (file)
@@ -329,22 +329,22 @@ enum opt_type {
        x(norecovery,                   u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Don't replay the journal")                     \
        x(keep_journal,                 u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Don't free journal entries/keys after startup")\
        x(read_entire_journal,          u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Read all journal entries, not just dirty ones")\
        x(read_journal_only,            u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Only read the journal, skip the rest of recovery")\
        x(journal_transaction_names,    u8,                             \
          OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                      \
@@ -354,7 +354,7 @@ enum opt_type {
        x(noexcl,                       u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Don't open device in exclusive mode")          \
        x(direct_io,                    u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
@@ -364,38 +364,38 @@ enum opt_type {
        x(sb,                           u64,                            \
          OPT_MOUNT,                                                    \
          OPT_UINT(0, S64_MAX),                                         \
-         BCH2_NO_SB_OPT,                       BCH_SB_SECTOR,                  \
+         BCH2_NO_SB_OPT,               BCH_SB_SECTOR,                  \
          "offset",     "Sector offset of superblock")                  \
        x(read_only,                    u8,                             \
          OPT_FS,                                                       \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         NULL)                                           \
        x(nostart,                      u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Don\'t start filesystem, only open devices")   \
        x(reconstruct_alloc,            u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Reconstruct alloc btree")                      \
        x(version_upgrade,              u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Set superblock to latest version,\n"           \
                        "allowing any new features to be used")         \
        x(buckets_nouse,                u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Allocate the buckets_nouse bitmap")            \
        x(project,                      u8,                             \
          OPT_INODE,                                                    \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                          \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         NULL)                                           \
        x(nocow,                        u8,                             \
          OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,            \
@@ -411,9 +411,9 @@ enum opt_type {
          NULL,         "Enable nocow mode: enables runtime locking in\n"\
                        "data move path needed if nocow will ever be in use\n")\
        x(no_data_io,                   u8,                             \
-         OPT_FS|OPT_MOUNT,                                             \
+         OPT_MOUNT,                                                    \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,                       false,                  \
+         BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Skip submit_bio() for data reads and writes, " \
                        "for performance testing purposes")             \
        x(fs_size,                      u64,                            \
index 87446f7bad4f81d2b0a71c60e32e20bf35dfb4a6..d2e6adc13fb17c519162cd55ddcd3e7c64e046c8 100644 (file)
@@ -233,7 +233,13 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        orig->k.type = KEY_TYPE_reflink_p;
        r_p = bkey_i_to_reflink_p(orig);
        set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+
+       /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+       __underlying_memset(&r_p->v, 0, sizeof(r_p->v));
+#else
        memset(&r_p->v, 0, sizeof(r_p->v));
+#endif
 
        r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
 
index bf5ffb47ea7d3f93684700ae7dbedd3c2f758bfe..56c21c618483e9ffa73bbe75e1345619cf65cfb0 100644 (file)
@@ -761,10 +761,10 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
        }
 }
 
-int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
+int _bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
 {
        while (size) {
-               struct page *page = alloc_page(gfp_mask);
+               struct page *page = _alloc_pages(gfp_mask, 0);
                unsigned len = min_t(size_t, PAGE_SIZE, size);
 
                if (!page)
index d994c1577c747bd688f89a060d8ad5b54f59c9b8..ecfe54012e3d0306b04e0c7c7e5ffa790b0a4742 100644 (file)
@@ -60,12 +60,14 @@ static inline void vpfree(void *p, size_t size)
                free_pages((unsigned long) p, get_order(size));
 }
 
-static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
+static inline void *_vpmalloc(size_t size, gfp_t gfp_mask)
 {
-       return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
+       return (void *) _get_free_pages(gfp_mask|__GFP_NOWARN,
                                         get_order(size)) ?:
                __vmalloc(size, gfp_mask);
 }
+#define vpmalloc(_size, _gfp)                  \
+       alloc_hooks(_vpmalloc(_size, _gfp), void *, NULL)
 
 static inline void kvpfree(void *p, size_t size)
 {
@@ -75,12 +77,14 @@ static inline void kvpfree(void *p, size_t size)
                vpfree(p, size);
 }
 
-static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
+static inline void *_kvpmalloc(size_t size, gfp_t gfp_mask)
 {
        return size < PAGE_SIZE
-               ? kmalloc(size, gfp_mask)
-               : vpmalloc(size, gfp_mask);
+               ? _kmalloc(size, gfp_mask)
+               : _vpmalloc(size, gfp_mask);
 }
+#define kvpmalloc(_size, _gfp)                 \
+       alloc_hooks(_kvpmalloc(_size, _gfp), void *, NULL)
 
 int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
 
@@ -530,7 +534,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
 }
 
 void bch2_bio_map(struct bio *bio, void *base, size_t);
-int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+int _bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+#define bch2_bio_alloc_pages(_bio, _size, _gfp)                                \
+       alloc_hooks(_bch2_bio_alloc_pages(_bio, _size, _gfp), int, -ENOMEM)
 
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
@@ -566,11 +572,9 @@ do {                                                                       \
 #define kthread_wait_freezable(cond)                                   \
 ({                                                                     \
        int _ret = 0;                                                   \
-       bool frozen;                                                    \
-                                                                       \
        while (1) {                                                     \
                set_current_state(TASK_INTERRUPTIBLE);                  \
-               if (kthread_freezable_should_stop(&frozen)) {           \
+               if (kthread_should_stop()) {                            \
                        _ret = -1;                                      \
                        break;                                          \
                }                                                       \
@@ -579,6 +583,7 @@ do {                                                                        \
                        break;                                          \
                                                                        \
                schedule();                                             \
+               try_to_freeze();                                        \
        }                                                               \
        set_current_state(TASK_RUNNING);                                \
        _ret;                                                           \