Update bcachefs sources to b91a514413 bcachefs: Don't try to delete stripes when RO
author    Kent Overstreet <kent.overstreet@gmail.com>
          Wed, 10 Jul 2019 20:12:15 +0000 (16:12 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Thu, 11 Jul 2019 16:00:51 +0000 (12:00 -0400)
130 files changed:
.bcachefs_revision
cmd_migrate.c
include/linux/bio.h
include/linux/sched/mm.h [new file with mode: 0644]
include/trace/events/bcachefs.h
libbcachefs/acl.c
libbcachefs/acl.h
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/alloc_foreground.h
libbcachefs/alloc_types.h
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bcachefs_ioctl.h
libbcachefs/bkey.c
libbcachefs/bkey.h
libbcachefs/bkey_methods.c
libbcachefs/bkey_methods.h
libbcachefs/bkey_sort.c
libbcachefs/bkey_sort.h
libbcachefs/bset.c
libbcachefs/bset.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_gc.h
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_interior.h
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/chardev.c
libbcachefs/chardev.h
libbcachefs/checksum.c
libbcachefs/checksum.h
libbcachefs/clock.c
libbcachefs/clock.h
libbcachefs/clock_types.h
libbcachefs/compress.c
libbcachefs/compress.h
libbcachefs/debug.c
libbcachefs/debug.h
libbcachefs/dirent.c
libbcachefs/dirent.h
libbcachefs/disk_groups.c
libbcachefs/disk_groups.h
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/ec_types.h
libbcachefs/error.c
libbcachefs/error.h
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/extents_types.h
libbcachefs/eytzinger.h
libbcachefs/fifo.h
libbcachefs/fs-io.c
libbcachefs/fs-io.h
libbcachefs/fs-ioctl.c
libbcachefs/fs-ioctl.h
libbcachefs/fs.c
libbcachefs/fs.h
libbcachefs/fsck.c
libbcachefs/fsck.h
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/io.c
libbcachefs/io.h
libbcachefs/io_types.h
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_io.h
libbcachefs/journal_reclaim.c
libbcachefs/journal_reclaim.h
libbcachefs/journal_seq_blacklist.c
libbcachefs/journal_seq_blacklist.h
libbcachefs/journal_types.h
libbcachefs/keylist.c
libbcachefs/keylist.h
libbcachefs/keylist_types.h
libbcachefs/lz4.h [deleted file]
libbcachefs/lz4_decompress.c [deleted file]
libbcachefs/migrate.c
libbcachefs/migrate.h
libbcachefs/move.c
libbcachefs/move.h
libbcachefs/move_types.h
libbcachefs/movinggc.c
libbcachefs/movinggc.h
libbcachefs/opts.c
libbcachefs/opts.h
libbcachefs/quota.c
libbcachefs/quota.h
libbcachefs/quota_types.h
libbcachefs/rebalance.c
libbcachefs/rebalance.h
libbcachefs/rebalance_types.h
libbcachefs/recovery.c
libbcachefs/recovery.h
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/siphash.c
libbcachefs/siphash.h
libbcachefs/str_hash.h
libbcachefs/super-io.c
libbcachefs/super-io.h
libbcachefs/super.c
libbcachefs/super.h
libbcachefs/super_types.h
libbcachefs/sysfs.c
libbcachefs/sysfs.h
libbcachefs/tests.c
libbcachefs/tests.h
libbcachefs/trace.c
libbcachefs/util.c
libbcachefs/util.h
libbcachefs/vstructs.h
libbcachefs/xattr.c
libbcachefs/xattr.h
linux/bio.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 6620636bf629d80a01741dbcf7e0535ceceb151c..5aeecbcdcacfcc0c746fe0af6362d8984826aa81 100644
@@ -1 +1 @@
-7e42539c80470cb655bbc46cd0f144de6c644523
+b91a514413ecdd15e0f9d8290761d24663a93425
diff --git a/cmd_migrate.c b/cmd_migrate.c
index f630c142337389b4fa65022fe6aacd1508830b68..f281d19833ad626e5bbf4c9c3e719816d6813a43 100644
@@ -265,8 +265,7 @@ static void write_data(struct bch_fs *c,
        closure_init_stack(&cl);
 
        bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
-       o.op.wbio.bio.bi_iter.bi_size = len;
-       bch2_bio_map(&o.op.wbio.bio, buf);
+       bch2_bio_map(&o.op.wbio.bio, buf, len);
 
        bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
        o.op.write_point        = writepoint_hashed(0);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7736198febc5a49e8b06ac041c43050d5c5277fb..e93341e60cb60fcd75b1d69622a6f36e478818e9 100644
@@ -113,8 +113,13 @@ static inline void *bio_data(struct bio *bio)
 
 #define __bio_kunmap_atomic(addr)      kunmap_atomic(addr)
 
-#define bio_for_each_segment_all(bvl, bio, i)                          \
-       for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
+struct bvec_iter_all {
+       unsigned        done;
+};
+
+#define bio_for_each_segment_all(bvl, bio, i, iter)                    \
+       for (i = 0, bvl = (bio)->bi_io_vec, iter = (struct bvec_iter_all) { 0 };                \
+            i < (bio)->bi_vcnt; i++, bvl++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
                                    unsigned bytes)
@@ -136,6 +141,9 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 #define bio_for_each_segment(bvl, bio, iter)                           \
        __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
 
+#define __bio_for_each_bvec(bvl, bio, iter, start)                     \
+       __bio_for_each_segment(bvl, bio, iter, start)
+
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
 static inline unsigned bio_segments(struct bio *bio)
@@ -228,6 +236,8 @@ enum {
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
 
+int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
+
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
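For reference, the bio_for_each_segment_all() change above adds a caller-owned
struct bvec_iter_all argument. A minimal sketch of the updated calling
convention; the helper name and body are illustrative, not part of this commit:

    #include <linux/bio.h>

    /* Sum the byte length of every segment using the new 4-argument iterator. */
    static size_t bio_total_segment_bytes(struct bio *bio)
    {
            struct bio_vec *bv;
            struct bvec_iter_all iter;      /* iterator state now lives in the caller */
            unsigned i;
            size_t bytes = 0;

            bio_for_each_segment_all(bv, bio, i, iter)
                    bytes += bv->bv_len;
            return bytes;
    }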
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
new file mode 100644
index 0000000..347105c
--- /dev/null
+++ b/include/linux/sched/mm.h
@@ -0,0 +1,18 @@
+#ifndef _LINUX_SCHED_MM_H
+#define _LINUX_SCHED_MM_H
+
+#define PF_MEMALLOC_NOFS 0
+
+static inline unsigned int memalloc_nofs_save(void)
+{
+       unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
+       current->flags |= PF_MEMALLOC_NOFS;
+       return flags;
+}
+
+static inline void memalloc_nofs_restore(unsigned int flags)
+{
+       current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+}
+
+#endif /* _LINUX_SCHED_MM_H */
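The new header above is a userspace shim: with PF_MEMALLOC_NOFS defined as 0,
save/restore are effectively no-ops here, but they preserve the kernel's
bracketing idiom for code shared with the kernel (see the btree_cache.c hunk
further down). A hedged sketch of that idiom, with an illustrative allocation
in the middle:

    #include <linux/sched/mm.h>
    #include <linux/slab.h>         /* kmalloc(), assumed from this tree's shims */

    static void *alloc_without_fs_recursion(size_t size)
    {
            unsigned flags = memalloc_nofs_save();  /* mark task: no FS reclaim recursion */
            void *p = kmalloc(size, GFP_KERNEL);    /* allocation inside the bracket */

            memalloc_nofs_restore(flags);           /* restore the previous flag state */
            return p;
    }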
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 76673d9ab5bc00d1a8353a0679b4d0b5d8a7b825..d7e898b02491704900398e0b519db902235c97d8 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM bcachefs
 
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index cdcccaad916d1b228f674c2123e4af1008a09181..59d4af1326ee01495a321c697ccc7d6946c49bd7 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifdef CONFIG_BCACHEFS_POSIX_ACL
 
 #include "bcachefs.h"
diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h
index e06724309ff8fa9a1b51e47bad5d30c33e10284b..cb62d502a7ff3b3fa6034ab7e474718d7cd42e2d 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_ACL_H
 #define _BCACHEFS_ACL_H
 
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 744addb0019d3c12539cd173cfe960b455d07730..43dc2f270dc600b7bb4b3e5444729dad744629a0 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_background.h"
 #include "alloc_foreground.h"
@@ -355,11 +356,11 @@ restart:
 
                        old_u = bch2_alloc_unpack(k);
 
-                       percpu_down_read_preempt_disable(&c->mark_lock);
+                       percpu_down_read(&c->mark_lock);
                        g       = bucket(ca, b);
                        m       = READ_ONCE(g->mark);
                        new_u   = alloc_mem_to_key(g, m);
-                       percpu_up_read_preempt_enable(&c->mark_lock);
+                       percpu_up_read(&c->mark_lock);
 
                        if (!m.dirty)
                                continue;
@@ -889,7 +890,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
        b = ca->alloc_heap.data[0].bucket;
 
        /* first, put on free_inc and mark as owned by allocator: */
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        spin_lock(&c->freelist_lock);
 
        verify_not_on_freelist(c, ca, b);
@@ -899,7 +900,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
        bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
 
        spin_unlock(&c->freelist_lock);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
@@ -915,11 +916,11 @@ retry:
         * we have to trust the in memory bucket @m, not the version in the
         * btree:
         */
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        g = bucket(ca, b);
        m = READ_ONCE(g->mark);
        u = alloc_mem_to_key(g, m);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        invalidating_cached_data = m.cached_sectors != 0;
 
@@ -980,7 +981,7 @@ retry:
                size_t b2;
 
                /* remove from free_inc: */
-               percpu_down_read_preempt_disable(&c->mark_lock);
+               percpu_down_read(&c->mark_lock);
                spin_lock(&c->freelist_lock);
 
                bch2_mark_alloc_bucket(c, ca, b, false,
@@ -990,7 +991,7 @@ retry:
                BUG_ON(b != b2);
 
                spin_unlock(&c->freelist_lock);
-               percpu_up_read_preempt_enable(&c->mark_lock);
+               percpu_up_read(&c->mark_lock);
        }
 
        return ret;
@@ -1001,7 +1002,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
 {
        struct bucket_mark m;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        spin_lock(&c->freelist_lock);
 
        bch2_invalidate_bucket(c, ca, bucket, &m);
@@ -1014,7 +1015,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
        bucket_io_clock_reset(c, ca, bucket, READ);
        bucket_io_clock_reset(c, ca, bucket, WRITE);
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        *flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
 
@@ -1563,10 +1564,10 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c)
                             test_bit(bu, ca->buckets_nouse)))
                                continue;
 
-                       percpu_down_read_preempt_disable(&c->mark_lock);
+                       percpu_down_read(&c->mark_lock);
                        bch2_mark_alloc_bucket(c, ca, bu, true,
                                        gc_pos_alloc(c, NULL), 0);
-                       percpu_up_read_preempt_enable(&c->mark_lock);
+                       percpu_up_read(&c->mark_lock);
 
                        fifo_push(&ca->free_inc, bu);
 
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index b5462646f57eb939ea6dc0a609aaf3f3ae75ddfd..0c1a0f0dd2ab558835474ec18a65b9d21a596a00 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_ALLOC_BACKGROUND_H
 #define _BCACHEFS_ALLOC_BACKGROUND_H
 
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 7fb1e5a4ca69d4dd4ee044eed3a57817c6cde40e..e64f8449462fcad1d19321e2e3cd0cbccce0c3de 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Primary bucket allocation code
  *
@@ -100,7 +101,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
                return;
        }
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        spin_lock(&ob->lock);
 
        bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
@@ -109,7 +110,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
        ob->type = 0;
 
        spin_unlock(&ob->lock);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        spin_lock(&c->freelist_lock);
        ob->freelist = c->open_buckets_freelist;
@@ -376,6 +377,25 @@ void bch2_dev_stripe_increment(struct bch_fs *c, struct bch_dev *ca,
 #define BUCKET_MAY_ALLOC_PARTIAL       (1 << 0)
 #define BUCKET_ALLOC_USE_DURABILITY    (1 << 1)
 
+static void add_new_bucket(struct bch_fs *c,
+                          struct open_buckets *ptrs,
+                          struct bch_devs_mask *devs_may_alloc,
+                          unsigned *nr_effective,
+                          bool *have_cache,
+                          unsigned flags,
+                          struct open_bucket *ob)
+{
+       unsigned durability =
+               bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability;
+
+       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
+       *nr_effective   += (flags & BUCKET_ALLOC_USE_DURABILITY)
+               ? durability : 1;
+       *have_cache     |= !durability;
+
+       ob_push(c, ptrs, ob);
+}
+
 static int bch2_bucket_alloc_set(struct bch_fs *c,
                                 struct open_buckets *ptrs,
                                 struct dev_stripe_state *stripe,
@@ -391,7 +411,7 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
                bch2_dev_alloc_list(c, stripe, devs_may_alloc);
        struct bch_dev *ca;
        bool alloc_failure = false;
-       unsigned i, durability;
+       unsigned i;
 
        BUG_ON(*nr_effective >= nr_replicas);
 
@@ -421,14 +441,8 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
                        continue;
                }
 
-               durability = (flags & BUCKET_ALLOC_USE_DURABILITY)
-                       ? ca->mi.durability : 1;
-
-               __clear_bit(ca->dev_idx, devs_may_alloc->d);
-               *nr_effective   += durability;
-               *have_cache     |= !durability;
-
-               ob_push(c, ptrs, ob);
+               add_new_bucket(c, ptrs, devs_may_alloc,
+                              nr_effective, have_cache, flags, ob);
 
                bch2_dev_stripe_increment(c, ca, stripe);
 
@@ -464,7 +478,7 @@ static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
        open_bucket_for_each(c, &h->blocks, ob, i)
                __clear_bit(ob->ptr.dev, devs.d);
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        rcu_read_lock();
 
        if (h->parity.nr < h->redundancy) {
@@ -500,12 +514,12 @@ static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
        }
 
        rcu_read_unlock();
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return bch2_ec_stripe_new_alloc(c, h);
 err:
        rcu_read_unlock();
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
        return -1;
 }
 
@@ -523,7 +537,8 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
                                     unsigned erasure_code,
                                     unsigned nr_replicas,
                                     unsigned *nr_effective,
-                                    bool *have_cache)
+                                    bool *have_cache,
+                                    unsigned flags)
 {
        struct dev_alloc_list devs_sorted;
        struct ec_stripe_head *h;
@@ -563,11 +578,8 @@ got_bucket:
        ob->ec_idx      = ec_idx;
        ob->ec          = h->s;
 
-       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
-       *nr_effective   += ca->mi.durability;
-       *have_cache     |= !ca->mi.durability;
-
-       ob_push(c, ptrs, ob);
+       add_new_bucket(c, ptrs, devs_may_alloc,
+                      nr_effective, have_cache, flags, ob);
        atomic_inc(&h->s->pin);
 out_put_head:
        bch2_ec_stripe_head_put(h);
@@ -582,6 +594,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
                                        unsigned nr_replicas,
                                        unsigned *nr_effective,
                                        bool *have_cache,
+                                       unsigned flags,
                                        bool need_ec)
 {
        struct open_buckets ptrs_skip = { .nr = 0 };
@@ -596,11 +609,9 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
                    (ca->mi.durability ||
                     (wp->type == BCH_DATA_USER && !*have_cache)) &&
                    (ob->ec || !need_ec)) {
-                       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
-                       *nr_effective   += ca->mi.durability;
-                       *have_cache     |= !ca->mi.durability;
-
-                       ob_push(c, ptrs, ob);
+                       add_new_bucket(c, ptrs, devs_may_alloc,
+                                      nr_effective, have_cache,
+                                      flags, ob);
                } else {
                        ob_push(c, &ptrs_skip, ob);
                }
@@ -618,17 +629,15 @@ static int open_bucket_add_buckets(struct bch_fs *c,
                                   unsigned *nr_effective,
                                   bool *have_cache,
                                   enum alloc_reserve reserve,
+                                  unsigned flags,
                                   struct closure *_cl)
 {
        struct bch_devs_mask devs;
        struct open_bucket *ob;
        struct closure *cl = NULL;
-       unsigned i, flags = BUCKET_ALLOC_USE_DURABILITY;
+       unsigned i;
        int ret;
 
-       if (wp->type == BCH_DATA_USER)
-               flags |= BUCKET_MAY_ALLOC_PARTIAL;
-
        rcu_read_lock();
        devs = target_rw_devs(c, wp->type, target);
        rcu_read_unlock();
@@ -643,25 +652,25 @@ static int open_bucket_add_buckets(struct bch_fs *c,
        if (erasure_code) {
                get_buckets_from_writepoint(c, ptrs, wp, &devs,
                                            nr_replicas, nr_effective,
-                                           have_cache, true);
+                                           have_cache, flags, true);
                if (*nr_effective >= nr_replicas)
                        return 0;
 
                bucket_alloc_from_stripe(c, ptrs, wp, &devs,
                                         target, erasure_code,
                                         nr_replicas, nr_effective,
-                                        have_cache);
+                                        have_cache, flags);
                if (*nr_effective >= nr_replicas)
                        return 0;
        }
 
        get_buckets_from_writepoint(c, ptrs, wp, &devs,
                                    nr_replicas, nr_effective,
-                                   have_cache, false);
+                                   have_cache, flags, false);
        if (*nr_effective >= nr_replicas)
                return 0;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        rcu_read_lock();
 
 retry_blocking:
@@ -678,7 +687,7 @@ retry_blocking:
        }
 
        rcu_read_unlock();
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return ret;
 }
@@ -862,9 +871,13 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
        struct open_bucket *ob;
        struct open_buckets ptrs;
        unsigned nr_effective, write_points_nr;
+       unsigned ob_flags = 0;
        bool have_cache;
        int ret, i;
 
+       if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
+               ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
+
        BUG_ON(!nr_replicas || !nr_replicas_required);
 retry:
        ptrs.nr         = 0;
@@ -874,6 +887,9 @@ retry:
 
        wp = writepoint_find(c, write_point.v);
 
+       if (wp->type == BCH_DATA_USER)
+               ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
+
        /* metadata may not allocate on cache devices: */
        if (wp->type != BCH_DATA_USER)
                have_cache = true;
@@ -882,19 +898,22 @@ retry:
                ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
-                                             &have_cache, reserve, cl);
+                                             &have_cache, reserve,
+                                             ob_flags, cl);
        } else {
                ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
-                                             &have_cache, reserve, NULL);
+                                             &have_cache, reserve,
+                                             ob_flags, NULL);
                if (!ret)
                        goto alloc_done;
 
                ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
                                              0, erasure_code,
                                              nr_replicas, &nr_effective,
-                                             &have_cache, reserve, cl);
+                                             &have_cache, reserve,
+                                             ob_flags, cl);
        }
 alloc_done:
        BUG_ON(!ret && nr_effective < nr_replicas);
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 5224a52f8bebbee3731d75f9567b7f55e9331b63..6d8ffb0cd06dfb849cc7a274b95b45bd334d0c1c 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_ALLOC_FOREGROUND_H
 #define _BCACHEFS_ALLOC_FOREGROUND_H
 
diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
index 66457fc722fd5ec2cbb1ec9312e515661ae5f7b8..832568dc9551565bbb8acd4e285bf06c130bb88b 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_ALLOC_TYPES_H
 #define _BCACHEFS_ALLOC_TYPES_H
 
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 09afbed9511f09c8aff91663abf1fa978e70352e..907d1b605cf4a7f240b4617a92495ebdab0575f8 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_H
 #define _BCACHEFS_H
 
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index be6acec19671da2b7f913a0de740b9d53f0e00c6..13285936dd2dc02ad29e405116f971c0a379473c 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FORMAT_H
 #define _BCACHEFS_FORMAT_H
 
@@ -847,6 +848,8 @@ static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
                     BCH_ALLOC_FIELDS(), sizeof(u64));
 #undef x
 
+#define BKEY_ALLOC_U64s_MAX    (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX)
+
 /* Quotas: */
 
 enum quota_types {
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
index fb595dffbcc372e5908a7ed05c20e51e320430ec..d668ede5491a05d8fbf6610f3b38c30a8d10c9a5 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_IOCTL_H
 #define _BCACHEFS_IOCTL_H
 
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index f1ddd189f0c8f71c2a82d62affe89b1576697f3c..0f9dfe37b0af3f4bde775b60f34e85ab0beda159 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey.h"
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index 280c9ec48e972a73103aebde315fa0a58bca643f..1acff9d0fd7efadec12883f12185a77dfea042d7 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BKEY_H
 #define _BCACHEFS_BKEY_H
 
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 711bc88fd95f074510233d528aaca80693dc3b8b..27f196ef0b186963df59c6523479db6593e11eee 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
@@ -81,9 +82,17 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
        if (k.k->u64s < BKEY_U64s)
                return "u64s too small";
 
+       if ((btree_node_type_is_extents(type) ||
+            type == BKEY_TYPE_BTREE) &&
+           bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
+               return "value too big";
+
        if (btree_node_type_is_extents(type)) {
                if ((k.k->size == 0) != bkey_deleted(k.k))
                        return "bad size field";
+
+               if (k.k->size > k.k->p.offset)
+                       return "size greater than offset";
        } else {
                if (k.k->size)
                        return "nonzero size field";
@@ -198,22 +207,22 @@ bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
 }
 
 enum merge_result bch2_bkey_merge(struct bch_fs *c,
-                                 struct bkey_i *l, struct bkey_i *r)
+                                 struct bkey_s l, struct bkey_s r)
 {
-       const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type];
+       const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
        enum merge_result ret;
 
        if (key_merging_disabled(c) ||
            !ops->key_merge ||
-           l->k.type != r->k.type ||
-           bversion_cmp(l->k.version, r->k.version) ||
-           bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+           l.k->type != r.k->type ||
+           bversion_cmp(l.k->version, r.k->version) ||
+           bkey_cmp(l.k->p, bkey_start_pos(r.k)))
                return BCH_MERGE_NOMERGE;
 
        ret = ops->key_merge(c, l, r);
 
        if (ret != BCH_MERGE_NOMERGE)
-               l->k.needs_whiteout |= r->k.needs_whiteout;
+               l.k->needs_whiteout |= r.k->needs_whiteout;
        return ret;
 }
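For reference, bch2_bkey_merge() now takes struct bkey_s (mutable key views)
for both arguments rather than struct bkey_i pointers; a hedged sketch of a
caller, using bkey_i_to_s() as the bkey_sort.c hunk below does (the helper
name is illustrative):

    /* Try to merge two adjacent unpacked keys; on BCH_MERGE_MERGE, @l absorbed @r. */
    static bool try_merge_adjacent(struct bch_fs *c, struct bkey_i *l, struct bkey_i *r)
    {
            return bch2_bkey_merge(c, bkey_i_to_s(l), bkey_i_to_s(r)) == BCH_MERGE_MERGE;
    }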
 
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index cf7a9e9c782dc4b4dc02d18d67bd0c69b162e6f8..08b976633360583176961ef2e118a210d2b4cd24 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BKEY_METHODS_H
 #define _BCACHEFS_BKEY_METHODS_H
 
@@ -32,7 +33,7 @@ struct bkey_ops {
        void            (*swab)(const struct bkey_format *, struct bkey_packed *);
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
        enum merge_result (*key_merge)(struct bch_fs *,
-                                      struct bkey_i *, struct bkey_i *);
+                                      struct bkey_s, struct bkey_s);
 };
 
 const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
@@ -56,7 +57,7 @@ void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *);
 bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
 
 enum merge_result bch2_bkey_merge(struct bch_fs *,
-                                 struct bkey_i *, struct bkey_i *);
+                                 struct bkey_s, struct bkey_s);
 
 void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
 
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index c47c862f12e188f2a1efa1188b52768708ed1790..9f5d9b4bf1c9528f0566cafc282de6cc2f37b5fa 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "bkey_sort.h"
 #include "bset.h"
@@ -240,35 +241,44 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
        heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
 }
 
+static void extent_sort_advance_prev(struct bkey_format *f,
+                                    struct btree_nr_keys *nr,
+                                    struct bkey_packed *start,
+                                    struct bkey_packed **prev)
+{
+       if (*prev) {
+               bch2_bkey_pack(*prev, (void *) *prev, f);
+
+               btree_keys_account_key_add(nr, 0, *prev);
+               *prev = bkey_next(*prev);
+       } else {
+               *prev = start;
+       }
+}
+
 static void extent_sort_append(struct bch_fs *c,
-                              struct btree *b,
+                              struct bkey_format *f,
                               struct btree_nr_keys *nr,
                               struct bkey_packed *start,
                               struct bkey_packed **prev,
-                              struct bkey_packed *k)
+                              struct bkey_s k)
 {
-       struct bkey_format *f = &b->format;
-       BKEY_PADDED(k) tmp;
-
-       if (bkey_whiteout(k))
+       if (bkey_whiteout(k.k))
                return;
 
-       bch2_bkey_unpack(b, &tmp.k, k);
+       /*
+        * prev is always unpacked, for key merging - until right before we
+        * advance it:
+        */
 
        if (*prev &&
-           bch2_bkey_merge(c, (void *) *prev, &tmp.k))
+           bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) ==
+           BCH_MERGE_MERGE)
                return;
 
-       if (*prev) {
-               bch2_bkey_pack(*prev, (void *) *prev, f);
-
-               btree_keys_account_key_add(nr, 0, *prev);
-               *prev = bkey_next(*prev);
-       } else {
-               *prev = start;
-       }
+       extent_sort_advance_prev(f, nr, start, prev);
 
-       bkey_copy(*prev, &tmp.k);
+       bkey_reassemble((void *) *prev, k.s_c);
 }
 
 struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
@@ -278,7 +288,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 {
        struct bkey_format *f = &b->format;
        struct btree_node_iter_set *_l = iter->data, *_r;
-       struct bkey_packed *prev = NULL, *out, *lk, *rk;
+       struct bkey_packed *prev = NULL, *lk, *rk;
        struct bkey l_unpacked, r_unpacked;
        struct bkey_s l, r;
        struct btree_nr_keys nr;
@@ -289,9 +299,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
        while (!bch2_btree_node_iter_large_end(iter)) {
                lk = __btree_node_offset_to_key(b, _l->k);
+               l = __bkey_disassemble(b, lk, &l_unpacked);
 
                if (iter->used == 1) {
-                       extent_sort_append(c, b, &nr, dst->start, &prev, lk);
+                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
                        extent_sort_next(iter, b, _l);
                        continue;
                }
@@ -302,13 +313,11 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
                        _r++;
 
                rk = __btree_node_offset_to_key(b, _r->k);
-
-               l = __bkey_disassemble(b, lk, &l_unpacked);
                r = __bkey_disassemble(b, rk, &r_unpacked);
 
                /* If current key and next key don't overlap, just append */
                if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
-                       extent_sort_append(c, b, &nr, dst->start, &prev, lk);
+                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
                        extent_sort_next(iter, b, _l);
                        continue;
                }
@@ -353,23 +362,17 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
                        extent_sort_sift(iter, b, 0);
 
-                       extent_sort_append(c, b, &nr, dst->start, &prev,
-                                          bkey_to_packed(&tmp.k));
+                       extent_sort_append(c, f, &nr, dst->start,
+                                          &prev, bkey_i_to_s(&tmp.k));
                } else {
                        bch2_cut_back(bkey_start_pos(r.k), l.k);
                        extent_save(b, lk, l.k);
                }
        }
 
-       if (prev) {
-               bch2_bkey_pack(prev, (void *) prev, f);
-               btree_keys_account_key_add(&nr, 0, prev);
-               out = bkey_next(prev);
-       } else {
-               out = dst->start;
-       }
+       extent_sort_advance_prev(f, &nr, dst->start, &prev);
 
-       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+       dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
        return nr;
 }
 
@@ -412,58 +415,36 @@ bch2_sort_repack_merge(struct bch_fs *c,
                       struct bkey_format *out_f,
                       bool filter_whiteouts)
 {
-       struct bkey_packed *k, *prev = NULL, *out;
+       struct bkey_packed *prev = NULL, *k_packed, *next;
+       struct bkey k_unpacked;
+       struct bkey_s k;
        struct btree_nr_keys nr;
-       BKEY_PADDED(k) tmp;
 
        memset(&nr, 0, sizeof(nr));
 
-       while ((k = bch2_btree_node_iter_next_all(iter, src))) {
-               if (filter_whiteouts && bkey_whiteout(k))
-                       continue;
-
+       next = bch2_btree_node_iter_next_all(iter, src);
+       while ((k_packed = next)) {
                /*
-                * The filter might modify pointers, so we have to unpack the
-                * key and values to &tmp.k:
+                * The filter might modify the size of @k's value, so advance
+                * the iterator first:
                 */
-               bch2_bkey_unpack(src, &tmp.k, k);
+               next = bch2_btree_node_iter_next_all(iter, src);
 
-               if (filter_whiteouts &&
-                   bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k)))
+               if (filter_whiteouts && bkey_whiteout(k_packed))
                        continue;
 
-               /* prev is always unpacked, for key merging: */
+               k = __bkey_disassemble(src, k_packed, &k_unpacked);
 
-               if (prev &&
-                   bch2_bkey_merge(c, (void *) prev, &tmp.k) ==
-                   BCH_MERGE_MERGE)
+               if (filter_whiteouts &&
+                   bch2_bkey_normalize(c, k))
                        continue;
 
-               /*
-                * the current key becomes the new prev: advance prev, then
-                * copy the current key - but first pack prev (in place):
-                */
-               if (prev) {
-                       bch2_bkey_pack(prev, (void *) prev, out_f);
-
-                       btree_keys_account_key_add(&nr, 0, prev);
-                       prev = bkey_next(prev);
-               } else {
-                       prev = vstruct_last(dst);
-               }
-
-               bkey_copy(prev, &tmp.k);
+               extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
        }
 
-       if (prev) {
-               bch2_bkey_pack(prev, (void *) prev, out_f);
-               btree_keys_account_key_add(&nr, 0, prev);
-               out = bkey_next(prev);
-       } else {
-               out = vstruct_last(dst);
-       }
+       extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
 
-       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+       dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
        return nr;
 }
 
diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h
index d189d814766149d268f3b25feaaaf993c44e4ccc..397009181eaee233d0cf820d9cd2a775d87e76da 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BKEY_SORT_H
 #define _BCACHEFS_BKEY_SORT_H
 
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 4d182518abe68a2eed5f90a89a62dbe439a893d6..ef10e77ec1e510aedec44fd5eb935a1d8d169ba0 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Code for working with individual keys, and sorted sets of keys with in a
  * btree node
@@ -12,7 +13,6 @@
 #include "util.h"
 
 #include <asm/unaligned.h>
-#include <linux/dynamic_fault.h>
 #include <linux/console.h>
 #include <linux/random.h>
 #include <linux/prefetch.h>
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 74b962a05b36cd4424535ff588df6cd8f804f995..17c2399473002ad9b578b8b22f9dfe2359a89329 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BSET_H
 #define _BCACHEFS_BSET_H
 
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 587a04f56b84e6d3c6a2851efdabba831ee06784..046524c8d5ea6928a25ce207ce4d629b87f5b062 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_cache.h"
@@ -7,6 +8,7 @@
 #include "debug.h"
 
 #include <linux/prefetch.h>
+#include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
 const char * const bch2_btree_ids[] = {
@@ -507,7 +509,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
        u64 start_time = local_clock();
+       unsigned flags;
 
+       flags = memalloc_nofs_save();
        mutex_lock(&bc->lock);
 
        /*
@@ -545,6 +549,7 @@ out_unlock:
 
        list_del_init(&b->list);
        mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
 out:
        b->flags                = 0;
        b->written              = 0;
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 19e14d32cf2d2f2ecab405e71a3e5004bbf6b0c3..c5873c58439cf8d39927cb699386592dd2642a3b 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_CACHE_H
 #define _BCACHEFS_BTREE_CACHE_H
 
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index c2b893a9cb0f31173777ee3d08c0fe75516f5afd..a458cfe0e92d55f53fdca5099528752cb38f81f0 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
  * Copyright (C) 2014 Datera Inc.
@@ -287,11 +288,11 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id,
 
        for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
                           BTREE_ITER_SLOTS, k, ret) {
-               percpu_down_read_preempt_disable(&c->mark_lock);
+               percpu_down_read(&c->mark_lock);
                ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
                                         BCH_BUCKET_MARK_GC|
                                         BCH_BUCKET_MARK_NOATOMIC);
-               percpu_up_read_preempt_enable(&c->mark_lock);
+               percpu_up_read(&c->mark_lock);
 
                if (!ret)
                        break;
@@ -367,9 +368,7 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
         */
        if (c) {
                lockdep_assert_held(&c->sb_lock);
-               percpu_down_read_preempt_disable(&c->mark_lock);
-       } else {
-               preempt_disable();
+               percpu_down_read(&c->mark_lock);
        }
 
        for (i = 0; i < layout->nr_superblocks; i++) {
@@ -391,11 +390,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
                                          gc_phase(GC_PHASE_SB), flags);
        }
 
-       if (c) {
-               percpu_up_read_preempt_enable(&c->mark_lock);
-       } else {
-               preempt_enable();
-       }
+       if (c)
+               percpu_up_read(&c->mark_lock);
 }
 
 static void bch2_mark_superblocks(struct bch_fs *c)
@@ -435,7 +431,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
        size_t i, j, iter;
        unsigned ci;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
 
        spin_lock(&c->freelist_lock);
        gc_pos_set(c, gc_pos_alloc(c, NULL));
@@ -471,7 +467,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
                spin_unlock(&ob->lock);
        }
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 }
 
 static void bch2_gc_free(struct bch_fs *c)
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index 6522ebaf4a29c73e79fdf7ae064997f5a0fa729b..bd5f2752954fede861e8cb5170d9909cc8e41da0 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_GC_H
 #define _BCACHEFS_BTREE_GC_H
 
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 8b7e05ed066af7abe84c98ba4c5a43f5163f3757..b6e286c36b861fd353876e4aad630c0885f63ddd 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
@@ -1037,10 +1038,9 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
        INIT_WORK(&rb->work, btree_node_read_work);
        bio->bi_opf             = REQ_OP_READ|REQ_SYNC|REQ_META;
        bio->bi_iter.bi_sector  = pick.ptr.offset;
-       bio->bi_iter.bi_size    = btree_bytes(c);
        bio->bi_end_io          = btree_node_read_endio;
        bio->bi_private         = b;
-       bch2_bio_map(bio, b->data);
+       bch2_bio_map(bio, b->data, btree_bytes(c));
 
        set_btree_node_read_in_flight(b);
 
@@ -1501,11 +1501,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        wbio->wbio.order                = order;
        wbio->wbio.used_mempool         = used_mempool;
        wbio->wbio.bio.bi_opf           = REQ_OP_WRITE|REQ_META|REQ_FUA;
-       wbio->wbio.bio.bi_iter.bi_size  = sectors_to_write << 9;
        wbio->wbio.bio.bi_end_io        = btree_node_write_endio;
        wbio->wbio.bio.bi_private       = b;
 
-       bch2_bio_map(&wbio->wbio.bio, data);
+       bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
 
        /*
         * If we're appending to a leaf node, we don't technically need FUA -
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index 4de1fb736ae59980964f390f3a553e60bb4d177e..c817aeed878adf0c005732b40eeaaa10d5aabe20 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_IO_H
 #define _BCACHEFS_BTREE_IO_H
 
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 9c37de75774d59d869d18ca6afb1b3e0ababfced..8955555d6603267fd491d15919ac9259957f7eca 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 177cc314f3581f8e30eee6b33644b15d2cd25277..9483ec8913e3199afbbe897606bee48f148c0caf 100644
@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_ITER_H
 #define _BCACHEFS_BTREE_ITER_H
 
-#include <linux/dynamic_fault.h>
-
 #include "bset.h"
 #include "btree_types.h"
 
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index 35289b0c3586c6826f08871f69a61a301c282553..ea07ba19c5dc17ac83f52c95d1ed12e76643c651 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_LOCKING_H
 #define _BCACHEFS_BTREE_LOCKING_H
 
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f2641d564e49faa4558b3c53ef986d44d3cb0d5e..91aa30a6ed2f8bc7c3b95fa7328b8b90376b6629 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_TYPES_H
 #define _BCACHEFS_BTREE_TYPES_H
 
@@ -261,6 +262,7 @@ struct btree_insert_entry {
 
        bool                    deferred;
        bool                    triggered;
+       bool                    marked;
 };
 
 #define BTREE_ITER_MAX         64
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 32e30f75ed22e38e9e9fe04f2867bc9b4aa171f3..616c103c05ecd874fdb97c4d8bb8408413006085 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_UPDATE_H
 #define _BCACHEFS_BTREE_UPDATE_H
 
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index c6920b63fa858ed3b319c17d4550072b5969c0f7..9294137719df771235d3918456cc4ae840e6e9cd 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 6ae17ffa4101932283444132b18a045fcedae1f4..e5156e9081106e97e57833e6c30240849123c1e2 100644
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
 #define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
 
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 250aae47ad4d85afe6137156616eb4eca8f2fe1a..4f12108bd6fe6d120239542a035a382fad435cda 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_update.h"
@@ -541,6 +542,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct bch_fs_usage *fs_usage = NULL;
        struct btree_insert_entry *i;
+       bool saw_non_marked;
        unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
                ? BCH_BUCKET_MARK_BUCKET_INVALIDATE
                : 0;
@@ -550,14 +552,28 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
        trans_for_each_update_iter(trans, i)
-               if (update_has_triggers(trans, i) &&
-                   update_triggers_transactional(trans, i)) {
-                       ret = bch2_trans_mark_update(trans, i);
-                       if (ret == -EINTR)
-                               trace_trans_restart_mark(trans->ip);
-                       if (ret)
-                               goto out_clear_replicas;
+               i->marked = false;
+
+       do {
+               saw_non_marked = false;
+
+               trans_for_each_update_iter(trans, i) {
+                       if (i->marked)
+                               continue;
+
+                       saw_non_marked = true;
+                       i->marked = true;
+
+                       if (update_has_triggers(trans, i) &&
+                           update_triggers_transactional(trans, i)) {
+                               ret = bch2_trans_mark_update(trans, i->iter, i->k);
+                               if (ret == -EINTR)
+                                       trace_trans_restart_mark(trans->ip);
+                               if (ret)
+                                       goto out_clear_replicas;
+                       }
                }
+       } while (saw_non_marked);
 
        btree_trans_lock_write(c, trans);
 
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 3cfe684a604f35b596300c041ba95f49bf5a0210..b6b3ac5111cad30c4750e19a5f37e0777d39af7e 100644
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Code for manipulating bucket marks for garbage collection.
  *
@@ -227,12 +228,12 @@ retry:
        if (unlikely(!ret))
                return NULL;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
 
        v = fs_usage_u64s(c);
        if (unlikely(u64s != v)) {
                u64s = v;
-               percpu_up_read_preempt_enable(&c->mark_lock);
+               percpu_up_read(&c->mark_lock);
                kfree(ret);
                goto retry;
        }
@@ -350,9 +351,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
 {
        struct bch_fs_usage_short ret;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        ret = __bch2_fs_usage_read_short(c);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return ret;
 }
@@ -449,6 +450,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                bch2_data_types[old.data_type],
                bch2_data_types[new.data_type]);
 
+       preempt_disable();
        dev_usage = this_cpu_ptr(ca->usage[gc]);
 
        if (bucket_type(old))
@@ -472,6 +474,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                (int) new.cached_sectors - (int) old.cached_sectors;
        dev_usage->sectors_fragmented +=
                is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
+       preempt_enable();
 
        if (!is_available_bucket(old) && is_available_bucket(new))
                bch2_wake_allocator(ca);
@@ -495,11 +498,9 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
 
                buckets = bucket_array(ca);
 
-               preempt_disable();
                for_each_bucket(g, buckets)
                        bch2_dev_usage_update(c, ca, c->usage_base,
                                              old, g->mark, false);
-               preempt_enable();
        }
 }
 
@@ -681,8 +682,12 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
                            size_t b, bool owned_by_allocator,
                            struct gc_pos pos, unsigned flags)
 {
+       preempt_disable();
+
        do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
                   ca, b, owned_by_allocator);
+
+       preempt_enable();
 }
 
 static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
@@ -792,12 +797,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
        BUG_ON(type != BCH_DATA_SB &&
               type != BCH_DATA_JOURNAL);
 
+       preempt_disable();
+
        if (likely(c)) {
                do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
                           ca, b, type, sectors);
        } else {
                __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
        }
+
+       preempt_enable();
 }
 
 static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
@@ -1148,10 +1157,10 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 {
        int ret;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        ret = bch2_mark_key_locked(c, k, sectors,
                                   fs_usage, journal_seq, flags);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return ret;
 }
@@ -1309,22 +1318,18 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
 
 static int trans_get_key(struct btree_trans *trans,
                         enum btree_id btree_id, struct bpos pos,
-                        struct btree_insert_entry **insert,
                         struct btree_iter **iter,
                         struct bkey_s_c *k)
 {
        unsigned i;
        int ret;
 
-       *insert = NULL;
-
        for (i = 0; i < trans->nr_updates; i++)
                if (!trans->updates[i].deferred &&
                    trans->updates[i].iter->btree_id == btree_id &&
                    !bkey_cmp(pos, trans->updates[i].iter->pos)) {
-                       *insert = &trans->updates[i];
-                       *iter   = (*insert)->iter;
-                       *k      = bkey_i_to_s_c((*insert)->k);
+                       *iter   = trans->updates[i].iter;
+                       *k      = bkey_i_to_s_c(trans->updates[i].k);
                        return 0;
                }
 
@@ -1340,30 +1345,34 @@ static int trans_get_key(struct btree_trans *trans,
        return ret;
 }
 
-static int trans_update_key(struct btree_trans *trans,
-                           struct btree_insert_entry **insert,
-                           struct btree_iter *iter,
-                           struct bkey_s_c k,
-                           unsigned extra_u64s)
+static void *trans_update_key(struct btree_trans *trans,
+                             struct btree_iter *iter,
+                             unsigned u64s)
 {
        struct bkey_i *new_k;
+       unsigned i;
 
-       if (*insert)
-               return 0;
-
-       new_k = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
-                                  extra_u64s * sizeof(u64));
+       new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
        if (IS_ERR(new_k))
-               return PTR_ERR(new_k);
+               return new_k;
+
+       bkey_init(&new_k->k);
+       new_k->k.p = iter->pos;
 
-       *insert = bch2_trans_update(trans, ((struct btree_insert_entry) {
-                               .iter = iter,
-                               .k = new_k,
-                               .triggered = true,
+       for (i = 0; i < trans->nr_updates; i++)
+               if (!trans->updates[i].deferred &&
+                   trans->updates[i].iter == iter) {
+                       trans->updates[i].k = new_k;
+                       return new_k;
+               }
+
+       bch2_trans_update(trans, ((struct btree_insert_entry) {
+               .iter = iter,
+               .k = new_k,
+               .triggered = true,
        }));
 
-       bkey_reassemble((*insert)->k, k);
-       return 0;
+       return new_k;
 }
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
@@ -1372,7 +1381,6 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-       struct btree_insert_entry *insert;
        struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_alloc_unpacked u;
@@ -1382,7 +1390,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 
        ret = trans_get_key(trans, BTREE_ID_ALLOC,
                            POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
-                           &insert, &iter, &k);
+                           &iter, &k);
        if (ret)
                return ret;
 
@@ -1415,11 +1423,12 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
                ? u.dirty_sectors
                : u.cached_sectors, sectors);
 
-       ret = trans_update_key(trans, &insert, iter, k, 1);
+       a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
+       ret = PTR_ERR_OR_ZERO(a);
        if (ret)
                goto out;
 
-       a = bkey_alloc_init(insert->k);
+       bkey_alloc_init(&a->k_i);
        a->k.p = iter->pos;
        bch2_alloc_pack(a, u);
 out:
@@ -1432,8 +1441,8 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        s64 sectors, enum bch_data_type data_type)
 {
        struct bch_replicas_padded r;
-       struct btree_insert_entry *insert;
        struct btree_iter *iter;
+       struct bkey_i *new_k;
        struct bkey_s_c k;
        struct bkey_s_stripe s;
        unsigned nr_data;
@@ -1442,8 +1451,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 
        BUG_ON(!sectors);
 
-       ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx),
-                           &insert, &iter, &k);
+       ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
        if (ret)
                return ret;
 
@@ -1455,11 +1463,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                goto out;
        }
 
-       ret = trans_update_key(trans, &insert, iter, k, 1);
+       new_k = trans_update_key(trans, iter, k.k->u64s);
+       ret = PTR_ERR_OR_ZERO(new_k);
        if (ret)
                goto out;
 
-       s = bkey_i_to_s_stripe(insert->k);
+       bkey_reassemble(new_k, k);
+       s = bkey_i_to_s_stripe(new_k);
 
        nr_data = s.v->nr_blocks - s.v->nr_redundant;
 
@@ -1580,9 +1590,9 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 }
 
 int bch2_trans_mark_update(struct btree_trans *trans,
-                          struct btree_insert_entry *insert)
+                          struct btree_iter *iter,
+                          struct bkey_i *insert)
 {
-       struct btree_iter       *iter = insert->iter;
        struct btree            *b = iter->l[0].b;
        struct btree_node_iter  node_iter = iter->l[0].iter;
        struct bkey_packed      *_k;
@@ -1592,9 +1602,9 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                return 0;
 
        ret = bch2_trans_mark_key(trans,
-                       bkey_i_to_s_c(insert->k),
-                       bpos_min(insert->k->k.p, b->key.k.p).offset -
-                       bkey_start_offset(&insert->k->k),
+                       bkey_i_to_s_c(insert),
+                       bpos_min(insert->k.p, b->key.k.p).offset -
+                       bkey_start_offset(&insert->k),
                        BCH_BUCKET_MARK_INSERT);
        if (ret)
                return ret;
@@ -1608,25 +1618,25 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                k = bkey_disassemble(b, _k, &unpacked);
 
                if (btree_node_is_extents(b)
-                   ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-                   : bkey_cmp(insert->k->k.p, k.k->p))
+                   ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
+                   : bkey_cmp(insert->k.p, k.k->p))
                        break;
 
                if (btree_node_is_extents(b)) {
-                       switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+                       switch (bch2_extent_overlap(&insert->k, k.k)) {
                        case BCH_EXTENT_OVERLAP_ALL:
                                sectors = -((s64) k.k->size);
                                break;
                        case BCH_EXTENT_OVERLAP_BACK:
-                               sectors = bkey_start_offset(&insert->k->k) -
+                               sectors = bkey_start_offset(&insert->k) -
                                        k.k->p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_FRONT:
                                sectors = bkey_start_offset(k.k) -
-                                       insert->k->k.p.offset;
+                                       insert->k.p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_MIDDLE:
-                               sectors = k.k->p.offset - insert->k->k.p.offset;
+                               sectors = k.k->p.offset - insert->k.p.offset;
                                BUG_ON(sectors <= 0);
 
                                ret = bch2_trans_mark_key(trans, k, sectors,
@@ -1634,7 +1644,7 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                                if (ret)
                                        return ret;
 
-                               sectors = bkey_start_offset(&insert->k->k) -
+                               sectors = bkey_start_offset(&insert->k) -
                                        k.k->p.offset;
                                break;
                        }
@@ -1664,10 +1674,10 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
 
 void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
 {
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        this_cpu_sub(c->usage[0]->online_reserved,
                     res->sectors);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        res->sectors = 0;
 }
@@ -1682,7 +1692,8 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
        s64 sectors_available;
        int ret;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
+       preempt_disable();
        pcpu = this_cpu_ptr(c->pcpu);
 
        if (sectors <= pcpu->sectors_available)
@@ -1694,7 +1705,8 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
                get = min((u64) sectors + SECTORS_CACHE, old);
 
                if (get < sectors) {
-                       percpu_up_read_preempt_enable(&c->mark_lock);
+                       preempt_enable();
+                       percpu_up_read(&c->mark_lock);
                        goto recalculate;
                }
        } while ((v = atomic64_cmpxchg(&c->sectors_available,
@@ -1707,7 +1719,8 @@ out:
        this_cpu_add(c->usage[0]->online_reserved, sectors);
        res->sectors                    += sectors;
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       preempt_enable();
+       percpu_up_read(&c->mark_lock);
        return 0;
 
 recalculate:
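
The hunks above all make the same change: the combined percpu_down_read_preempt_disable()/percpu_up_read_preempt_enable() primitives are split into percpu_down_read()/percpu_up_read() plus explicit preempt_disable()/preempt_enable() pairs, so preemption is held off only around the this_cpu accesses that actually need it. The reservation fast path itself is untouched: each CPU carves SECTORS_CACHE extra sectors off the shared counter so most reservations never touch the atomic. A minimal userspace model of that fast path, with a C11 thread-local standing in for the per-cpu cache (the rwsem, the preemption control, and the recalculate slow path are elided):

#include <stdatomic.h>
#include <stdbool.h>

#define SECTORS_CACHE	1024

static _Atomic long sectors_available = 1L << 20;
static _Thread_local long pcpu_sectors_available;

static bool reserve_sectors(long sectors)
{
	long old, get;

	/* fast path: satisfied from this thread's cache */
	if (sectors <= pcpu_sectors_available) {
		pcpu_sectors_available -= sectors;
		return true;
	}

	/* refill: carve sectors + SECTORS_CACHE off the shared counter */
	old = atomic_load(&sectors_available);
	do {
		get = old < sectors + SECTORS_CACHE
			? old : sectors + SECTORS_CACHE;
		if (get < sectors)
			return false;	/* kernel code: goto recalculate */
	} while (!atomic_compare_exchange_weak(&sectors_available,
					       &old, old - get));

	pcpu_sectors_available += get - sectors;
	return true;
}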
index 65a934f88781e79b326533adf2695eda8bfb5e33..5ab6f3d3413718218ddf060473f05d53ce23a3c0 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Code for manipulating bucket marks for garbage collection.
  *
@@ -273,7 +274,8 @@ void bch2_replicas_delta_list_apply(struct bch_fs *,
                                    struct replicas_delta_list *);
 int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned);
 int bch2_trans_mark_update(struct btree_trans *,
-                          struct btree_insert_entry *);
+                          struct btree_iter *iter,
+                          struct bkey_i *insert);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
 
 /* disk reservations: */
index 309a5fb61e989031929cab1ee3dd51cc6fbaa855..e51d297976bef888f9b41986ac051a33f32da743 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BUCKETS_TYPES_H
 #define _BUCKETS_TYPES_H
 
index 595d4797eb4f815918e4be389d05dc939a607ccf..059eca01ccc4581f1df7bf07e3129ce711d94d2b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifndef NO_BCACHEFS_CHARDEV
 
 #include "bcachefs.h"
@@ -405,7 +406,7 @@ static long bch2_ioctl_usage(struct bch_fs *c,
                dst.used                = bch2_fs_sectors_used(c, src);
                dst.online_reserved     = src->online_reserved;
 
-               percpu_up_read_preempt_enable(&c->mark_lock);
+               percpu_up_read(&c->mark_lock);
 
                for (i = 0; i < BCH_REPLICAS_MAX; i++) {
                        dst.persistent_reserved[i] =
index c3057b07523c7e9bb5c9eb26a33c22c877b626a0..3a4890d39ff98990b6a6cd46fd8bc3f67eacf8a8 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_CHARDEV_H
 #define _BCACHEFS_CHARDEV_H
 
index a4c1b8ada3e89977532ecad8eba438fb7bfe93ff..e55aa98cf9ee48fe3d7d5bda4786ad20dc9bb072 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "checksum.h"
 #include "super.h"
@@ -60,7 +61,7 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t
                return crc32c(crc, data, len);
        case BCH_CSUM_CRC64_NONZERO:
        case BCH_CSUM_CRC64:
-               return bch2_crc64_update(crc, data, len);
+               return crc64_be(crc, data, len);
        default:
                BUG();
        }
@@ -199,7 +200,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
                        kunmap_atomic(p);
                }
 #else
-               __bio_for_each_contig_segment(bv, bio, *iter, *iter)
+               __bio_for_each_bvec(bv, bio, *iter, *iter)
                        crc = bch2_checksum_update(type, crc,
                                page_address(bv.bv_page) + bv.bv_offset,
                                bv.bv_len);
@@ -224,7 +225,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
                        kunmap_atomic(p);
                }
 #else
-               __bio_for_each_contig_segment(bv, bio, *iter, *iter)
+               __bio_for_each_bvec(bv, bio, *iter, *iter)
                        crypto_shash_update(desc,
                                page_address(bv.bv_page) + bv.bv_offset,
                                bv.bv_len);
index 2c0fbbb83273b47d7fd4e2355baf9ea844094432..b84e81bac8ff7b663b8f3238f2dceadc84bf5c9d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_CHECKSUM_H
 #define _BCACHEFS_CHECKSUM_H
 
@@ -24,11 +25,6 @@ static inline bool bch2_checksum_mergeable(unsigned type)
 struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
                                    struct bch_csum, size_t);
 
-static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
-{
-       return crc64_be(crc, p, len);
-}
-
 #define BCH_NONCE_EXTENT       cpu_to_le32(1 << 28)
 #define BCH_NONCE_BTREE                cpu_to_le32(2 << 28)
 #define BCH_NONCE_JOURNAL      cpu_to_le32(3 << 28)
index 90b10cef60b8ccd06abc9f903677ad6e5d96779c..8ac6990c6971813fd53740ee914818bbfad74bd3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "clock.h"
 
index 1e2a7dea4ddd0a0fe5c7c35695c502aa662ecb4b..5cb043c579d8b6a68b70f23496e7a450a6829165 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_CLOCK_H
 #define _BCACHEFS_CLOCK_H
 
index df404b6dd3fe7c24760da5f815653059ebda66b6..2b5e499e12b43cee088004fb1f8fd9ad5e9f34de 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_CLOCK_TYPES_H
 #define _BCACHEFS_CLOCK_TYPES_H
 
index e74fc1f8aee5cbd77863b8ab5390790917da0955..3787390da47fc20835a77fe67c0e7286246e8338 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "checksum.h"
 #include "compress.h"
@@ -5,7 +6,6 @@
 #include "io.h"
 #include "super-io.h"
 
-#include "lz4.h"
 #include <linux/lz4.h>
 #include <linux/zlib.h>
 #include <linux/zstd.h>
@@ -66,7 +66,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
        BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
 
 #ifndef CONFIG_HIGHMEM
-       __bio_for_each_contig_segment(bv, bio, iter, start) {
+       __bio_for_each_bvec(bv, bio, iter, start) {
                if (bv.bv_len == start.bi_size)
                        return (struct bbuf) {
                                .b = page_address(bv.bv_page) + bv.bv_offset,
@@ -159,11 +159,6 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
 
        switch (crc.compression_type) {
        case BCH_COMPRESSION_LZ4_OLD:
-               ret = bch2_lz4_decompress(src_data.b, &src_len,
-                                    dst_data, dst_len);
-               if (ret)
-                       goto err;
-               break;
        case BCH_COMPRESSION_LZ4:
                ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
                                                  src_len, dst_len, dst_len);
@@ -246,10 +241,10 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
        }
 
        /*
-        * might have to free existing pages and retry allocation from mempool -
-        * do this _after_ decompressing:
+        * XXX: don't have a good way to assert that the bio was allocated with
+        * enough space, we depend on bch2_move_extent doing the right thing
         */
-       bch2_bio_alloc_more_pages_pool(c, bio, crc->live_size << 9);
+       bio->bi_iter.bi_size = crc->live_size << 9;
 
        memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
 
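
Two simplifications in compress.c: the bundled decompressor for the legacy BCH_COMPRESSION_LZ4_OLD format is gone, with that case now falling through to the stock LZ4_decompress_safe_partial() call, and the in-place path no longer grows the bio with bch2_bio_alloc_more_pages_pool(), it simply sets bi_size and trusts the caller (per the XXX comment) to have allocated enough space. A userspace sketch of the unified decompress call against liblz4; the error handling here is an assumption, the hunk only shows the call itself:

#include <lz4.h>	/* liblz4; link with -llz4 */

static int uncompress_extent(const char *src, int src_len,
			     char *dst, int dst_len)
{
	/* Decode exactly dst_len bytes; both the current and the old
	 * on-disk lz4 types now go through this one call. */
	int ret = LZ4_decompress_safe_partial(src, dst, src_len,
					      dst_len, dst_len);

	return ret == dst_len ? 0 : -1;
}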
index 06fff6a57f942ffdcea2d8fb63beaaf779c56529..4bab1f61b3b50bddbc3978f0a25840abd87d154d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_COMPRESS_H
 #define _BCACHEFS_COMPRESS_H
 
index 47b8dd74dc62fd3b5fbce70ea299253a4cbd3fbc..69b123bad83b9e3e469f030b19c3dc2067bde0d6 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Assorted bcachefs debug code
  *
@@ -69,8 +70,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
        bio_set_dev(bio, ca->disk_sb.bdev);
        bio->bi_opf             = REQ_OP_READ|REQ_META;
        bio->bi_iter.bi_sector  = pick.ptr.offset;
-       bio->bi_iter.bi_size    = btree_bytes(c);
-       bch2_bio_map(bio, n_sorted);
+       bch2_bio_map(bio, n_sorted, btree_bytes(c));
 
        submit_bio_wait(bio);
 
index b5de1a7072d4904fcd17bbb95af792de5939b0c2..56c2d1ab5f630de8ce1b2484f934bc90dceb12eb 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_DEBUG_H
 #define _BCACHEFS_DEBUG_H
 
index 11e628876fffbb600974c9b97016a4dff19531f1..1442dacef0de66256af921d117bc045fe26d42f3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
index a35d3aad29c1f44fec47bbe2a112b9e1c4ecec46..bc64718a78328a9899f29dcffd6c22f6b38cee8a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_DIRENT_H
 #define _BCACHEFS_DIRENT_H
 
index eddd8899182bcbe70dc28f6ce91717a000de6677..4a4ec8f4610849eaee4ce733ba9976b3fd0bd916 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "disk_groups.h"
 #include "super-io.h"
index b90b0ef5c85f8beecbed11710b0bff57d31d3bcb..c8e0c37a5e1a49ebf7dac0864cc07c976b10bedc 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_DISK_GROUPS_H
 #define _BCACHEFS_DISK_GROUPS_H
 
index 43cceb02955516be136ec0950d3c2cd552e67903..6916fea532ace6dbc9dbd35694b5fd020777e993 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 /* erasure coding */
 
@@ -398,11 +399,10 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
                bio_set_op_attrs(&ec_bio->bio, rw, 0);
 
                ec_bio->bio.bi_iter.bi_sector   = ptr->offset + buf->offset + (offset >> 9);
-               ec_bio->bio.bi_iter.bi_size     = b;
                ec_bio->bio.bi_end_io           = ec_block_endio;
                ec_bio->bio.bi_private          = cl;
 
-               bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset);
+               bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
 
                closure_get(cl);
                percpu_ref_get(&ca->io_ref);
@@ -626,7 +626,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
                bch2_stripes_heap_insert(c, m, idx);
        }
 
-       if (stripe_idx_to_delete(c) >= 0)
+       if (stripe_idx_to_delete(c) >= 0 &&
+           !percpu_ref_is_dying(&c->writes))
                schedule_work(&c->ec_stripe_delete_work);
 }
 
@@ -684,7 +685,8 @@ static void ec_stripe_delete_work(struct work_struct *work)
                if (idx < 0)
                        break;
 
-               ec_stripe_delete(c, idx);
+               if (ec_stripe_delete(c, idx))
+                       break;
        }
 
        mutex_unlock(&c->ec_stripe_create_lock);
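
This pair of hunks is what the commit subject refers to: deleting a stripe rewrites btree keys, which must not happen once the filesystem is going read-only. The guard is twofold: the heap update no longer queues the delete worker while the c->writes ref is dying, and the worker's loop stops at the first ec_stripe_delete() failure instead of retrying forever. A standalone model of the new loop shape; next_stripe_to_delete() and delete_stripe() are hypothetical stand-ins for stripe_idx_to_delete() and ec_stripe_delete():

/* hypothetical stand-ins for the patch's helpers */
int next_stripe_to_delete(void);	/* < 0: nothing left to delete */
int delete_stripe(int idx);		/* != 0: failed, e.g. fs now RO */

static void stripe_delete_loop(void)
{
	for (;;) {
		int idx = next_stripe_to_delete();

		if (idx < 0)
			break;

		/* bail on the first error rather than spinning on a
		 * stripe that can no longer be deleted */
		if (delete_stripe(idx))
			break;
	}
}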
index 6c00ec5cdbc7dfe5a0e24c09550456a390b81eeb..8d9fbfd19f66f3ca5069e9ef58a48f82fb227b7a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_EC_H
 #define _BCACHEFS_EC_H
 
index b4d377053c875d409084904cf6e6337d514dcfb2..5c3f77c8aac71b9f269031f9deb2271e7e59c519 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_EC_TYPES_H
 #define _BCACHEFS_EC_TYPES_H
 
index afffbfb3695449a6f1c1cb5b3687a8302af8b56d..1aaff44e18cf0047f8a677852990c165b003a7d4 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "error.h"
 #include "io.h"
index 588e763f0440686d9af7999c1edb5df4aa132764..2591e12305b710df1ec0e5e1ab8b7e0039e098b0 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_ERROR_H
 #define _BCACHEFS_ERROR_H
 
index 818c772a8479b506ef44050b83efad46bcdd1c12..e286048b5bf83fd2e3ac29f42447e6c42c55be6c 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
  *
@@ -1291,9 +1292,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 
 const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
-               return "value too big";
-
        return bch2_bkey_ptrs_invalid(c, k);
 }
 
@@ -1521,21 +1519,21 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
 }
 
 enum merge_result bch2_extent_merge(struct bch_fs *c,
-                                   struct bkey_i *l, struct bkey_i *r)
+                                   struct bkey_s _l, struct bkey_s _r)
 {
-       struct bkey_s_extent el = bkey_i_to_s_extent(l);
-       struct bkey_s_extent er = bkey_i_to_s_extent(r);
-       union bch_extent_entry *en_l = el.v->start;
-       union bch_extent_entry *en_r = er.v->start;
+       struct bkey_s_extent l = bkey_s_to_extent(_l);
+       struct bkey_s_extent r = bkey_s_to_extent(_r);
+       union bch_extent_entry *en_l = l.v->start;
+       union bch_extent_entry *en_r = r.v->start;
        struct bch_extent_crc_unpacked crc_l, crc_r;
 
-       if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k))
+       if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
                return BCH_MERGE_NOMERGE;
 
-       crc_l = bch2_extent_crc_unpack(el.k, NULL);
+       crc_l = bch2_extent_crc_unpack(l.k, NULL);
 
-       extent_for_each_entry(el, en_l) {
-               en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
+       extent_for_each_entry(l, en_l) {
+               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
                if (extent_entry_type(en_l) != extent_entry_type(en_r))
                        return BCH_MERGE_NOMERGE;
@@ -1567,8 +1565,8 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
                case BCH_EXTENT_ENTRY_crc32:
                case BCH_EXTENT_ENTRY_crc64:
                case BCH_EXTENT_ENTRY_crc128:
-                       crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
-                       crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
+                       crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+                       crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
 
                        if (crc_l.csum_type             != crc_r.csum_type ||
                            crc_l.compression_type      != crc_r.compression_type ||
@@ -1600,16 +1598,16 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
                }
        }
 
-       extent_for_each_entry(el, en_l) {
+       extent_for_each_entry(l, en_l) {
                struct bch_extent_crc_unpacked crc_l, crc_r;
 
-               en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
+               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
                if (!extent_entry_is_crc(en_l))
                        continue;
 
-               crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
-               crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
+               crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+               crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
 
                crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
                                                 crc_l.csum,
@@ -1622,7 +1620,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
                bch2_extent_crc_pack(entry_to_crc(en_l), crc_l);
        }
 
-       bch2_key_resize(&l->k, l->k.size + r->k.size);
+       bch2_key_resize(l.k, l.k->size + r.k->size);
 
        return BCH_MERGE_MERGE;
 }
@@ -1662,7 +1660,9 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
        bch2_bkey_unpack(b, &li.k, l);
        bch2_bkey_unpack(b, &ri.k, r);
 
-       ret = bch2_bkey_merge(c, &li.k, &ri.k);
+       ret = bch2_bkey_merge(c,
+                             bkey_i_to_s(&li.k),
+                             bkey_i_to_s(&ri.k));
        if (ret == BCH_MERGE_NOMERGE)
                return false;
 
@@ -1785,22 +1785,22 @@ void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
 }
 
 enum merge_result bch2_reservation_merge(struct bch_fs *c,
-                                        struct bkey_i *l, struct bkey_i *r)
+                                        struct bkey_s _l, struct bkey_s _r)
 {
-       struct bkey_i_reservation *li = bkey_i_to_reservation(l);
-       struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+       struct bkey_s_reservation l = bkey_s_to_reservation(_l);
+       struct bkey_s_reservation r = bkey_s_to_reservation(_r);
 
-       if (li->v.generation != ri->v.generation ||
-           li->v.nr_replicas != ri->v.nr_replicas)
+       if (l.v->generation != r.v->generation ||
+           l.v->nr_replicas != r.v->nr_replicas)
                return BCH_MERGE_NOMERGE;
 
-       if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
-               bch2_key_resize(&l->k, KEY_SIZE_MAX);
-               bch2_cut_front(l->k.p, r);
+       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
+               bch2_key_resize(l.k, KEY_SIZE_MAX);
+               __bch2_cut_front(l.k->p, r.s);
                return BCH_MERGE_PARTIAL;
        }
 
-       bch2_key_resize(&l->k, l->k.size + r->k.size);
+       bch2_key_resize(l.k, l.k->size + r.k->size);
 
        return BCH_MERGE_MERGE;
 }
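
The merge hooks now take struct bkey_s, the split key/value view, instead of struct bkey_i, so the same callbacks work on keys whose values are not stored inline; bch2_extent_merge_inline() converts with bkey_i_to_s() before calling in. The reservation variant above is also the clearest illustration of the three-way merge protocol: NOMERGE when generation or replica counts differ, PARTIAL when the combined size would overflow the key's size field (l grows to the maximum and the merged span is cut off the front of r), and MERGE otherwise. A standalone model, assuming p.offset is the end position of the extent and a 32-bit size field:

#include <stdint.h>

#define KEY_SIZE_MAX ((uint32_t) ~0U)	/* assumption: 32-bit size field */

enum merge_result { MERGE_NOMERGE, MERGE_PARTIAL, MERGE_MERGE };

struct reservation {
	uint64_t end;		/* like bkey .p.offset */
	uint32_t size;		/* sectors; start == end - size */
	uint8_t  generation;
	uint8_t  nr_replicas;
};

/* l and r must be adjacent: r's start == l->end */
static enum merge_result reservation_merge(struct reservation *l,
					   struct reservation *r)
{
	if (l->generation != r->generation ||
	    l->nr_replicas != r->nr_replicas)
		return MERGE_NOMERGE;

	if ((uint64_t) l->size + r->size > KEY_SIZE_MAX) {
		uint64_t grow = KEY_SIZE_MAX - l->size;

		l->end  += grow;		/* bch2_key_resize() */
		l->size  = KEY_SIZE_MAX;
		r->size -= grow;		/* __bch2_cut_front() */
		return MERGE_PARTIAL;
	}

	l->end  += r->size;			/* r fully absorbed */
	l->size += r->size;
	r->size  = 0;
	return MERGE_MERGE;
}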
index 9bf156d0a4326dff1448bb756683f3227f594da6..fe92737354bd202815ea70fb627b383824911520 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_EXTENTS_H
 #define _BCACHEFS_EXTENTS_H
 
@@ -385,7 +386,7 @@ void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
 void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 enum merge_result bch2_extent_merge(struct bch_fs *,
-                                   struct bkey_i *, struct bkey_i *);
+                                   struct bkey_s, struct bkey_s);
 
 #define bch2_bkey_ops_extent (struct bkey_ops) {               \
        .key_invalid    = bch2_extent_invalid,                  \
@@ -401,7 +402,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *,
 const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 enum merge_result bch2_reservation_merge(struct bch_fs *,
-                                        struct bkey_i *, struct bkey_i *);
+                                        struct bkey_s, struct bkey_s);
 
 #define bch2_bkey_ops_reservation (struct bkey_ops) {          \
        .key_invalid    = bch2_reservation_invalid,             \
index 6d4284105053bb1a77908674b9242cd36951b779..a8dd6952d9895cddd2f13c350db500966454cc94 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_EXTENTS_TYPES_H
 #define _BCACHEFS_EXTENTS_TYPES_H
 
index d19d809c7580d8fbd61dc3107ae224622eb63e55..26d5cad7e6a5fd37b8640130c319f932def8e317 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _EYTZINGER_H
 #define _EYTZINGER_H
 
index 0982af022ff9e07c5123bda6292cc8d1164f56d5..cdb272708a4bdacf94093a7c0351570189abf973 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FIFO_H
 #define _BCACHEFS_FIFO_H
 
index 81a86664c99e572d01e2eb533d101a7d56e598ec..5d0c2b696c1ec8ecbd3617fc003880c73f4fc390 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
@@ -500,184 +501,263 @@ static inline struct bch_io_opts io_opts(struct bch_fs *c, struct bch_inode_info
 
 /* stored in page->private: */
 
-/*
- * bch_page_state has to (unfortunately) be manipulated with cmpxchg - we could
- * almost protected it with the page lock, except that bch2_writepage_io_done has
- * to update the sector counts (and from interrupt/bottom half context).
- */
-struct bch_page_state {
-union { struct {
-       /* existing data: */
-       unsigned                sectors:PAGE_SECTOR_SHIFT + 1;
-
+struct bch_page_sector {
        /* Uncompressed, fully allocated replicas: */
-       unsigned                nr_replicas:4;
+       unsigned                nr_replicas:3;
 
        /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */
-       unsigned                replicas_reserved:4;
-
-       /* Owns PAGE_SECTORS sized quota reservation: */
-       unsigned                quota_reserved:1;
-
-       /*
-        * Number of sectors on disk - for i_blocks
-        * Uncompressed size, not compressed size:
-        */
-       unsigned                dirty_sectors:PAGE_SECTOR_SHIFT + 1;
-};
-       /* for cmpxchg: */
-       unsigned long           v;
+       unsigned                replicas_reserved:3;
+
+       /* i_sectors: */
+       enum {
+               SECTOR_UNALLOCATED,
+               SECTOR_QUOTA_RESERVED,
+               SECTOR_DIRTY,
+               SECTOR_ALLOCATED,
+       }                       state:2;
 };
+
+struct bch_page_state {
+       struct bch_page_sector  s[PAGE_SECTORS];
 };
 
-#define page_state_cmpxchg(_ptr, _new, _expr)                          \
-({                                                                     \
-       unsigned long _v = READ_ONCE((_ptr)->v);                        \
-       struct bch_page_state _old;                                     \
-                                                                       \
-       do {                                                            \
-               _old.v = _new.v = _v;                                   \
-               _expr;                                                  \
-                                                                       \
-               EBUG_ON(_new.sectors + _new.dirty_sectors > PAGE_SECTORS);\
-       } while (_old.v != _new.v &&                                    \
-                (_v = cmpxchg(&(_ptr)->v, _old.v, _new.v)) != _old.v); \
-                                                                       \
-       _old;                                                           \
-})
+static inline struct bch_page_state *__bch2_page_state(struct page *page)
+{
+       return page_has_private(page)
+               ? (struct bch_page_state *) page_private(page)
+               : NULL;
+}
 
-static inline struct bch_page_state *page_state(struct page *page)
+static inline struct bch_page_state *bch2_page_state(struct page *page)
 {
-       struct bch_page_state *s = (void *) &page->private;
+       EBUG_ON(!PageLocked(page));
 
-       BUILD_BUG_ON(sizeof(*s) > sizeof(page->private));
+       return __bch2_page_state(page);
+}
 
-       if (!PagePrivate(page))
-               SetPagePrivate(page);
+/* for newly allocated pages: */
+static void __bch2_page_state_release(struct page *page)
+{
+       struct bch_page_state *s = __bch2_page_state(page);
 
-       return s;
+       if (!s)
+               return;
+
+       ClearPagePrivate(page);
+       set_page_private(page, 0);
+       put_page(page);
+       kfree(s);
 }
 
-static inline unsigned page_res_sectors(struct bch_page_state s)
+static void bch2_page_state_release(struct page *page)
 {
+       struct bch_page_state *s = bch2_page_state(page);
+
+       if (!s)
+               return;
 
-       return s.replicas_reserved * PAGE_SECTORS;
+       ClearPagePrivate(page);
+       set_page_private(page, 0);
+       put_page(page);
+       kfree(s);
 }
 
-static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
-                                       struct bch_page_state s)
+/* for newly allocated pages: */
+static struct bch_page_state *__bch2_page_state_create(struct page *page,
+                                                      gfp_t gfp)
 {
-       struct disk_reservation res = { .sectors = page_res_sectors(s) };
-       struct quota_res quota_res = { .sectors = s.quota_reserved ? PAGE_SECTORS : 0 };
+       struct bch_page_state *s;
 
-       bch2_quota_reservation_put(c, inode, &quota_res);
-       bch2_disk_reservation_put(c, &res);
+       s = kzalloc(sizeof(*s), GFP_NOFS|gfp);
+       if (!s)
+               return NULL;
+
+       /*
+        * migrate_page_move_mapping() assumes that pages with private data
+        * have their count elevated by 1.
+        */
+       get_page(page);
+       set_page_private(page, (unsigned long) s);
+       SetPagePrivate(page);
+       return s;
+}
+
+static struct bch_page_state *bch2_page_state_create(struct page *page,
+                                                    gfp_t gfp)
+{
+       return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
 }
 
 static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
                                      struct page *page)
 {
-       struct bch_page_state s;
+       struct bch_page_state *s = bch2_page_state(page);
+       struct disk_reservation disk_res = { 0 };
+       struct quota_res quota_res = { 0 };
+       unsigned i;
 
-       EBUG_ON(!PageLocked(page));
+       if (!s)
+               return;
 
-       s = page_state_cmpxchg(page_state(page), s, {
-               s.replicas_reserved     = 0;
-               s.quota_reserved        = 0;
-       });
+       for (i = 0; i < ARRAY_SIZE(s->s); i++) {
+               disk_res.sectors += s->s[i].replicas_reserved;
+               s->s[i].replicas_reserved = 0;
 
-       __bch2_put_page_reservation(c, inode, s);
+               if (s->s[i].state == SECTOR_QUOTA_RESERVED) {
+                       quota_res.sectors++;
+                       s->s[i].state = SECTOR_UNALLOCATED;
+               }
+       }
+
+       bch2_quota_reservation_put(c, inode, &quota_res);
+       bch2_disk_reservation_put(c, &disk_res);
 }
 
-static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
-                                    struct page *page, bool check_enospc)
+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
 {
-       struct bch_page_state *s = page_state(page), new;
-
        /* XXX: this should not be open coded */
-       unsigned nr_replicas = inode->ei_inode.bi_data_replicas
+       return inode->ei_inode.bi_data_replicas
                ? inode->ei_inode.bi_data_replicas - 1
                : c->opts.data_replicas;
-       struct disk_reservation disk_res;
+}
+
+static inline unsigned sectors_to_reserve(struct bch_page_sector *s,
+                                                 unsigned nr_replicas)
+{
+       return max(0, (int) nr_replicas -
+                  s->nr_replicas -
+                  s->replicas_reserved);
+}
+
+static int bch2_get_page_disk_reservation(struct bch_fs *c,
+                               struct bch_inode_info *inode,
+                               struct page *page, bool check_enospc)
+{
+       struct bch_page_state *s = bch2_page_state_create(page, 0);
+       unsigned nr_replicas = inode_nr_replicas(c, inode);
+       struct disk_reservation disk_res = { 0 };
+       unsigned i, disk_res_sectors = 0;
+       int ret;
+
+       if (!s)
+               return -ENOMEM;
+
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
+
+       if (!disk_res_sectors)
+               return 0;
+
+       ret = bch2_disk_reservation_get(c, &disk_res,
+                                       disk_res_sectors, 1,
+                                       !check_enospc
+                                       ? BCH_DISK_RESERVATION_NOFAIL
+                                       : 0);
+       if (unlikely(ret))
+               return ret;
+
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               s->s[i].replicas_reserved +=
+                       sectors_to_reserve(&s->s[i], nr_replicas);
+
+       return 0;
+}
+
+static int bch2_get_page_quota_reservation(struct bch_fs *c,
+                       struct bch_inode_info *inode,
+                       struct page *page, bool check_enospc)
+{
+       struct bch_page_state *s = bch2_page_state_create(page, 0);
        struct quota_res quota_res = { 0 };
+       unsigned i, quota_res_sectors = 0;
        int ret;
 
-       EBUG_ON(!PageLocked(page));
+       if (!s)
+               return -ENOMEM;
 
-       if (s->replicas_reserved < nr_replicas) {
-               ret = bch2_disk_reservation_get(c, &disk_res, PAGE_SECTORS,
-                               nr_replicas - s->replicas_reserved,
-                               !check_enospc ? BCH_DISK_RESERVATION_NOFAIL : 0);
-               if (unlikely(ret))
-                       return ret;
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED;
 
-               page_state_cmpxchg(s, new, ({
-                       BUG_ON(new.replicas_reserved +
-                              disk_res.nr_replicas != nr_replicas);
-                       new.replicas_reserved += disk_res.nr_replicas;
-               }));
-       }
+       if (!quota_res_sectors)
+               return 0;
 
-       if (!s->quota_reserved &&
-           s->sectors + s->dirty_sectors < PAGE_SECTORS) {
-               ret = bch2_quota_reservation_add(c, inode, &quota_res,
-                                                PAGE_SECTORS,
-                                                check_enospc);
-               if (unlikely(ret))
-                       return ret;
+       ret = bch2_quota_reservation_add(c, inode, &quota_res,
+                                        quota_res_sectors,
+                                        check_enospc);
+       if (unlikely(ret))
+               return ret;
 
-               page_state_cmpxchg(s, new, ({
-                       BUG_ON(new.quota_reserved);
-                       new.quota_reserved = 1;
-               }));
-       }
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               if (s->s[i].state == SECTOR_UNALLOCATED)
+                       s->s[i].state = SECTOR_QUOTA_RESERVED;
 
-       return ret;
+       return 0;
+}
+
+static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
+                                    struct page *page, bool check_enospc)
+{
+       return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?:
+               bch2_get_page_quota_reservation(c, inode, page, check_enospc);
 }
 
 static void bch2_clear_page_bits(struct page *page)
 {
        struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       struct bch_page_state s;
+       struct bch_page_state *s = bch2_page_state(page);
+       int i, dirty_sectors = 0;
 
-       EBUG_ON(!PageLocked(page));
-
-       if (!PagePrivate(page))
+       if (!s)
                return;
 
-       s.v = xchg(&page_state(page)->v, 0);
-       ClearPagePrivate(page);
+       for (i = 0; i < ARRAY_SIZE(s->s); i++) {
+               if (s->s[i].state == SECTOR_DIRTY) {
+                       dirty_sectors++;
+                       s->s[i].state = SECTOR_UNALLOCATED;
+               }
+       }
 
-       if (s.dirty_sectors)
-               i_sectors_acct(c, inode, NULL, -s.dirty_sectors);
+       if (dirty_sectors)
+               i_sectors_acct(c, inode, NULL, -dirty_sectors);
+       bch2_put_page_reservation(c, inode, page);
 
-       __bch2_put_page_reservation(c, inode, s);
+       bch2_page_state_release(page);
 }
 
-int bch2_set_page_dirty(struct page *page)
+static void __bch2_set_page_dirty(struct page *page)
 {
        struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_page_state *s = bch2_page_state(page);
        struct quota_res quota_res = { 0 };
-       struct bch_page_state old, new;
+       unsigned i, dirty_sectors = 0;
 
-       old = page_state_cmpxchg(page_state(page), new,
-               new.dirty_sectors = PAGE_SECTORS - new.sectors;
-               new.quota_reserved = 0;
-       );
+       BUG_ON(!s);
 
-       quota_res.sectors += old.quota_reserved * PAGE_SECTORS;
+       for (i = 0; i < ARRAY_SIZE(s->s); i++) {
+               if (s->s[i].state == SECTOR_QUOTA_RESERVED)
+                       quota_res.sectors++;
+
+               if (s->s[i].state == SECTOR_UNALLOCATED ||
+                   s->s[i].state == SECTOR_QUOTA_RESERVED) {
+                       s->s[i].state = SECTOR_DIRTY;
+                       dirty_sectors++;
+               }
+       }
 
-       if (old.dirty_sectors != new.dirty_sectors)
-               i_sectors_acct(c, inode, &quota_res,
-                              new.dirty_sectors - old.dirty_sectors);
+       if (dirty_sectors)
+               i_sectors_acct(c, inode, &quota_res, dirty_sectors);
        bch2_quota_reservation_put(c, inode, &quota_res);
+}
 
-       return __set_page_dirty_nobuffers(page);
+static void bch2_set_page_dirty(struct page *page)
+{
+       __bch2_set_page_dirty(page);
+       __set_page_dirty_nobuffers(page);
 }
 
-int bch2_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
        struct file *file = vmf->vma->vm_file;
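
The block above is the structural heart of this file's changes: the old bch_page_state was a single word packed into page->private and updated with cmpxchg (writeback completion used to update it from interrupt context), while the new one is a kzalloc'd array hung off page->private with one entry per 512-byte sector, manipulated under the page lock. Each sector tracks its fully-allocated replicas, its reservation, and a four-state lifecycle: UNALLOCATED, QUOTA_RESERVED, DIRTY, ALLOCATED. A standalone model of the layout and of the __bch2_set_page_dirty() walk; the field widths follow the patch, the quota and i_sectors accounting calls are elided:

#define PAGE_SECTORS 8	/* 4K page, 512-byte sectors */

enum sector_state {
	SECTOR_UNALLOCATED,
	SECTOR_QUOTA_RESERVED,
	SECTOR_DIRTY,
	SECTOR_ALLOCATED,
};

struct page_sector {
	unsigned	nr_replicas:3;
	unsigned	replicas_reserved:3;
	unsigned	state:2;	/* enum sector_state */
};

struct page_state {
	struct page_sector s[PAGE_SECTORS];
};

/* Mirrors __bch2_set_page_dirty(): quota reservations are consumed as
 * sectors become dirty, and only newly-dirtied sectors are counted
 * toward i_blocks. */
static unsigned page_set_dirty(struct page_state *s, unsigned *quota_consumed)
{
	unsigned i, dirty = 0;

	for (i = 0; i < PAGE_SECTORS; i++) {
		if (s->s[i].state == SECTOR_QUOTA_RESERVED)
			(*quota_consumed)++;

		if (s->s[i].state == SECTOR_UNALLOCATED ||
		    s->s[i].state == SECTOR_QUOTA_RESERVED) {
			s->s[i].state = SECTOR_DIRTY;
			dirty++;
		}
	}
	return dirty;
}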
@@ -713,7 +793,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf)
        }
 
        if (!PageDirty(page))
-               set_page_dirty(page);
+               bch2_set_page_dirty(page);
        wait_for_stable_page(page);
 out:
        if (current->pagecache_lock != &mapping->add_lock)
@@ -761,11 +841,18 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
                return ret;
 
        if (PagePrivate(page)) {
-               *page_state(newpage) = *page_state(page);
                ClearPagePrivate(page);
+               get_page(newpage);
+               set_page_private(newpage, page_private(page));
+               set_page_private(page, 0);
+               put_page(page);
+               SetPagePrivate(newpage);
        }
 
-       migrate_page_copy(newpage, page);
+       if (mode != MIGRATE_SYNC_NO_COPY)
+               migrate_page_copy(newpage, page);
+       else
+               migrate_page_states(newpage, page);
        return MIGRATEPAGE_SUCCESS;
 }
 #endif
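
The migration hunk deserves a second look: __bch2_page_state_create() takes a page reference on behalf of the private data, because migrate_page_move_mapping() assumes pages with private state have an elevated count, so moving the state pointer to the new page has to move that reference with it. The handoff written out as one helper (kernel page-flag helpers assumed; this is a sketch of the pattern, not a new API):

static void transfer_page_private(struct page *page, struct page *newpage)
{
	ClearPagePrivate(page);
	get_page(newpage);			/* ref now held via newpage */
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);				/* drop the old page's ref */
	SetPagePrivate(newpage);
}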
@@ -791,7 +878,7 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
        else if (!bio_can_add_page_contig(bio, page))
                return -1;
 
-       __bio_add_page(bio, page, PAGE_SIZE, 0);
+       BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0));
        return 0;
 }
 
@@ -799,10 +886,11 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
 
 static void bch2_readpages_end_io(struct bio *bio)
 {
+       struct bvec_iter_all iter;
        struct bio_vec *bv;
        int i;
 
-       bio_for_each_segment_all(bv, bio, i) {
+       bio_for_each_segment_all(bv, bio, i, iter) {
                struct page *page = bv->bv_page;
 
                if (!bio->bi_status) {
@@ -848,7 +936,8 @@ static int readpages_iter_init(struct readpages_iter *iter,
        while (!list_empty(pages)) {
                struct page *page = list_last_entry(pages, struct page, lru);
 
-               prefetchw(&page->flags);
+               __bch2_page_state_create(page, __GFP_NOFAIL);
+
                iter->pages[iter->nr_pages++] = page;
                list_del(&page->lru);
        }
@@ -884,6 +973,7 @@ static inline struct page *readpage_iter_next(struct readpages_iter *iter)
                iter->idx++;
                iter->nr_added++;
 
+               __bch2_page_state_release(page);
                put_page(page);
        }
 
@@ -894,7 +984,6 @@ static inline struct page *readpage_iter_next(struct readpages_iter *iter)
 out:
        EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
 
-       page_state_init_for_read(iter->pages[iter->idx]);
        return iter->pages[iter->idx];
 }
 
@@ -904,21 +993,20 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
        struct bio_vec bv;
        unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k);
 
-       bio_for_each_segment(bv, bio, iter) {
-               /* brand new pages, don't need to be locked: */
-
-               struct bch_page_state *s = page_state(bv.bv_page);
-
-               /* sectors in @k from the start of this page: */
-               unsigned k_sectors = k.k->size - (iter.bi_sector - k.k->p.offset);
-
-               unsigned page_sectors = min(bv.bv_len >> 9, k_sectors);
+       BUG_ON(bio->bi_iter.bi_sector   < bkey_start_offset(k.k));
+       BUG_ON(bio_end_sector(bio)      > k.k->p.offset);
 
-               s->nr_replicas = page_sectors == PAGE_SECTORS
-                       ? nr_ptrs : 0;
 
-               BUG_ON(s->sectors + page_sectors > PAGE_SECTORS);
-               s->sectors += page_sectors;
+       bio_for_each_segment(bv, bio, iter) {
+               struct bch_page_state *s = bch2_page_state(bv.bv_page);
+               unsigned i;
+
+               for (i = bv.bv_offset >> 9;
+                    i < (bv.bv_offset + bv.bv_len) >> 9;
+                    i++) {
+                       s->s[i].nr_replicas = nr_ptrs;
+                       s->s[i].state = SECTOR_ALLOCATED;
+               }
        }
 }
 
@@ -949,12 +1037,15 @@ static void readpage_bio_extend(struct readpages_iter *iter,
                        if (!page)
                                break;
 
-                       page_state_init_for_read(page);
+                       if (!__bch2_page_state_create(page, 0)) {
+                               put_page(page);
+                               break;
+                       }
 
                        ret = add_to_page_cache_lru(page, iter->mapping,
                                                    page_offset, GFP_NOFS);
                        if (ret) {
-                               ClearPagePrivate(page);
+                               __bch2_page_state_release(page);
                                put_page(page);
                                break;
                        }
@@ -962,7 +1053,7 @@ static void readpage_bio_extend(struct readpages_iter *iter,
                        put_page(page);
                }
 
-               __bio_add_page(bio, page, PAGE_SIZE, 0);
+               BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0));
        }
 }
 
@@ -1076,7 +1167,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
                bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0);
                rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT;
                rbio->bio.bi_end_io = bch2_readpages_end_io;
-               __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0);
+               BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
 
                bchfs_read(&trans, iter, rbio, inode->v.i_ino,
                           &readpages_iter);
@@ -1097,7 +1188,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
        struct btree_trans trans;
        struct btree_iter *iter;
 
-       page_state_init_for_read(page);
+       bch2_page_state_create(page, __GFP_NOFAIL);
 
        bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
        bio_add_page_contig(&rbio->bio, page);
@@ -1184,11 +1275,12 @@ static void bch2_writepage_io_done(struct closure *cl)
                                        struct bch_writepage_io, cl);
        struct bch_fs *c = io->op.op.c;
        struct bio *bio = &io->op.op.wbio.bio;
+       struct bvec_iter_all iter;
        struct bio_vec *bvec;
        unsigned i;
 
        if (io->op.op.error) {
-               bio_for_each_segment_all(bvec, bio, i) {
+               bio_for_each_segment_all(bvec, bio, i, iter) {
                        SetPageError(bvec->bv_page);
                        mapping_set_error(bvec->bv_page->mapping, -EIO);
                }
@@ -1215,7 +1307,7 @@ static void bch2_writepage_io_done(struct closure *cl)
                i_sectors_acct(c, io->op.inode, NULL,
                               io->op.sectors_added - (s64) io->new_sectors);
 
-       bio_for_each_segment_all(bvec, bio, i)
+       bio_for_each_segment_all(bvec, bio, i, iter)
                end_page_writeback(bvec->bv_page);
 
        closure_return_with_destructor(&io->cl, bch2_writepage_io_free);
@@ -1266,10 +1358,13 @@ static int __bch2_writepage(struct page *page,
        struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_writepage_state *w = data;
-       struct bch_page_state new, old;
-       unsigned offset, nr_replicas_this_write;
+       struct bch_page_state *s;
+       unsigned offset, nr_replicas_this_write = U32_MAX;
+       unsigned dirty_sectors = 0, reserved_sectors = 0;
        loff_t i_size = i_size_read(&inode->v);
        pgoff_t end_index = i_size >> PAGE_SHIFT;
+       unsigned i;
+       int ret;
 
        EBUG_ON(!PageUptodate(page));
 
@@ -1293,33 +1388,34 @@ static int __bch2_writepage(struct page *page,
         */
        zero_user_segment(page, offset, PAGE_SIZE);
 do_io:
-       EBUG_ON(!PageLocked(page));
+       s = bch2_page_state_create(page, __GFP_NOFAIL);
 
-       /* Before unlocking the page, transfer reservation to w->io: */
-       old = page_state_cmpxchg(page_state(page), new, {
-               /*
-                * If we didn't get a reservation, we can only write out the
-                * number of (fully allocated) replicas that currently exist,
-                * and only if the entire page has been written:
-                */
+       ret = bch2_get_page_disk_reservation(c, inode, page, true);
+       if (ret) {
+               SetPageError(page);
+               mapping_set_error(page->mapping, ret);
+               unlock_page(page);
+               return 0;
+       }
+
+       for (i = 0; i < PAGE_SECTORS; i++)
                nr_replicas_this_write =
-                       max_t(unsigned,
-                             new.replicas_reserved,
-                             (new.sectors == PAGE_SECTORS
-                              ? new.nr_replicas : 0));
+                       min_t(unsigned, nr_replicas_this_write,
+                             s->s[i].nr_replicas +
+                             s->s[i].replicas_reserved);
 
-               BUG_ON(!nr_replicas_this_write);
+       /* Before unlocking the page, transfer reservation to w->io: */
 
-               new.nr_replicas = w->opts.compression
-                       ? 0
-                       : nr_replicas_this_write;
+       for (i = 0; i < PAGE_SECTORS; i++) {
+               s->s[i].nr_replicas = w->opts.compression
+                       ? 0 : nr_replicas_this_write;
 
-               new.replicas_reserved = 0;
+               reserved_sectors += s->s[i].replicas_reserved;
+               s->s[i].replicas_reserved = 0;
 
-               new.sectors += new.dirty_sectors;
-               BUG_ON(new.sectors != PAGE_SECTORS);
-               new.dirty_sectors = 0;
-       });
+               dirty_sectors += s->s[i].state == SECTOR_DIRTY;
+               s->s[i].state = SECTOR_ALLOCATED;
+       }
 
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);
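
With per-sector bookkeeping, __bch2_writepage() can no longer assume uniform provisioning across a page: the number of replicas this write can target is the minimum over all sectors of what each already has allocated plus reserved, and the per-sector reservations are summed into the write as they are cleared. A model of the min computation, reusing the page_state layout sketched earlier:

static unsigned writepage_nr_replicas(const struct page_state *s)
{
	unsigned i, nr = ~0U;

	for (i = 0; i < PAGE_SECTORS; i++) {
		unsigned have = s->s[i].nr_replicas +
				s->s[i].replicas_reserved;

		nr = have < nr ? have : nr;	/* min across sectors */
	}
	return nr;
}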
@@ -1334,12 +1430,12 @@ do_io:
                bch2_writepage_io_alloc(c, w, inode, page,
                                        nr_replicas_this_write);
 
-       w->io->new_sectors += new.sectors - old.sectors;
+       w->io->new_sectors += dirty_sectors;
 
        BUG_ON(inode != w->io->op.inode);
        BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page));
 
-       w->io->op.op.res.sectors += old.replicas_reserved * PAGE_SECTORS;
+       w->io->op.op.res.sectors += reserved_sectors;
        w->io->op.new_i_size = i_size;
 
        if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1478,7 +1574,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
                if (!PageUptodate(page))
                        SetPageUptodate(page);
                if (!PageDirty(page))
-                       set_page_dirty(page);
+                       bch2_set_page_dirty(page);
 
                inode->ei_last_dirtied = (unsigned long) current;
        } else {
@@ -1596,7 +1692,7 @@ out:
                if (!PageUptodate(pages[i]))
                        SetPageUptodate(pages[i]);
                if (!PageDirty(pages[i]))
-                       set_page_dirty(pages[i]);
+                       bch2_set_page_dirty(pages[i]);
                unlock_page(pages[i]);
                put_page(pages[i]);
        }
@@ -1812,6 +1908,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
        struct address_space *mapping = req->ki_filp->f_mapping;
        struct bch_inode_info *inode = dio->iop.inode;
        struct bio *bio = &dio->iop.op.wbio.bio;
+       struct bvec_iter_all iter;
        struct bio_vec *bv;
        loff_t offset;
        bool sync;
@@ -1889,7 +1986,7 @@ err_wait_io:
 
                closure_sync(&dio->cl);
 loop:
-               bio_for_each_segment_all(bv, bio, i)
+               bio_for_each_segment_all(bv, bio, i, iter)
                        put_page(bv->bv_page);
                if (!dio->iter.count || dio->iop.op.error)
                        break;
@@ -2223,7 +2320,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
                zero_user_segment(page, 0, end_offset);
 
        if (!PageDirty(page))
-               set_page_dirty(page);
+               bch2_set_page_dirty(page);
 unlock:
        unlock_page(page);
        put_page(page);
@@ -2677,12 +2774,17 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
 
 static bool page_is_data(struct page *page)
 {
-       EBUG_ON(!PageLocked(page));
+       struct bch_page_state *s = bch2_page_state(page);
+       unsigned i;
+
+       if (!s)
+               return false;
+
+       for (i = 0; i < PAGE_SECTORS; i++)
+               if (s->s[i].state >= SECTOR_DIRTY)
+                       return true;
 
-       /* XXX: should only have to check PageDirty */
-       return PagePrivate(page) &&
-               (page_state(page)->sectors ||
-                page_state(page)->dirty_sectors);
+       return false;
 }
 
 static loff_t bch2_next_pagecache_data(struct inode *vinode,
index 30d1ea9d2b85e1d6f2e907880aee842339c81e2f..2b3ac496dc3e04a396ca50dd92852178a8783789 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FS_IO_H
 #define _BCACHEFS_FS_IO_H
 
@@ -8,8 +9,6 @@
 
 #include <linux/uio.h>
 
-int bch2_set_page_dirty(struct page *);
-
 int bch2_writepage(struct page *, struct writeback_control *);
 int bch2_readpage(struct file *, struct page *);
 
@@ -33,7 +32,7 @@ long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
 
 loff_t bch2_llseek(struct file *, loff_t, int);
 
-int bch2_page_mkwrite(struct vm_fault *);
+vm_fault_t bch2_page_mkwrite(struct vm_fault *);
 void bch2_invalidatepage(struct page *, unsigned int, unsigned int);
 int bch2_releasepage(struct page *, gfp_t);
 int bch2_migrate_page(struct address_space *, struct page *,
index c707c46bde95f1a0969a2cdff02f8b329a885af2..e80576f5a980f0d3110be2a5fc20715c30fb1a0a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
@@ -204,7 +205,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
        if (ret)
                goto err2;
 
-       bch2_lock_inodes(src, dst);
+       bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
 
        if (inode_attr_changing(src, dst, Inode_opt_project)) {
                ret = bch2_fs_quota_transfer(c, dst,
@@ -217,7 +218,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
 
        ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
 err3:
-       bch2_unlock_inodes(src, dst);
+       bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
 
        /* return true if we did work */
        if (ret >= 0)
index c7124ed3b620356e19ee84d7889fdfc30fbce170..f201980ef2c38e2dbbe5faf7138f0d2a2c479f16 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FS_IOCTL_H
 #define _BCACHEFS_FS_IOCTL_H
 
index a324278b6f43d1df3dcf0d67664c2b52d0458ec3..b1d23e3f7a31a6e7a879a249939d84d417ad912f 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
@@ -593,7 +594,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        struct btree_trans trans;
        int ret;
 
-       bch2_lock_inodes(dir, inode);
+       bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
        bch2_trans_init(&trans, c, 4, 1024);
 retry:
        bch2_trans_begin(&trans);
@@ -626,7 +627,7 @@ retry:
                                      ATTR_MTIME);
 err:
        bch2_trans_exit(&trans);
-       bch2_unlock_inodes(dir, inode);
+       bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 
        return ret;
 }
@@ -803,7 +804,8 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
 
        bch2_trans_init(&trans, c, 8, 2048);
 
-       bch2_lock_inodes(i.src_dir,
+       bch2_lock_inodes(INODE_UPDATE_LOCK,
+                        i.src_dir,
                         i.dst_dir,
                         i.src_inode,
                         i.dst_inode);
@@ -901,7 +903,8 @@ err:
                                       1 << QTYP_PRJ,
                                       KEY_TYPE_QUOTA_NOCHECK);
 
-       bch2_unlock_inodes(i.src_dir,
+       bch2_unlock_inodes(INODE_UPDATE_LOCK,
+                          i.src_dir,
                           i.dst_dir,
                           i.src_inode,
                           i.dst_inode);
@@ -1263,7 +1266,7 @@ static const struct address_space_operations bch_address_space_operations = {
        .readpage       = bch2_readpage,
        .writepages     = bch2_writepages,
        .readpages      = bch2_readpages,
-       .set_page_dirty = bch2_set_page_dirty,
+       .set_page_dirty = __set_page_dirty_nobuffers,
        .write_begin    = bch2_write_begin,
        .write_end      = bch2_write_end,
        .invalidatepage = bch2_invalidatepage,
@@ -1731,7 +1734,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 
        sb->s_bdi->congested_fn         = bch2_congested;
        sb->s_bdi->congested_data       = c;
-       sb->s_bdi->ra_pages             = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+       sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
 
        for_each_online_member(ca, c, i) {
                struct block_device *bdev = ca->disk_sb.bdev;
index 374f7fd1ffbf980852e62a01daa26b6cbcc7c96d..226223b058a90b7c2016e007a7e51db374411617 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FS_H
 #define _BCACHEFS_FS_H
 
@@ -35,24 +36,42 @@ static inline int ptrcmp(void *l, void *r)
        return cmp_int(l, r);
 }
 
-#define __bch2_lock_inodes(_lock, ...)                                 \
+enum bch_inode_lock_op {
+       INODE_LOCK              = (1U << 0),
+       INODE_UPDATE_LOCK       = (1U << 1),
+};
+
+#define bch2_lock_inodes(_locks, ...)                                  \
 do {                                                                   \
        struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };             \
        unsigned i;                                                     \
                                                                        \
-       bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp);                 \
+       bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);                  \
                                                                        \
-       for (i = ARRAY_SIZE(a) - 1; a[i]; --i)                          \
+       for (i = 1; i < ARRAY_SIZE(a); i++)                             \
                if (a[i] != a[i - 1]) {                                 \
-                       if (_lock)                                      \
+                       if (_locks & INODE_LOCK)                        \
+                               down_write_nested(&a[i]->v.i_rwsem, i); \
+                       if (_locks & INODE_UPDATE_LOCK)                 \
                                mutex_lock_nested(&a[i]->ei_update_lock, i);\
-                       else                                            \
-                               mutex_unlock(&a[i]->ei_update_lock);    \
                }                                                       \
 } while (0)
 
-#define bch2_lock_inodes(...)  __bch2_lock_inodes(true, __VA_ARGS__)
-#define bch2_unlock_inodes(...)        __bch2_lock_inodes(false, __VA_ARGS__)
+#define bch2_unlock_inodes(_locks, ...)                                        \
+do {                                                                   \
+       struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };             \
+       unsigned i;                                                     \
+                                                                       \
+       bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);                  \
+                                                                       \
+       for (i = 1; i < ARRAY_SIZE(a); i++)                             \
+               if (a[i] != a[i - 1]) {                                 \
+                       if (_locks & INODE_LOCK)                        \
+                               up_write(&a[i]->v.i_rwsem);             \
+                       if (_locks & INODE_UPDATE_LOCK)                 \
+                               mutex_unlock(&a[i]->ei_update_lock);    \
+               }                                                       \
+} while (0)
 
 static inline struct bch_inode_info *file_bch_inode(struct file *file)
 {
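
bch2_lock_inodes() and its new unlock counterpart take a bitmask selecting which locks to operate on (i_rwsem, ei_update_lock, or both), but the deadlock-avoidance scheme is the familiar one: sort the inodes by address and skip duplicates, so any two tasks locking overlapping sets acquire in the same global order; the array index doubles as the lockdep nesting level. A standalone model with pthread mutexes in place of the inode locks:

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

static int ptr_cmp(const void *l, const void *r)
{
	uintptr_t a = (uintptr_t) *(void * const *) l;
	uintptr_t b = (uintptr_t) *(void * const *) r;

	return a < b ? -1 : a > b;
}

/* sort by address, then lock ascending, skipping duplicates */
static void lock_all(pthread_mutex_t **m, unsigned n)
{
	unsigned i;

	qsort(m, n, sizeof(*m), ptr_cmp);

	for (i = 0; i < n; i++)
		if (!i || m[i] != m[i - 1])
			pthread_mutex_lock(m[i]);
}

static void unlock_all(pthread_mutex_t **m, unsigned n)	/* m still sorted */
{
	unsigned i;

	for (i = 0; i < n; i++)
		if (!i || m[i] != m[i - 1])
			pthread_mutex_unlock(m[i]);
}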
index 433552df9049a700ec3ec33bb9efcc51f1288c52..e3738757b6a0cb9f81e87384f13b5a71c46cfdd1 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_update.h"
index 1f03079ce8d93d3d3e59d0ebac79be6c10c748e6..9e4af02bde1e344b285da0c3bfa19934b23bd705 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_FSCK_H
 #define _BCACHEFS_FSCK_H
 
index 59ae6d073a3336b319eb548f7d604e352818ead9..05b7f6594113827b6316075f9a396b2c22706013 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
@@ -245,6 +246,9 @@ const char *bch2_inode_generation_invalid(const struct bch_fs *c,
 void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
                                   struct bkey_s_c k)
 {
+       struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
+
+       pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
 }
 
 void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
index e7e8507d928545f2cfda2a135e0f54566aeba633..af0c355f2f04e4511826c2023117cc0057a8de76 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_INODE_H
 #define _BCACHEFS_INODE_H
 
index dc922a9140b1fbc94f36bdc804e806f4a43cfecf..4d81b6e6e54f79a5510d36941d50ca112810acdd 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Some low level IO code, and hacks for various block layer limitations
  *
@@ -121,23 +122,23 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
 
 void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
 {
+       struct bvec_iter_all iter;
        struct bio_vec *bv;
        unsigned i;
 
-       bio_for_each_segment_all(bv, bio, i)
+       bio_for_each_segment_all(bv, bio, i, iter)
                if (bv->bv_page != ZERO_PAGE(0))
                        mempool_free(bv->bv_page, &c->bio_bounce_pages);
        bio->bi_vcnt = 0;
 }
 
-static void bch2_bio_alloc_page_pool(struct bch_fs *c, struct bio *bio,
-                                   bool *using_mempool)
+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
 {
-       struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt++];
+       struct page *page;
 
        if (likely(!*using_mempool)) {
-               bv->bv_page = alloc_page(GFP_NOIO);
-               if (unlikely(!bv->bv_page)) {
+               page = alloc_page(GFP_NOIO);
+               if (unlikely(!page)) {
                        mutex_lock(&c->bio_bounce_pages_lock);
                        *using_mempool = true;
                        goto pool_alloc;
@@ -145,57 +146,29 @@ static void bch2_bio_alloc_page_pool(struct bch_fs *c, struct bio *bio,
                }
        } else {
 pool_alloc:
-               bv->bv_page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
+               page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
        }
 
-       bv->bv_len = PAGE_SIZE;
-       bv->bv_offset = 0;
+       return page;
 }
 
 void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
-                              size_t bytes)
+                              size_t size)
 {
        bool using_mempool = false;
 
-       BUG_ON(DIV_ROUND_UP(bytes, PAGE_SIZE) > bio->bi_max_vecs);
+       while (size) {
+               struct page *page = __bio_alloc_page_pool(c, &using_mempool);
+               unsigned len = min(PAGE_SIZE, size);
 
-       bio->bi_iter.bi_size = bytes;
-
-       while (bio->bi_vcnt < DIV_ROUND_UP(bytes, PAGE_SIZE))
-               bch2_bio_alloc_page_pool(c, bio, &using_mempool);
+               BUG_ON(!bio_add_page(bio, page, len, 0));
+               size -= len;
+       }
 
        if (using_mempool)
                mutex_unlock(&c->bio_bounce_pages_lock);
 }
 
-void bch2_bio_alloc_more_pages_pool(struct bch_fs *c, struct bio *bio,
-                                   size_t bytes)
-{
-       while (bio->bi_vcnt < DIV_ROUND_UP(bytes, PAGE_SIZE)) {
-               struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
-
-               BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
-
-               bv->bv_page = alloc_page(GFP_NOIO);
-               if (!bv->bv_page) {
-                       /*
-                        * We already allocated from mempool, we can't allocate from it again
-                        * without freeing the pages we already allocated or else we could
-                        * deadlock:
-                        */
-                       bch2_bio_free_pages_pool(c, bio);
-                       bch2_bio_alloc_pages_pool(c, bio, bytes);
-                       return;
-               }
-
-               bv->bv_len = PAGE_SIZE;
-               bv->bv_offset = 0;
-               bio->bi_vcnt++;
-       }
-
-       bio->bi_iter.bi_size = bytes;
-}
-
 /* Writes */
 
 void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
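
This hunk replaces hand-rolled bio_vec bookkeeping with bio_add_page(): __bio_alloc_page_pool() hands back a bare page (switching permanently to the bounce mempool once a plain alloc_page() fails), bch2_bio_alloc_pages_pool() attaches pages until the requested size is reached, and bio_add_page() maintains bi_vcnt and bi_iter.bi_size, which is why bch2_bio_alloc_more_pages_pool() can be deleted outright. Note also that bio_for_each_segment_all() now takes a struct bvec_iter_all, matching the newer kernel iterator API. A hypothetical caller sketch (the bioset name is illustrative):

	struct bio *bio = bio_alloc_bioset(GFP_NOIO,
					   DIV_ROUND_UP(size, PAGE_SIZE),
					   &c->bio_write);

	bch2_bio_alloc_pages_pool(c, bio, size);
	/* bio->bi_iter.bi_size == size; every segment is a full page
	 * except possibly the last */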
@@ -481,8 +454,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
        wbio->bio.bi_opf        = src->bi_opf;
 
        if (buf) {
-               bio->bi_iter.bi_size = output_available;
-               bch2_bio_map(bio, buf);
+               bch2_bio_map(bio, buf, output_available);
                return bio;
        }
 
@@ -492,31 +464,17 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
         * We can't use mempool for more than c->sb.encoded_extent_max
         * worth of pages, but we'd like to allocate more if we can:
         */
-       while (bio->bi_iter.bi_size < output_available) {
-               unsigned len = min_t(unsigned, PAGE_SIZE,
-                                    output_available - bio->bi_iter.bi_size);
-               struct page *p;
-
-               p = alloc_page(GFP_NOIO);
-               if (!p) {
-                       unsigned pool_max =
-                               min_t(unsigned, output_available,
-                                     c->sb.encoded_extent_max << 9);
-
-                       if (bio_sectors(bio) < pool_max)
-                               bch2_bio_alloc_pages_pool(c, bio, pool_max);
-                       break;
-               }
+       bch2_bio_alloc_pages_pool(c, bio,
+                                 min_t(unsigned, output_available,
+                                       c->sb.encoded_extent_max << 9));
 
-               bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
-                       .bv_page        = p,
-                       .bv_len         = len,
-                       .bv_offset      = 0,
-               };
-               bio->bi_iter.bi_size += len;
-       }
+       if (bio->bi_iter.bi_size < output_available)
+               *page_alloc_failed =
+                       bch2_bio_alloc_pages(bio,
+                                            output_available -
+                                            bio->bi_iter.bi_size,
+                                            GFP_NOFS) != 0;
 
-       *page_alloc_failed = bio->bi_vcnt < pages;
        return bio;
 }
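
A worked example of the two-stage allocation above, under assumed numbers: with c->sb.encoded_extent_max = 128 sectors (64 KiB) and output_available = 256 KiB, the bch2_bio_alloc_pages_pool() call draws 64 KiB from the bounce mempool, which cannot fail, and the bch2_bio_alloc_pages() call then attempts a plain GFP_NOFS allocation of the remaining 192 KiB; *page_alloc_failed records whether that opportunistic top-up fell short, in which case the write simply proceeds with a smaller bounce buffer.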
 
@@ -820,12 +778,6 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
        }
 
        dst->bi_iter.bi_size = total_output;
-
-       /* Free unneeded pages after compressing: */
-       if (to_wbio(dst)->bounce)
-               while (dst->bi_vcnt > DIV_ROUND_UP(dst->bi_iter.bi_size, PAGE_SIZE))
-                       mempool_free(dst->bi_io_vec[--dst->bi_vcnt].bv_page,
-                                    &c->bio_bounce_pages);
 do_write:
        /* might have done a realloc... */
 
@@ -956,7 +908,6 @@ void bch2_write(struct closure *cl)
        BUG_ON(!op->nr_replicas);
        BUG_ON(!op->write_point.v);
        BUG_ON(!bkey_cmp(op->pos, POS_MAX));
-       BUG_ON(bio_sectors(&op->wbio.bio) > U16_MAX);
 
        op->start_time = local_clock();
 
@@ -1003,23 +954,23 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
                                  struct bch_io_opts opts,
                                  unsigned flags)
 {
-       if (!opts.promote_target)
+       if (!bkey_extent_is_data(k.k))
                return false;
 
        if (!(flags & BCH_READ_MAY_PROMOTE))
                return false;
 
-       if (percpu_ref_is_dying(&c->writes))
-               return false;
-
-       if (!bkey_extent_is_data(k.k))
+       if (!opts.promote_target)
                return false;
 
-       if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target))
+       if (bch2_extent_has_target(c, bkey_s_c_to_extent(k),
+                                  opts.promote_target))
                return false;
 
-       if (bch2_target_congested(c, opts.promote_target))
+       if (bch2_target_congested(c, opts.promote_target)) {
+               /* XXX trace this */
                return false;
+       }
 
        if (rhashtable_lookup_fast(&c->promote_table, &pos,
                                   bch_promote_params))
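
The should_promote() reordering is about check ordering, not behaviour: the cheap, purely local tests (is this extent data at all, did the caller allow promotes, is a promote target configured) now run before the tests that consult shared state, and the percpu_ref_is_dying(&c->writes) check is dropped here because __promote_alloc() already bails out cleanly through percpu_ref_tryget(&c->writes).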
@@ -1080,22 +1031,18 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
                                          struct bpos pos,
                                          struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
-                                         unsigned rbio_sectors,
+                                         unsigned sectors,
                                          struct bch_read_bio **rbio)
 {
        struct promote_op *op = NULL;
        struct bio *bio;
-       unsigned rbio_pages = DIV_ROUND_UP(rbio_sectors, PAGE_SECTORS);
-       /* data might have to be decompressed in the write path: */
-       unsigned wbio_pages = DIV_ROUND_UP(pick->crc.uncompressed_size,
-                                          PAGE_SECTORS);
+       unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
        int ret;
 
        if (!percpu_ref_tryget(&c->writes))
                return NULL;
 
-       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * wbio_pages,
-                    GFP_NOIO);
+       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
        if (!op)
                goto err;
 
@@ -1103,37 +1050,32 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
        op->pos = pos;
 
        /*
-        * promotes require bouncing, but if the extent isn't
-        * checksummed/compressed it might be too big for the mempool:
+        * We don't use the mempool here because extents that aren't
+        * checksummed or compressed can be too big for the mempool:
         */
-       if (rbio_sectors > c->sb.encoded_extent_max) {
-               *rbio = kzalloc(sizeof(struct bch_read_bio) +
-                               sizeof(struct bio_vec) * rbio_pages,
-                               GFP_NOIO);
-               if (!*rbio)
-                       goto err;
+       *rbio = kzalloc(sizeof(struct bch_read_bio) +
+                       sizeof(struct bio_vec) * pages,
+                       GFP_NOIO);
+       if (!*rbio)
+               goto err;
 
-               rbio_init(&(*rbio)->bio, opts);
-               bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs,
-                        rbio_pages);
+       rbio_init(&(*rbio)->bio, opts);
+       bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages);
 
-               (*rbio)->bio.bi_iter.bi_size = rbio_sectors << 9;
-               bch2_bio_map(&(*rbio)->bio, NULL);
+       if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
+                                GFP_NOIO))
+               goto err;
 
-               if (bch2_bio_alloc_pages(&(*rbio)->bio, GFP_NOIO))
-                       goto err;
-
-               (*rbio)->bounce         = true;
-               (*rbio)->split          = true;
-               (*rbio)->kmalloc        = true;
-       }
+       (*rbio)->bounce         = true;
+       (*rbio)->split          = true;
+       (*rbio)->kmalloc        = true;
 
        if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
                                          bch_promote_params))
                goto err;
 
        bio = &op->write.op.wbio.bio;
-       bio_init(bio, bio->bi_inline_vecs, wbio_pages);
+       bio_init(bio, bio->bi_inline_vecs, pages);
 
        ret = bch2_migrate_write_init(c, &op->write,
                        writepoint_hashed((unsigned long) current),
@@ -1167,8 +1109,9 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
                                               bool *read_full)
 {
        bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+       /* data might have to be decompressed in the write path: */
        unsigned sectors = promote_full
-               ? pick->crc.compressed_size
+               ? max(pick->crc.compressed_size, pick->crc.live_size)
                : bvec_iter_sectors(iter);
        struct bpos pos = promote_full
                ? bkey_start_pos(k.k)
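
The max() is the sizing counterpart of the comment above it: when promoting a whole extent, the write path may need to decompress the data, so the bounce bio sized here must cover the larger of the extent's on-disk (compressed) size and its live logical size, where the old code sized the read and write bios separately.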
@@ -1703,7 +1646,16 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
        }
 
        if (rbio) {
-               /* promote already allocated bounce rbio */
+               /*
+                * promote already allocated bounce rbio:
+                * promote needs to allocate a bio big enough for uncompressing
+                * data in the write path, but we're not going to use it all
+                * here:
+                */
+               BUG_ON(rbio->bio.bi_iter.bi_size <
+                      pick.crc.compressed_size << 9);
+               rbio->bio.bi_iter.bi_size =
+                       pick.crc.compressed_size << 9;
        } else if (bounce) {
                unsigned sectors = pick.crc.compressed_size;
 
@@ -1767,9 +1719,9 @@ noclone:
 
        bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        if (likely(!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT)))) {
                bio_inc_remaining(&orig->bio);
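
The mark_lock conversion repeated throughout this commit drops the bcachefs-local _preempt_disable/_preempt_enable read-side variants in favour of plain percpu_down_read()/percpu_up_read(), so readers of c->mark_lock become ordinary, preemptible percpu-rwsem readers; correspondingly, the journal hunk below also removes the bare preempt_disable()/preempt_enable() pair that balanced the old variant when no filesystem (c == NULL) was attached.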
index 3ca7797454224c13a6e8fdf3bbe48ec87de9660a..1e8470afbeca60492e6880ae83015aaf64736639 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_IO_H
 #define _BCACHEFS_IO_H
 
@@ -12,7 +13,6 @@
 
 void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
 void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
-void bch2_bio_alloc_more_pages_pool(struct bch_fs *, struct bio *, size_t);
 
 void bch2_latency_acct(struct bch_dev *, u64, int);
 
index 8ec846cc996f1d09aa2386f159e84f4d2832edd7..04f6d9a7c9a2af5c4fd71aed870f71062ab21a4e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_IO_TYPES_H
 #define _BCACHEFS_IO_TYPES_H
 
index 0a174dffe76e92bdc146fb3aff08d4ea7ad9fcb5..5c3e146e3942ac673735c2fb1bcde80782b6e3ca 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * bcachefs journalling code, for btree insertions
  *
@@ -820,10 +821,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                }
 
                if (c) {
-                       percpu_down_read_preempt_disable(&c->mark_lock);
+                       percpu_down_read(&c->mark_lock);
                        spin_lock(&c->journal.lock);
-               } else {
-                       preempt_disable();
                }
 
                pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
@@ -852,9 +851,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 
                if (c) {
                        spin_unlock(&c->journal.lock);
-                       percpu_up_read_preempt_enable(&c->mark_lock);
-               } else {
-                       preempt_enable();
+                       percpu_up_read(&c->mark_lock);
                }
 
                if (!new_fs)
index 2e7bc8e4553c5459108f7e13ebbd4e97f15a5a18..ec5ba2b9ef42c30019ee9bdd73bd43463cc15a67 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_H
 #define _BCACHEFS_JOURNAL_H
 
index 56950049e8929c7e9ff66b01e776323f1b61e69d..387377dadab53c729b75b8df729c9ec5e53443fc 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_foreground.h"
 #include "buckets.h"
@@ -494,9 +495,8 @@ reread:
                                                    sectors_read << 9));
                        bio_set_dev(bio, ca->disk_sb.bdev);
                        bio->bi_iter.bi_sector  = offset;
-                       bio->bi_iter.bi_size    = sectors_read << 9;
                        bio_set_op_attrs(bio, REQ_OP_READ, 0);
-                       bch2_bio_map(bio, buf->data);
+                       bch2_bio_map(bio, buf->data, sectors_read << 9);
 
                        ret = submit_bio_wait(bio);
                        bio_put(bio);
@@ -1086,12 +1086,11 @@ void bch2_journal_write(struct closure *cl)
                bio_reset(bio);
                bio_set_dev(bio, ca->disk_sb.bdev);
                bio->bi_iter.bi_sector  = ptr->offset;
-               bio->bi_iter.bi_size    = sectors << 9;
                bio->bi_end_io          = journal_write_endio;
                bio->bi_private         = ca;
                bio_set_op_attrs(bio, REQ_OP_WRITE,
                                 REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
-               bch2_bio_map(bio, jset);
+               bch2_bio_map(bio, jset, sectors << 9);
 
                trace_journal_write(bio);
                closure_bio_submit(bio, cl);
index 1dc193c390a98fdb3a1e582235c2d987980284f8..72e575f360afca614bb41178fc86f6558dd3e6a4 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_IO_H
 #define _BCACHEFS_JOURNAL_IO_H
 
index 5cee6872c1f1ef3b5a629cb0840d14a7447f12e7..695b2c8ba03b9aefbbd9a0c83ef3a8601fc4910b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "journal.h"
index 7ecfc81435971a3f538b8236f4c48434722e5cf3..9bf982a177971fbf917dc26ac1f60d300d9c0ac5 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_RECLAIM_H
 #define _BCACHEFS_JOURNAL_RECLAIM_H
 
index 231f5da22f45cee83ab98a239539dc932eb5f593..787d9f7638d05f82b6a73f3d034f053f21752563 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_iter.h"
index b1ad591dda0918711859392852a3c6c2c2a16bc8..03f4b97247fd63be8226751296d411dacebdfcf9 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 
index dec9dd2a561a3b20e553a78e642fb8b6d52e42ad..8eea12a03c06e424918e592c85ee02c5b8769a75 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_TYPES_H
 #define _BCACHEFS_JOURNAL_TYPES_H
 
index bc724e771aeae4d0c1290781454bf04d0febaee5..5da54ced9cadb736a1edf22e77ddf81b9292f6fd 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "keylist.h"
index 3106759e35f7e96c6391cff63d7ea7a16d9d9381..a7ff86b08abcd008f96b5f5b976812bab3fe20e2 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_KEYLIST_H
 #define _BCACHEFS_KEYLIST_H
 
index 48a17d7af6d8f030915d3c100c574603d1b15174..4b3ff7d8a875608d0587c0b811c4d12d2dc7a164 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_KEYLIST_TYPES_H
 #define _BCACHEFS_KEYLIST_TYPES_H
 
diff --git a/libbcachefs/lz4.h b/libbcachefs/lz4.h
deleted file mode 100644 (file)
index 22e7859..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __BCH_LZ4_H__
-#define __BCH_LZ4_H__
-
-int bch2_lz4_decompress(const unsigned char *src, size_t *src_len,
-                       unsigned char *dest, size_t actual_dest_len);
-
-#endif
diff --git a/libbcachefs/lz4_decompress.c b/libbcachefs/lz4_decompress.c
deleted file mode 100644 (file)
index 9e809f9..0000000
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
- * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
- */
-
-#ifndef STATIC
-#include <linux/module.h>
-#include <linux/kernel.h>
-#endif
-
-#include "lz4.h"
-
-/*
- * Detects 64 bits mode
- */
-#if defined(CONFIG_64BIT)
-#define LZ4_ARCH64 1
-#else
-#define LZ4_ARCH64 0
-#endif
-
-#include <asm/unaligned.h>
-#include <linux/log2.h>
-#include <linux/string.h>
-
-#define A32(_p) get_unaligned((u32 *) (_p))
-#define A16(_p) get_unaligned((u16 *) (_p))
-
-#define GET_LE16_ADVANCE(_src)                         \
-({                                                     \
-       u16 _r = get_unaligned_le16(_src);              \
-       (_src) += 2;                                    \
-       _r;                                             \
-})
-
-#define PUT_LE16_ADVANCE(_dst, _v)                     \
-do {                                                   \
-       put_unaligned_le16((_v), (_dst));               \
-       (_dst) += 2;                                    \
-} while (0)
-
-#define LENGTH_LONG            15
-#define COPYLENGTH             8
-#define ML_BITS                        4
-#define ML_MASK                        ((1U << ML_BITS) - 1)
-#define RUN_BITS               (8 - ML_BITS)
-#define RUN_MASK               ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE           14
-#define MINMATCH               4
-#define SKIPSTRENGTH           6
-#define LASTLITERALS           5
-#define MFLIMIT                        (COPYLENGTH + MINMATCH)
-#define MINLENGTH              (MFLIMIT + 1)
-#define MAXD_LOG               16
-#define MAXD                   (1 << MAXD_LOG)
-#define MAXD_MASK              (u32)(MAXD - 1)
-#define MAX_DISTANCE           (MAXD - 1)
-#define HASH_LOG               (MAXD_LOG - 1)
-#define HASHTABLESIZE          (1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS                256
-#define OPTIMAL_ML             (int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT           ((1<<16) + (MFLIMIT - 1))
-
-#define __HASH_VALUE(p, bits)                          \
-       (((A32(p)) * 2654435761U) >> (32 - (bits)))
-
-#define HASH_VALUE(p)          __HASH_VALUE(p, HASH_LOG)
-
-#define MEMCPY_ADVANCE(_dst, _src, length)             \
-do {                                                   \
-       typeof(length) _length = (length);              \
-       memcpy(_dst, _src, _length);                    \
-       _src += _length;                                \
-       _dst += _length;                                \
-} while (0)
-
-#define MEMCPY_ADVANCE_BYTES(_dst, _src, _length)      \
-do {                                                   \
-       const u8 *_end = (_src) + (_length);            \
-       while ((_src) < _end)                           \
-               *_dst++ = *_src++;                      \
-} while (0)
-
-#define STEPSIZE               __SIZEOF_LONG__
-
-#define LZ4_COPYPACKET(_src, _dst)                     \
-do {                                                   \
-       MEMCPY_ADVANCE(_dst, _src, STEPSIZE);           \
-       MEMCPY_ADVANCE(_dst, _src, COPYLENGTH - STEPSIZE);\
-} while (0)
-
-/*
- * Equivalent to MEMCPY_ADVANCE - except may overrun @_dst and @_src by
- * COPYLENGTH:
- *
- * Note: src and dst may overlap (with src < dst) - we must do the copy in
- * STEPSIZE chunks for correctness
- *
- * Note also: length may be negative - we must not call memcpy if length is
- * negative, but still adjust dst and src by length
- */
-#define MEMCPY_ADVANCE_CHUNKED(_dst, _src, _length)    \
-do {                                                   \
-       u8 *_end = (_dst) + (_length);                  \
-       while ((_dst) < _end)                           \
-               LZ4_COPYPACKET(_src, _dst);             \
-       _src -= (_dst) - _end;                          \
-       _dst = _end;                                    \
-} while (0)
-
-#define MEMCPY_ADVANCE_CHUNKED_NOFIXUP(_dst, _src, _end)\
-do {                                                   \
-       while ((_dst) < (_end))                         \
-               LZ4_COPYPACKET((_src), (_dst));         \
-} while (0)
-
-static const int dec32table[8] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[8] = {0, 0, 0, -1, 0, 1, 2, 3};
-#else
-static const int dec64table[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-#endif
-
-static inline size_t get_length(const u8 **ip, size_t length)
-{
-       if (length == LENGTH_LONG) {
-               size_t len;
-
-               do {
-                       length += (len = *(*ip)++);
-               } while (len == 255);
-       }
-
-       return length;
-}
-
-static int lz4_uncompress(const u8 *source, u8 *dest, int osize)
-{
-       const u8 *ip = source;
-       const u8 *ref;
-       u8 *op = dest;
-       u8 * const oend = op + osize;
-       u8 *cpy;
-       unsigned token, offset;
-       ssize_t length;
-
-       while (1) {
-               /* get runlength */
-               token = *ip++;
-               length = get_length(&ip, token >> ML_BITS);
-
-               /* copy literals */
-               if (unlikely(op + length > oend - COPYLENGTH)) {
-                       /*
-                        * Error: not enough place for another match
-                        * (min 4) + 5 literals
-                        */
-                       if (op + length != oend)
-                               goto _output_error;
-
-                       MEMCPY_ADVANCE(op, ip, length);
-                       break; /* EOF */
-               }
-               MEMCPY_ADVANCE_CHUNKED(op, ip, length);
-
-               /* get match offset */
-               offset = GET_LE16_ADVANCE(ip);
-               ref = op - offset;
-
-               /* Error: offset create reference outside destination buffer */
-               if (unlikely(ref < (u8 *const) dest))
-                       goto _output_error;
-
-               /* get match length */
-               length = get_length(&ip, token & ML_MASK);
-               length += MINMATCH;
-
-               /* copy first STEPSIZE bytes of match: */
-               if (unlikely(offset < STEPSIZE)) {
-                       MEMCPY_ADVANCE_BYTES(op, ref, 4);
-                       ref -= dec32table[offset];
-
-                       memcpy(op, ref, 4);
-                       op += STEPSIZE - 4;
-                       ref -= dec64table[offset];
-               } else {
-                       MEMCPY_ADVANCE(op, ref, STEPSIZE);
-               }
-               length -= STEPSIZE;
-               /*
-                * Note - length could have been < STEPSIZE; that's ok, length
-                * will now be negative and we'll just end up rewinding op:
-                */
-
-               /* copy rest of match: */
-               cpy = op + length;
-               if (cpy > oend - COPYLENGTH) {
-                       /* Error: request to write beyond destination buffer */
-                       if (cpy              > oend ||
-                           ref + COPYLENGTH > oend)
-                               goto _output_error;
-#if !LZ4_ARCH64
-                       if (op  + COPYLENGTH > oend)
-                               goto _output_error;
-#endif
-                       MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH);
-                       /* op could be > cpy here */
-                       while (op < cpy)
-                               *op++ = *ref++;
-                       op = cpy;
-                       /*
-                        * Check EOF (should never happen, since last 5 bytes
-                        * are supposed to be literals)
-                        */
-                       if (op == oend)
-                               goto _output_error;
-               } else {
-                       MEMCPY_ADVANCE_CHUNKED(op, ref, length);
-               }
-       }
-       /* end of decoding */
-       return ip - source;
-
-       /* write overflow error detected */
-_output_error:
-       return -1;
-}
-
-int bch2_lz4_decompress(const unsigned char *src, size_t *src_len,
-                       unsigned char *dest, size_t actual_dest_len)
-{
-       int ret = -1;
-       int input_len = 0;
-
-       input_len = lz4_uncompress(src, dest, actual_dest_len);
-       if (input_len < 0)
-               goto exit_0;
-       *src_len = input_len;
-
-       return 0;
-exit_0:
-       return ret;
-}
index 74e17fa92c782f1531a6726809cc89479485037c..ad41f5e36a7c3e5a4b53626d603c8315f6490a07 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Code for moving data off a device.
  */
index de2faab24e1159eddcb8172be21b4c570bd0980e..027efaa0d575f8a0e8db09137c02bdb4f2893010 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H
 
index 97890918b829aef3a206f5e848d615cb57db2a7b..e7e58afed5ddf8e8a4519fe247bc6eb81cdc0943 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
@@ -300,12 +301,13 @@ static void move_free(struct closure *cl)
 {
        struct moving_io *io = container_of(cl, struct moving_io, cl);
        struct moving_context *ctxt = io->write.ctxt;
+       struct bvec_iter_all iter;
        struct bio_vec *bv;
        int i;
 
        bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
 
-       bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
+       bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
                if (bv->bv_page)
                        __free_page(bv->bv_page);
 
@@ -428,10 +430,9 @@ static int bch2_move_extent(struct bch_fs *c,
        bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
        bio_set_prio(&io->write.op.wbio.bio,
                     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-       io->write.op.wbio.bio.bi_iter.bi_size = sectors << 9;
 
-       bch2_bio_map(&io->write.op.wbio.bio, NULL);
-       if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
+       if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
+                                GFP_KERNEL))
                goto err_free;
 
        io->rbio.opts = io_opts;
index b3bee07ec94360c9abb764f0052f307805d7d20c..71b3d2b2ddb6ddbcc1ef744a5e00676578563336 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVE_H
 #define _BCACHEFS_MOVE_H
 
index 7703ce43dce932dd4bd9f426d13cdc6328b1df45..6788170d3f95de1df173019681a4e146b104d3e1 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVE_TYPES_H
 #define _BCACHEFS_MOVE_TYPES_H
 
index fe66efb418066a421f628f99ce1292c388233222..b13af5662f220e73d7f5f354721dbcb5d4e1a4eb 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Moving/copying garbage collector
  *
index c46fa1f15f5d9acfc27e149caa592796af998093..dcd479632cf114150782f685bd9188f157d81789 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVINGGC_H
 #define _BCACHEFS_MOVINGGC_H
 
index 7bf0f84cb69284bc0aaf38513800035819cb5110..13a9a2fcd575853e07bcde406a364f92aa741495 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/kernel.h>
 
index 390bf92154313f299b60b5ba5a47581973634f90..c6ec9f7effe574d25a6c85a572b470839a59d4a8 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_OPTS_H
 #define _BCACHEFS_OPTS_H
 
index 8a42660cff08a8e54400075cc6927f0a65ead9b4..f0da0fac09bfc25c28a692d6e3aa3e6a145ee716 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "btree_update.h"
 #include "inode.h"
index 34b4a0f0ae77ca94001402b5b0d4d6bd996a28c2..51e4f9713ef0bd7904b7aea90ee72dcafcaf5ad9 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_QUOTA_H
 #define _BCACHEFS_QUOTA_H
 
index e978dc54b6aa7c6fb4dc16a6cf53d3c4e0bdee7d..6a136083d3899d29ac96222eb46a26dd113060b2 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_QUOTA_TYPES_H
 #define _BCACHEFS_QUOTA_TYPES_H
 
index d7698451f1ae9d23def458b425efdd03fe77f3ac..6bdd68177ac94896d5935ddcffd57e414acf68a9 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
index 2e6aa67724710fa603c8ee81177e347f04f6f1de..99e2a1fb60844cd301f4a78ec4abd8a2b6dff6a7 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_REBALANCE_H
 #define _BCACHEFS_REBALANCE_H
 
index aaf5b9ca133c9f98c3925cf85ce1f62564a77a79..192c6be20cedd841311518fbee9028f07f09b23b 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_REBALANCE_TYPES_H
 #define _BCACHEFS_REBALANCE_TYPES_H
 
index 535e2b6a15fbbbc7727077078aa999aeb6eaa4a3..e0df2c0a4fdf54b2747b570833c6b476e325de45 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "alloc_background.h"
@@ -378,7 +379,15 @@ static int journal_replay_entry_early(struct bch_fs *c,
 
        switch (entry->type) {
        case BCH_JSET_ENTRY_btree_root: {
-               struct btree_root *r = &c->btree_roots[entry->btree_id];
+               struct btree_root *r;
+
+               if (entry->btree_id >= BTREE_ID_NR) {
+                       bch_err(c, "filesystem has unknown btree type %u",
+                               entry->btree_id);
+                       return -EINVAL;
+               }
+
+               r = &c->btree_roots[entry->btree_id];
 
                if (entry->u64s) {
                        r->level = entry->level;
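
The added bounds check turns an out-of-range btree_id found in the journal into a clean -EINVAL with an error message; the old code would have indexed past the end of c->btree_roots[], for example when reading a filesystem written by a newer version that defines an additional btree type.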
@@ -720,10 +729,12 @@ int bch2_fs_recovery(struct bch_fs *c)
 
        ret = bch2_blacklist_table_initialize(c);
 
-       ret = verify_journal_entries_not_blacklisted_or_missing(c,
-                                               &journal_entries);
-       if (ret)
-               goto err;
+       if (!list_empty(&journal_entries)) {
+               ret = verify_journal_entries_not_blacklisted_or_missing(c,
+                                                       &journal_entries);
+               if (ret)
+                       goto err;
+       }
 
        ret = bch2_fs_journal_start(&c->journal, journal_seq,
                                    &journal_entries);
index c61b55f5406ca90104fdc590f3ff18304f07fcc4..a69260d6165a148b917a4ef2bae4abc78814a104 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_RECOVERY_H
 #define _BCACHEFS_RECOVERY_H
 
index 319c7dc4ef9a46337ffc8f16a08147086cd7e894..4818453c015a405a09d55924605b91fdaba1a95e 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "buckets.h"
@@ -228,9 +229,9 @@ bool bch2_replicas_marked(struct bch_fs *c,
 {
        bool marked;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return marked;
 }
@@ -446,9 +447,9 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
 {
        bool marked;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
        marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return marked;
 }
@@ -971,7 +972,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 
        mi = bch2_sb_get_members(c->disk_sb.sb);
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
 
        for_each_cpu_replicas_entry(&c->replicas, e) {
                if (e->data_type >= ARRAY_SIZE(ret.replicas))
@@ -998,7 +999,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
                            nr_offline);
        }
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
                if (ret.replicas[i].redundancy == INT_MAX)
@@ -1049,14 +1050,14 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
        struct bch_replicas_entry *e;
        unsigned i, ret = 0;
 
-       percpu_down_read_preempt_disable(&c->mark_lock);
+       percpu_down_read(&c->mark_lock);
 
        for_each_cpu_replicas_entry(&c->replicas, e)
                for (i = 0; i < e->nr_devs; i++)
                        if (e->devs[i] == ca->dev_idx)
                                ret |= 1 << e->data_type;
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        return ret;
 }
index bca82e04e28dfc1fa66857fb970b37e22895414c..0d6e191260217d125a76f36d5b1e70f1932fb6ae 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_REPLICAS_H
 #define _BCACHEFS_REPLICAS_H
 
index 3a6c9c8217f03719561f236ad3fe3ec3e66095f5..c062edb3fbc24e6fd5889d1ac138b3e2c10ab9db 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*     $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */
 
 /*-
index 7a4b2241f1e10c6677173f04428464dd0a71d034..3dfaf34a43b2848ccbfc2e5ba9842376bce9fb70 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
 /* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */
 /*-
  * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
index fcbe42bc53c0ea1d7899f59852a31223b8f7c7d1..091bf7a8957755c092636177ae13bce9a5af6998 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_STR_HASH_H
 #define _BCACHEFS_STR_HASH_H
 
@@ -71,7 +72,7 @@ static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx,
                ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key));
                break;
        case BCH_STR_HASH_CRC64:
-               ctx->crc64 = bch2_crc64_update(~0, &info->crc_key, sizeof(info->crc_key));
+               ctx->crc64 = crc64_be(~0, &info->crc_key, sizeof(info->crc_key));
                break;
        case BCH_STR_HASH_SIPHASH:
                SipHash24_Init(&ctx->siphash, &info->siphash_key);
@@ -90,7 +91,7 @@ static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
                ctx->crc32c = crc32c(ctx->crc32c, data, len);
                break;
        case BCH_STR_HASH_CRC64:
-               ctx->crc64 = bch2_crc64_update(ctx->crc64, data, len);
+               ctx->crc64 = crc64_be(ctx->crc64, data, len);
                break;
        case BCH_STR_HASH_SIPHASH:
                SipHash24_Update(&ctx->siphash, data, len);
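
bch2_crc64_update() is replaced by the kernel's generic crc64_be() from lib/crc64.c, which has the same incremental shape: seed with ~0, then feed successive buffers, as in crc = crc64_be(crc64_be(~0, key, key_len), data, len).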
index 61eefd2dd1d235cdcfeda7add3ea1b8851795103..3043def884ab43ea01f555ee35aded06f3f9d986 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "buckets.h"
@@ -469,9 +470,8 @@ reread:
        bio_reset(sb->bio);
        bio_set_dev(sb->bio, sb->bdev);
        sb->bio->bi_iter.bi_sector = offset;
-       sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
        bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
-       bch2_bio_map(sb->bio, sb->sb);
+       bch2_bio_map(sb->bio, sb->sb, PAGE_SIZE << sb->page_order);
 
        if (submit_bio_wait(sb->bio))
                return "IO error";
@@ -573,13 +573,12 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
        bio_reset(sb->bio);
        bio_set_dev(sb->bio, sb->bdev);
        sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
-       sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
        bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
        /*
         * use sb buffer to read layout, since sb buffer is page aligned but
         * layout won't be:
         */
-       bch2_bio_map(sb->bio, sb->sb);
+       bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
 
        err = "IO error";
        if (submit_bio_wait(sb->bio))
@@ -649,11 +648,10 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
        bio_reset(bio);
        bio_set_dev(bio, ca->disk_sb.bdev);
        bio->bi_iter.bi_sector  = le64_to_cpu(sb->layout.sb_offset[0]);
-       bio->bi_iter.bi_size    = PAGE_SIZE;
        bio->bi_end_io          = write_super_endio;
        bio->bi_private         = ca;
        bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META);
-       bch2_bio_map(bio, ca->sb_read_scratch);
+       bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
 
        this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB],
                     bio_sectors(bio));
@@ -676,13 +674,12 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
        bio_reset(bio);
        bio_set_dev(bio, ca->disk_sb.bdev);
        bio->bi_iter.bi_sector  = le64_to_cpu(sb->offset);
-       bio->bi_iter.bi_size    =
-               roundup((size_t) vstruct_bytes(sb),
-                       bdev_logical_block_size(ca->disk_sb.bdev));
        bio->bi_end_io          = write_super_endio;
        bio->bi_private         = ca;
        bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
-       bch2_bio_map(bio, sb);
+       bch2_bio_map(bio, sb,
+                    roundup((size_t) vstruct_bytes(sb),
+                            bdev_logical_block_size(ca->disk_sb.bdev)));
 
        this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_SB],
                     bio_sectors(bio));
index cf25b44a077eb3e3788689d12fc8ff60cefef263..f5450e596c6269e262ef1e244ed820433e1a81f8 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_SUPER_IO_H
 #define _BCACHEFS_SUPER_IO_H
 
index aa3adbf2babf11a95f62acbe3ad969b8c2358ea9..b1b274a8fc103be39d073938ccf3376296c6bb06 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * bcachefs setup/teardown code, and some metadata io - read a superblock and
  * figure out what to do with it.
@@ -397,6 +398,8 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
 
        schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
 
+       schedule_work(&c->ec_stripe_delete_work);
+
        return 0;
 }
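
Scheduling ec_stripe_delete_work from bch2_fs_read_write_late() defers stripe deletion until the filesystem has actually gone read-write, ensuring the work item never runs while the filesystem is still read-only.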
 
index 4598de9b9c5e001a6fcc4f2a2150d15871b9d288..41992e891391257e411e8e032d080caac48bdfaa 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_SUPER_H
 #define _BCACHEFS_SUPER_H
 
index 6277be42c914a9379e0c8bfacec490afe526cf7a..20406ebd6f5bad7cd89252a293366262155e9cef 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_SUPER_TYPES_H
 #define _BCACHEFS_SUPER_TYPES_H
 
index 675706761b12672eb8f2ec8aa788ec45a8a9d5db..27646c435e304fb1df1231c5966c6fb09ea01f52 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * bcache sysfs interfaces
  *
@@ -241,7 +242,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 
        bch2_fs_usage_to_text(&out, c, fs_usage);
 
-       percpu_up_read_preempt_enable(&c->mark_lock);
+       percpu_up_read(&c->mark_lock);
 
        kfree(fs_usage);
 
index 1ba759fd6e8cfbcfe042b1d643aafb56feebfba5..525fd05d91f7d003519e17a82e876e83157db9b3 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_SYSFS_H_
 #define _BCACHEFS_SYSFS_H_
 
index 96bca8009da70f7f51e361f29dee8c023dd56169..fe0b987902fb83621b01459b89f4a15ff304390a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifdef CONFIG_BCACHEFS_TESTS
 
 #include "bcachefs.h"
index 3f1b8d1fbedf337878efdab42d082bcf2b32bfae..551d0764225ecf47eb79658b2fd2bf5eaa5c8d6d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_TEST_H
 #define _BCACHEFS_TEST_H
 
index 13f0fc24a3f744b174a2b7984409b9404ed34194..59e8dfa3d24520a2f57e5744efd3c4c456dab3a8 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_types.h"
 #include "buckets.h"
index 94dd651949caeb1982f7abd101ee323dddf4748e..2cc433ec0e3a5a83aba673570828034a0fe7b3f9 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * random utility code, for bcache but in theory not specific to bcache
  *
@@ -503,48 +504,32 @@ size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf)
 
 /* misc: */
 
-void bch2_bio_map(struct bio *bio, void *base)
+void bch2_bio_map(struct bio *bio, void *base, size_t size)
 {
-       size_t size = bio->bi_iter.bi_size;
-       struct bio_vec *bv = bio->bi_io_vec;
-
-       BUG_ON(!bio->bi_iter.bi_size);
-       BUG_ON(bio->bi_vcnt);
-       BUG_ON(!bio->bi_max_vecs);
-
-       bv->bv_offset = base ? offset_in_page(base) : 0;
-       goto start;
-
-       for (; size; bio->bi_vcnt++, bv++) {
-               BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
-
-               bv->bv_offset   = 0;
-start:         bv->bv_len      = min_t(size_t, PAGE_SIZE - bv->bv_offset,
-                                       size);
-               if (base) {
-                       bv->bv_page = is_vmalloc_addr(base)
+       while (size) {
+               struct page *page = is_vmalloc_addr(base)
                                ? vmalloc_to_page(base)
                                : virt_to_page(base);
+               unsigned offset = offset_in_page(base);
+               unsigned len = min_t(size_t, PAGE_SIZE - offset, size);
 
-                       base += bv->bv_len;
-               }
-
-               size -= bv->bv_len;
+               BUG_ON(!bio_add_page(bio, page, len, offset));
+               size -= len;
+               base += len;
        }
 }
 
-int bch2_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
 {
-       int i;
-       struct bio_vec *bv;
+       while (size) {
+               struct page *page = alloc_page(gfp_mask);
+               unsigned len = min(PAGE_SIZE, size);
 
-       bio_for_each_segment_all(bv, bio, i) {
-               bv->bv_page = alloc_page(gfp_mask);
-               if (!bv->bv_page) {
-                       while (--bv >= bio->bi_io_vec)
-                               __free_page(bv->bv_page);
+               if (!page)
                        return -ENOMEM;
-               }
+
+               BUG_ON(!bio_add_page(bio, page, len, 0));
+               size -= len;
        }
 
        return 0;
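
Both helpers now take the byte count explicitly instead of reading a pre-set bio->bi_iter.bi_size, and both delegate segment accounting to bio_add_page(). Note that on allocation failure the new bch2_bio_alloc_pages() returns -ENOMEM with the already-attached pages still on the bio, leaving cleanup (e.g. bio_free_pages()) to the caller, where the old version unwound its own partial allocation. A hypothetical caller sketch of the new signatures, with bio, buf and len assumed in scope:

	bio_init(bio, bio->bi_inline_vecs, DIV_ROUND_UP(len, PAGE_SIZE));

	if (buf)
		bch2_bio_map(bio, buf, len);	/* wrap an existing buffer */
	else if (bch2_bio_alloc_pages(bio, len, GFP_NOIO))
		return -ENOMEM;			/* len bytes of fresh pages */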
index f7a35880b0cf9b6c180a4bab7877a9b27b16f06e..fa3a991453e91109a8a4865073b557a9a7bbee41 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_UTIL_H
 #define _BCACHEFS_UTIL_H
 
@@ -510,8 +511,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
        return x;
 }
 
-void bch2_bio_map(struct bio *bio, void *base);
-int bch2_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask);
+void bch2_bio_map(struct bio *bio, void *base, size_t);
+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
 
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
@@ -627,35 +628,6 @@ static inline void memmove_u64s(void *dst, const void *src,
                __memmove_u64s_up(dst, src, u64s);
 }
 
-static inline struct bio_vec next_contig_bvec(struct bio *bio,
-                                             struct bvec_iter *iter)
-{
-       struct bio_vec bv = bio_iter_iovec(bio, *iter);
-
-       bio_advance_iter(bio, iter, bv.bv_len);
-#ifndef CONFIG_HIGHMEM
-       while (iter->bi_size) {
-               struct bio_vec next = bio_iter_iovec(bio, *iter);
-
-               if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
-                   page_address(next.bv_page) + next.bv_offset)
-                       break;
-
-               bv.bv_len += next.bv_len;
-               bio_advance_iter(bio, iter, next.bv_len);
-       }
-#endif
-       return bv;
-}
-
-#define __bio_for_each_contig_segment(bv, bio, iter, start)            \
-       for (iter = (start);                                            \
-            (iter).bi_size &&                                          \
-               ((bv = next_contig_bvec((bio), &(iter))), 1);)
-
-#define bio_for_each_contig_segment(bv, bio, iter)                     \
-       __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
-
 void sort_cmp_size(void *base, size_t num, size_t size,
          int (*cmp_func)(const void *, const void *, size_t),
          void (*swap_func)(void *, void *, size_t));
index 795664428876e6d6ab3d1d0c3a601c40cfc179c7..c099cdc0605f960c7d2ebae01304d0722194ee05 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _VSTRUCTS_H
 #define _VSTRUCTS_H
 
index 41a9753e919c65b9b79053ce12b7145cfd241c92..9b8f6f1f9a77f5cd088fc9b061924979499281f0 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
@@ -385,6 +386,9 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
                bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode));
        const struct bch_option *opt;
        int id, inode_opt_id;
+       char buf[512];
+       struct printbuf out = PBUF(buf);
+       unsigned val_len;
        u64 v;
 
        id = bch2_opt_lookup(name);
@@ -405,23 +409,16 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
                return -ENODATA;
 
        v = bch2_opt_get_by_id(&opts, id);
+       bch2_opt_to_text(&out, c, opt, v, 0);
 
-       if (!buffer) {
-               char buf[512];
-               struct printbuf out = PBUF(buf);
+       val_len = out.pos - buf;
 
-               bch2_opt_to_text(&out, c, opt, v, 0);
+       if (buffer && val_len > size)
+               return -ERANGE;
 
-               return out.pos - buf;
-       } else {
-               struct printbuf out = _PBUF(buffer, size);
-
-               bch2_opt_to_text(&out, c, opt, v, 0);
-
-               return printbuf_remaining(&out)
-                       ? (void *) out.pos - buffer
-                       : -ERANGE;
-       }
+       if (buffer)
+               memcpy(buffer, buf, val_len);
+       return val_len;
 }
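
The rewrite collapses the two branches into one path: the option value is always formatted into a stack printbuf first, then a NULL buffer means the caller is only probing for the value's length (the standard getxattr convention), while a real buffer gets the bytes copied out, with -ERANGE when they do not fit. A self-contained userspace sketch of the size-probe convention this serves, with path assumed in scope; the attribute name is illustrative:

	#include <stdlib.h>
	#include <sys/xattr.h>

	ssize_t len = getxattr(path, "bcachefs.compression", NULL, 0);
	if (len > 0) {
		char *val = malloc(len + 1);

		if (val && getxattr(path, "bcachefs.compression", val, len) == len)
			val[len] = '\0';	/* xattr values are not NUL-terminated */
		free(val);
	}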
 
 static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
index e9b27767caea9915ca710746df6af27e5396399e..4151065ab853546c3f071a831cfba10d9af03010 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_XATTR_H
 #define _BCACHEFS_XATTR_H
 
index c4cdceaa8c948e2262396e63c1686872bc6a7736..d9b860a026a80d68bea50622ba9e8d4efb1bfd40 100644 (file)
@@ -165,10 +165,11 @@ struct bio *bio_split(struct bio *bio, int sectors,
 
 void bio_free_pages(struct bio *bio)
 {
+       struct bvec_iter_all iter;
        struct bio_vec *bvec;
        int i;
 
-       bio_for_each_segment_all(bvec, bio, i)
+       bio_for_each_segment_all(bvec, bio, i, iter)
                __free_page(bvec->bv_page);
 }
 
@@ -199,6 +200,23 @@ void bio_put(struct bio *bio)
        }
 }
 
+int bio_add_page(struct bio *bio, struct page *page,
+                unsigned int len, unsigned int off)
+{
+       struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+       WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+       WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);
+
+       bv->bv_page = page;
+       bv->bv_offset = off;
+       bv->bv_len = len;
+
+       bio->bi_iter.bi_size += len;
+       bio->bi_vcnt++;
+       return len;
+}
+
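
This userspace shim supplies the kernel bio_add_page() API for the tools build. The real kernel function returns the number of bytes added and 0 on failure (for instance when the bio is full); the shim instead WARNs on misuse and always returns len, but callers in this tree keep the kernel-style check, as in the BUG_ON(!bio_add_page(...)) calls in bch2_bio_alloc_pages_pool() and bch2_bio_alloc_pages() above, so the same code remains correct against the real kernel.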
 static inline bool bio_remaining_done(struct bio *bio)
 {
        /*