git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to ef60854e99 bcachefs: More allocator startup improvements
author    Kent Overstreet <kent.overstreet@gmail.com>
Mon, 14 Jan 2019 01:36:38 +0000 (20:36 -0500)
committer Kent Overstreet <kent.overstreet@gmail.com>
Mon, 14 Jan 2019 01:53:25 +0000 (20:53 -0500)
38 files changed:
.bcachefs_revision
cmd_migrate.c
include/crypto/sha.h [new file with mode: 0644]
include/crypto/skcipher.h
include/linux/compiler.h
include/linux/crc64.h [new file with mode: 0644]
include/linux/six.h [moved from libbcachefs/six.h with 98% similarity]
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/chardev.c
libbcachefs/checksum.c
libbcachefs/checksum.h
libbcachefs/ec_types.h
libbcachefs/fs-io.c
libbcachefs/fs-ioctl.c
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/str_hash.h
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/util.c
libbcachefs/util.h
linux/crc64.c [new file with mode: 0644]
linux/crc64table.h [new file with mode: 0644]
linux/six.c [moved from libbcachefs/six.c with 94% similarity]

index 66897e5c0e044c7dd5d2e1fce19a3f4bfd84e459..3bcc585adc16a0ed4b9c5f84424d6abb9854b590 100644 (file)
@@ -1 +1 @@
-2724e115d243043ee62d78883bec4035651d74ab
+ef60854e9912d24c0ba83e0760552c98257d2b07
index 2d82d157e7f8add7f61dfbf4536a6dac748dcd53..e9594ab79bb869c2b16b986d7bf0368396b098d6 100644 (file)
@@ -614,7 +614,8 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
        darray_free(s.extents);
        genradix_free(&s.hardlinks);
 
-       bch2_alloc_write(c);
+       bool wrote;
+       bch2_alloc_write(c, false, &wrote);
 }
 
 static void find_superblock_space(ranges extents, struct dev_opts *dev)
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
new file mode 100644 (file)
index 0000000..8a46202
--- /dev/null
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for SHA algorithms
+ */
+
+#ifndef _CRYPTO_SHA_H
+#define _CRYPTO_SHA_H
+
+#include <linux/types.h>
+
+#define SHA1_DIGEST_SIZE        20
+#define SHA1_BLOCK_SIZE         64
+
+#define SHA224_DIGEST_SIZE     28
+#define SHA224_BLOCK_SIZE      64
+
+#define SHA256_DIGEST_SIZE      32
+#define SHA256_BLOCK_SIZE       64
+
+#define SHA384_DIGEST_SIZE      48
+#define SHA384_BLOCK_SIZE       128
+
+#define SHA512_DIGEST_SIZE      64
+#define SHA512_BLOCK_SIZE       128
+
+#define SHA1_H0                0x67452301UL
+#define SHA1_H1                0xefcdab89UL
+#define SHA1_H2                0x98badcfeUL
+#define SHA1_H3                0x10325476UL
+#define SHA1_H4                0xc3d2e1f0UL
+
+#define SHA224_H0      0xc1059ed8UL
+#define SHA224_H1      0x367cd507UL
+#define SHA224_H2      0x3070dd17UL
+#define SHA224_H3      0xf70e5939UL
+#define SHA224_H4      0xffc00b31UL
+#define SHA224_H5      0x68581511UL
+#define SHA224_H6      0x64f98fa7UL
+#define SHA224_H7      0xbefa4fa4UL
+
+#define SHA256_H0      0x6a09e667UL
+#define SHA256_H1      0xbb67ae85UL
+#define SHA256_H2      0x3c6ef372UL
+#define SHA256_H3      0xa54ff53aUL
+#define SHA256_H4      0x510e527fUL
+#define SHA256_H5      0x9b05688cUL
+#define SHA256_H6      0x1f83d9abUL
+#define SHA256_H7      0x5be0cd19UL
+
+#define SHA384_H0      0xcbbb9d5dc1059ed8ULL
+#define SHA384_H1      0x629a292a367cd507ULL
+#define SHA384_H2      0x9159015a3070dd17ULL
+#define SHA384_H3      0x152fecd8f70e5939ULL
+#define SHA384_H4      0x67332667ffc00b31ULL
+#define SHA384_H5      0x8eb44a8768581511ULL
+#define SHA384_H6      0xdb0c2e0d64f98fa7ULL
+#define SHA384_H7      0x47b5481dbefa4fa4ULL
+
+#define SHA512_H0      0x6a09e667f3bcc908ULL
+#define SHA512_H1      0xbb67ae8584caa73bULL
+#define SHA512_H2      0x3c6ef372fe94f82bULL
+#define SHA512_H3      0xa54ff53a5f1d36f1ULL
+#define SHA512_H4      0x510e527fade682d1ULL
+#define SHA512_H5      0x9b05688c2b3e6c1fULL
+#define SHA512_H6      0x1f83d9abfb41bd6bULL
+#define SHA512_H7      0x5be0cd19137e2179ULL
+
+extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE];
+
+extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
+
+extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
+
+extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
+
+extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
+
+struct sha1_state {
+       u32 state[SHA1_DIGEST_SIZE / 4];
+       u64 count;
+       u8 buffer[SHA1_BLOCK_SIZE];
+};
+
+struct sha256_state {
+       u32 state[SHA256_DIGEST_SIZE / 4];
+       u64 count;
+       u8 buf[SHA256_BLOCK_SIZE];
+};
+
+struct sha512_state {
+       u64 state[SHA512_DIGEST_SIZE / 8];
+       u64 count[2];
+       u8 buf[SHA512_BLOCK_SIZE];
+};
+
+struct shash_desc;
+
+extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+                            unsigned int len, u8 *hash);
+
+extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *hash);
+
+extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *hash);
+#endif
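The header above is a trimmed copy of the kernel's include/crypto/sha.h, pulled in for the userspace build. As a minimal sketch of how the constants and state structs fit together (sha256_init_state is a hypothetical helper, not part of this header), a SHA-256 computation starts by seeding the state words from SHA256_H0..SHA256_H7:

    static void sha256_init_state(struct sha256_state *sctx)
    {
            /* initial hash values from FIPS 180-4, as defined above */
            sctx->state[0] = SHA256_H0;
            sctx->state[1] = SHA256_H1;
            sctx->state[2] = SHA256_H2;
            sctx->state[3] = SHA256_H3;
            sctx->state[4] = SHA256_H4;
            sctx->state[5] = SHA256_H5;
            sctx->state[6] = SHA256_H6;
            sctx->state[7] = SHA256_H7;
            sctx->count = 0;        /* no message bytes processed yet */
    }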
index c9e887c985ef8bf2983327e497075eebf959a910..5989855d1894bf28efa883a07d38233061af9399 100644 (file)
@@ -36,14 +36,29 @@ struct crypto_skcipher {
        struct crypto_tfm       base;
 };
 
+struct crypto_sync_skcipher {
+       struct crypto_skcipher base;
+};
+
 struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
                                              u32 type, u32 mask);
 
+static inline struct crypto_sync_skcipher *
+crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask)
+{
+       return (void *) crypto_alloc_skcipher(alg_name, type, mask);
+}
+
 static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
 {
        kfree(tfm);
 }
 
+static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
+{
+       crypto_free_skcipher(&tfm->base);
+}
+
 struct skcipher_request {
        unsigned                cryptlen;
        u8                      *iv;
@@ -54,9 +69,14 @@ struct skcipher_request {
        struct crypto_tfm       *tfm;
 };
 
-#define SKCIPHER_REQUEST_ON_STACK(name, tfm)                   \
-       struct skcipher_request __##name##_desc;                \
-       struct skcipher_request *name = &__##name##_desc
+#define MAX_SYNC_SKCIPHER_REQSIZE      384
+#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
+       char __##name##_desc[sizeof(struct skcipher_request) + \
+                            MAX_SYNC_SKCIPHER_REQSIZE + \
+                            (!(sizeof((struct crypto_sync_skcipher *)1 == \
+                                      (typeof(tfm))1))) \
+                           ] CRYPTO_MINALIGN_ATTR; \
+       struct skcipher_request *name = (void *)__##name##_desc
 
 static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
                                         const u8 *key, unsigned int keylen)
@@ -86,6 +106,12 @@ static inline void skcipher_request_set_tfm(struct skcipher_request *req,
        req->tfm = &tfm->base;
 }
 
+static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
+                                           struct crypto_sync_skcipher *tfm)
+{
+       skcipher_request_set_tfm(req, &tfm->base);
+}
+
 static inline void skcipher_request_set_crypt(
        struct skcipher_request *req,
        struct scatterlist *src, struct scatterlist *dst,
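Together these shims track the kernel's crypto_sync_skcipher API, which this commit switches the ChaCha20 transform over to (see the bch_fs change below). A hedged usage sketch — error handling elided, and the scatterlists, length, and IV assumed to be set up by the caller:

    struct crypto_sync_skcipher *tfm =
            crypto_alloc_sync_skcipher("chacha20", 0, 0);
    SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);       /* request lives on the stack */

    skcipher_request_set_sync_tfm(req, tfm);
    skcipher_request_set_crypt(req, src, dst, len, iv);
    crypto_skcipher_encrypt(req);                   /* synchronous in this shim */

    crypto_free_sync_skcipher(tfm);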
index 915a6f88643fc6571656eff48d681747ca31514a..2bfbfadb34b250d9f0840aded40060896bb72b37 100644 (file)
@@ -34,6 +34,7 @@
 #define __maybe_unused         __attribute__((unused))
 #define __always_unused                __attribute__((unused))
 #define __packed               __attribute__((__packed__))
+#define __flatten              __attribute__((flatten))
 #define __force
 #define __nocast
 #define __iomem
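__flatten maps to GCC/clang's flatten attribute: every call made inside the annotated function becomes a candidate for forced inlining, flattening the call tree into one body. An illustrative (hypothetical) use on a hot path:

    static u64 __flatten checksum_buf(const void *p, size_t len)
    {
            /* calls made here, e.g. crc64_be(), get inlined into this body */
            return crc64_be(0, p, len);
    }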
diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644 (file)
index 0000000..c756e65
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
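The implementation (added below as linux/crc64.c) is the kernel's table-driven big-endian CRC-64 per the ECMA-182 specification. A minimal call sketch; the seed is caller-chosen, with 0 shown here:

    u64 crc = 0;
    crc = crc64_be(crc, buf, len);          /* fold len bytes of buf into crc */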
similarity index 98%
rename from libbcachefs/six.h
rename to include/linux/six.h
index 999c49db23715b616e679c37c5cea84e90ba9b7a..40e213f2fb40192911a9cd90dca76748b2be7288 100644 (file)
@@ -1,5 +1,7 @@
-#ifndef _BCACHEFS_SIX_H
-#define _BCACHEFS_SIX_H
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _LINUX_SIX_H
+#define _LINUX_SIX_H
 
 /*
  * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
@@ -61,8 +63,6 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 
-#include "util.h"
-
 #define SIX_LOCK_SEPARATE_LOCKFNS
 
 union six_lock_state {
@@ -227,4 +227,4 @@ bool six_trylock_convert(struct six_lock *, enum six_lock_type,
 
 void six_lock_increment(struct six_lock *, enum six_lock_type);
 
-#endif /* _BCACHEFS_SIX_H */
+#endif /* _LINUX_SIX_H */
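Six locks ("shared/intent/exclusive") move into include/linux/ so the header no longer depends on bcachefs's util.h. With SIX_LOCK_SEPARATE_LOCKFNS defined, per-type lock/unlock helpers are generated; a sketch of the read side, assuming the single-argument helpers this version of the header declares:

    struct six_lock lock;

    six_lock_init(&lock);
    six_lock_read(&lock);           /* shared: concurrent readers allowed */
    /* ... read-only access to the protected structure ... */
    six_unlock_read(&lock);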
index 955caa217f69da31d60e829f11eb979fa70a79a0..6de6e26384b21e3135e658540c7b9651da155144 100644 (file)
@@ -272,12 +272,19 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
                                  size_t b, struct btree_iter *iter,
                                  u64 *journal_seq, unsigned flags)
 {
+#if 0
        __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+       /* hack: */
+       __BKEY_PADDED(k, 8) alloc_key;
+#endif
        struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
        struct bucket *g;
        struct bucket_mark m;
        int ret;
 
+       BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
        a->k.p = POS(ca->dev_idx, b);
 
        percpu_down_read_preempt_disable(&c->mark_lock);
@@ -339,12 +346,14 @@ err:
        return ret;
 }
 
-int bch2_alloc_write(struct bch_fs *c)
+int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
 {
        struct bch_dev *ca;
        unsigned i;
        int ret = 0;
 
+       *wrote = false;
+
        for_each_rw_member(ca, c, i) {
                struct btree_iter iter;
                struct bucket_array *buckets;
@@ -362,9 +371,14 @@ int bch2_alloc_write(struct bch_fs *c)
                        if (!buckets->b[b].mark.dirty)
                                continue;
 
-                       ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0);
+                       ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
+                                                    nowait
+                                                    ? BTREE_INSERT_NOWAIT
+                                                    : 0);
                        if (ret)
                                break;
+
+                       *wrote = true;
                }
                up_read(&ca->bucket_lock);
                bch2_btree_iter_unlock(&iter);
@@ -1262,20 +1276,23 @@ static void flush_held_btree_writes(struct bch_fs *c)
        struct bucket_table *tbl;
        struct rhash_head *pos;
        struct btree *b;
-       bool flush_updates;
-       size_t i, nr_pending_updates;
+       bool nodes_blocked;
+       size_t i;
+       struct closure cl;
+
+       closure_init_stack(&cl);
 
        clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
 again:
        pr_debug("flushing dirty btree nodes");
        cond_resched();
+       closure_wait(&c->btree_interior_update_wait, &cl);
 
-       flush_updates = false;
-       nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+       nodes_blocked = false;
 
        rcu_read_lock();
        for_each_cached_btree(b, c, tbl, i, pos)
-               if (btree_node_dirty(b) && (!b->written || b->level)) {
+               if (btree_node_need_write(b)) {
                        if (btree_node_may_write(b)) {
                                rcu_read_unlock();
                                btree_node_lock_type(c, b, SIX_LOCK_read);
@@ -1283,7 +1300,7 @@ again:
                                six_unlock_read(&b->lock);
                                goto again;
                        } else {
-                               flush_updates = true;
+                               nodes_blocked = true;
                        }
                }
        rcu_read_unlock();
@@ -1291,17 +1308,16 @@ again:
        if (c->btree_roots_dirty)
                bch2_journal_meta(&c->journal);
 
-       /*
-        * This is ugly, but it's needed to flush btree node writes
-        * without spinning...
-        */
-       if (flush_updates) {
-               closure_wait_event(&c->btree_interior_update_wait,
-                                  bch2_btree_interior_updates_nr_pending(c) <
-                                  nr_pending_updates);
+       if (nodes_blocked) {
+               closure_sync(&cl);
                goto again;
        }
 
+       closure_wake_up(&c->btree_interior_update_wait);
+       closure_sync(&cl);
+
+       closure_wait_event(&c->btree_interior_update_wait,
+                          !bch2_btree_interior_updates_nr_pending(c));
 }
 
 static void allocator_start_issue_discards(struct bch_fs *c)
@@ -1323,13 +1339,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
        unsigned dev_iter;
        u64 journal_seq = 0;
        long bu;
-       bool invalidating_data = false;
        int ret = 0;
 
-       if (test_alloc_startup(c)) {
-               invalidating_data = true;
+       if (test_alloc_startup(c))
                goto not_enough;
-       }
 
        /* Scan for buckets that are already invalidated: */
        for_each_rw_member(ca, c, dev_iter) {
@@ -1376,21 +1389,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 not_enough:
        pr_debug("not enough empty buckets; scanning for reclaimable buckets");
 
-       for_each_rw_member(ca, c, dev_iter) {
-               find_reclaimable_buckets(c, ca);
-
-               while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
-                      (bu = next_alloc_bucket(ca)) >= 0) {
-                       invalidating_data |=
-                               bch2_invalidate_one_bucket(c, ca, bu, &journal_seq);
-
-                       fifo_push(&ca->free[RESERVE_BTREE], bu);
-                       bucket_set_dirty(ca, bu);
-               }
-       }
-
-       pr_debug("done scanning for reclaimable buckets");
-
        /*
         * We're moving buckets to freelists _before_ they've been marked as
         * invalidated on disk - we have to so that we can allocate new btree
@@ -1400,38 +1398,59 @@ not_enough:
         * have cached data in them, which is live until they're marked as
         * invalidated on disk:
         */
-       if (invalidating_data) {
-               pr_debug("invalidating existing data");
-               set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
-       } else {
-               pr_debug("issuing discards");
-               allocator_start_issue_discards(c);
-       }
+       set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
 
-       /*
-        * XXX: it's possible for this to deadlock waiting on journal reclaim,
-        * since we're holding btree writes. What then?
-        */
-       ret = bch2_alloc_write(c);
-       if (ret)
-               return ret;
+       while (1) {
+               bool wrote = false;
 
-       if (invalidating_data) {
-               pr_debug("flushing journal");
+               for_each_rw_member(ca, c, dev_iter) {
+                       find_reclaimable_buckets(c, ca);
 
-               ret = bch2_journal_flush_seq(&c->journal, journal_seq);
-               if (ret)
-                       return ret;
+                       while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
+                              (bu = next_alloc_bucket(ca)) >= 0) {
+                               bch2_invalidate_one_bucket(c, ca, bu,
+                                                          &journal_seq);
+
+                               fifo_push(&ca->free[RESERVE_BTREE], bu);
+                               bucket_set_dirty(ca, bu);
+                       }
+               }
+
+               pr_debug("done scanning for reclaimable buckets");
+
+               /*
+                * XXX: it's possible for this to deadlock waiting on journal reclaim,
+                * since we're holding btree writes. What then?
+                */
+               ret = bch2_alloc_write(c, true, &wrote);
 
-               pr_debug("issuing discards");
-               allocator_start_issue_discards(c);
+               /*
+                * If bch2_alloc_write() did anything, it may have used some
+                * buckets, and we need the RESERVE_BTREE freelist full - so we
+                * need to loop and scan again.
+                * And if it errored, it may have been because there weren't
+                * enough buckets, so just scan and loop again as long as it
+                * made some progress:
+                */
+               if (!wrote && ret)
+                       return ret;
+               if (!wrote && !ret)
+                       break;
        }
 
+       pr_debug("flushing journal");
+
+       ret = bch2_journal_flush(&c->journal);
+       if (ret)
+               return ret;
+
+       pr_debug("issuing discards");
+       allocator_start_issue_discards(c);
+
        set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
 
        /* now flush dirty btree nodes: */
-       if (invalidating_data)
-               flush_held_btree_writes(c);
+       flush_held_btree_writes(c);
 
        return 0;
 }
@@ -1440,6 +1459,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
 {
        struct bch_dev *ca;
        unsigned i;
+       bool wrote;
        int ret;
 
        down_read(&c->gc_lock);
@@ -1457,7 +1477,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
                }
        }
 
-       return bch2_alloc_write(c);
+       return bch2_alloc_write(c, false, &wrote);
 }
 
 void bch2_fs_allocator_background_init(struct bch_fs *c)
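Condensed, the startup change replaces the old single invalidate-then-write pass with a retry loop: scan for reclaimable buckets, push them onto the RESERVE_BTREE freelist, write the alloc keys with BTREE_INSERT_NOWAIT, and repeat for as long as bch2_alloc_write() makes progress, since writing alloc keys can itself consume btree-reserve buckets. Roughly (scan_and_invalidate() stands in for the per-device loop above):

    while (1) {
            bool wrote = false;

            scan_and_invalidate(c);                  /* refill RESERVE_BTREE freelists */
            ret = bch2_alloc_write(c, true, &wrote); /* nowait: fail instead of blocking */

            if (!wrote && ret)
                    return ret;     /* error with no progress: give up */
            if (!wrote && !ret)
                    break;          /* nothing left dirty: done */
            /* else: progress was made, possibly using buckets - rescan */
    }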
index b382c8b68ce5036ae0cf44f94aeeb518b832850b..a0c08e347ad7d7469b4a577c59b82f281d0366cc 100644 (file)
@@ -54,7 +54,7 @@ void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
-int bch2_alloc_write(struct bch_fs *);
+int bch2_alloc_write(struct bch_fs *, bool, bool *);
 int bch2_fs_allocator_start(struct bch_fs *);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
index 596d3bc7d99ed2f85ecc3db02d1a3f10e27ddbde..14e6453bc3dab0455d0f60ccc184795789c43960 100644 (file)
@@ -288,6 +288,7 @@ out:
        ob->valid       = true;
        ob->sectors_free = ca->mi.bucket_size;
        ob->ptr         = (struct bch_extent_ptr) {
+               .type   = 1 << BCH_EXTENT_ENTRY_ptr,
                .gen    = buckets->b[bucket].mark.gen,
                .offset = bucket_to_sector(ca, bucket),
                .dev    = ca->dev_idx,
index 3056f3bf4a4bf9fcd7355ec5d00c28fe4a9ce8f5..449eb0c1ce6116e682c38a0a64c8da622fd2e6d7 100644 (file)
@@ -618,10 +618,11 @@ struct bch_fs {
 
        struct bch_fs_pcpu __percpu     *pcpu;
 
-       struct bch_fs_usage __percpu    *usage[2];
-
        struct percpu_rw_semaphore      mark_lock;
 
+       struct bch_fs_usage __percpu    *usage[2];
+       struct bch_fs_usage __percpu    *usage_scratch;
+
        /*
         * When we invalidate buckets, we use both the priority and the amount
         * of good data to determine which buckets to reuse first - to weight
@@ -685,7 +686,7 @@ struct bch_fs {
        ZSTD_parameters         zstd_params;
 
        struct crypto_shash     *sha256;
-       struct crypto_skcipher  *chacha20;
+       struct crypto_sync_skcipher *chacha20;
        struct crypto_shash     *poly1305;
 
        atomic64_t              key_version;
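usage_scratch is a preallocated percpu bch_fs_usage sized for the current replicas table; now that struct bch_fs_usage ends in a flexible array indexed by replicas entry, it can no longer live on the stack. The marking paths in this commit follow this shape (a sketch based on the btree_update_interior.c hunks below):

    percpu_down_read_preempt_disable(&c->mark_lock);
    fs_usage = bch2_fs_usage_get_scratch(c);   /* zeroed scratch; valid while preemption is off */

    bch2_mark_key_locked(c, k, true, 0, pos, fs_usage, 0, 0);
    bch2_fs_usage_apply(c, fs_usage, disk_res, pos);

    percpu_up_read_preempt_enable(&c->mark_lock);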
index d99441a1edf4f1e283280e455638991b17948523..f77dc20d9fea5a71fb4f2a99fa2e7430a7479113 100644 (file)
@@ -169,6 +169,10 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
        if (!btree_node_may_write(b))
                goto out_unlock;
 
+       if (btree_node_dirty(b) &&
+           test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+               goto out_unlock;
+
        if (btree_node_dirty(b) ||
            btree_node_write_in_flight(b) ||
            btree_node_read_in_flight(b)) {
index 9f5a79a8c240a6dae42548c21776df204741c5a4..23013fbb6fb2b17270e332357096fe9f92215566 100644 (file)
@@ -475,29 +475,43 @@ static void bch2_gc_free(struct bch_fs *c)
                ca->usage[1] = NULL;
        }
 
+       percpu_down_write(&c->mark_lock);
+
        free_percpu(c->usage[1]);
        c->usage[1] = NULL;
-}
 
-static void fs_usage_reset(struct bch_fs_usage *fs_usage)
-{
-       memset(&fs_usage->s.gc_start[0], 0,
-              sizeof(*fs_usage) - offsetof(typeof(*fs_usage), s.gc_start));
+       percpu_up_write(&c->mark_lock);
 }
 
-static void fs_usage_cpy(struct bch_fs_usage *dst,
-                        struct bch_fs_usage *src)
+/*
+ * Accumulate percpu counters onto one cpu's copy - only valid when access
+ * against any percpu counter is guarded against
+ */
+static u64 *acc_percpu_u64s(u64 __percpu *p, unsigned nr)
 {
-       memcpy(&dst->s.gc_start[0],
-              &src->s.gc_start[0],
-              sizeof(*dst) - offsetof(typeof(*dst), s.gc_start));
+       u64 *ret;
+       int cpu;
+
+       preempt_disable();
+       ret = this_cpu_ptr(p);
+       preempt_enable();
+
+       for_each_possible_cpu(cpu) {
+               u64 *i = per_cpu_ptr(p, cpu);
+
+               if (i != ret) {
+                       acc_u64s(ret, i, nr);
+                       memset(i, 0, nr * sizeof(u64));
+               }
+       }
+
+       return ret;
 }
 
 static void bch2_gc_done_nocheck(struct bch_fs *c)
 {
        struct bch_dev *ca;
        unsigned i;
-       int cpu;
 
        {
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
@@ -527,42 +541,39 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
        };
 
        for_each_member_device(ca, c, i) {
-               struct bch_dev_usage *p;
+               unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
+               struct bch_dev_usage *dst = (void *)
+                       acc_percpu_u64s((void *) ca->usage[0], nr);
+               struct bch_dev_usage *src = (void *)
+                       acc_percpu_u64s((void *) ca->usage[1], nr);
 
-               for_each_possible_cpu(cpu) {
-                       p = per_cpu_ptr(ca->usage[0], cpu);
-                       memset(p, 0, sizeof(*p));
-               }
-
-               preempt_disable();
-               *this_cpu_ptr(ca->usage[0]) = __bch2_dev_usage_read(ca, 1);
-               preempt_enable();
+               *dst = *src;
        }
 
        {
-               struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);
-
-               for_each_possible_cpu(cpu)
-                       fs_usage_reset(per_cpu_ptr(c->usage[0], cpu));
-
-               preempt_disable();
-               fs_usage_cpy(this_cpu_ptr(c->usage[0]), &src);
-               preempt_enable();
+               unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
+                       c->replicas.nr;
+               struct bch_fs_usage *dst = (void *)
+                       acc_percpu_u64s((void *) c->usage[0], nr);
+               struct bch_fs_usage *src = (void *)
+                       acc_percpu_u64s((void *) c->usage[1], nr);
+
+               memcpy(&dst->s.gc_start[0],
+                      &src->s.gc_start[0],
+                      nr * sizeof(u64) - offsetof(typeof(*dst), s.gc_start));
        }
-
 }
 
 static void bch2_gc_done(struct bch_fs *c, bool initial)
 {
        struct bch_dev *ca;
        unsigned i;
-       int cpu;
 
 #define copy_field(_f, _msg, ...)                                      \
-       if (dst._f != src._f) {                                         \
-               bch_err(c, _msg ": got %llu, should be %llu, fixing"\
-                       , ##__VA_ARGS__, dst._f, src._f);               \
-               dst._f = src._f;                                        \
+       if (dst->_f != src->_f) {                                       \
+               bch_err(c, _msg ": got %llu, should be %llu, fixing"    \
+                       , ##__VA_ARGS__, dst->_f, src->_f);             \
+               dst->_f = src->_f;                                      \
        }
 #define copy_stripe_field(_f, _msg, ...)                               \
        if (dst->_f != src->_f) {                                       \
@@ -643,9 +654,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
        };
 
        for_each_member_device(ca, c, i) {
-               struct bch_dev_usage dst = __bch2_dev_usage_read(ca, 0);
-               struct bch_dev_usage src = __bch2_dev_usage_read(ca, 1);
-               struct bch_dev_usage *p;
+               unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
+               struct bch_dev_usage *dst = (void *)
+                       acc_percpu_u64s((void *) ca->usage[0], nr);
+               struct bch_dev_usage *src = (void *)
+                       acc_percpu_u64s((void *) ca->usage[1], nr);
                unsigned b;
 
                for (b = 0; b < BCH_DATA_NR; b++)
@@ -659,22 +672,15 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                                       "sectors[%s]", bch2_data_types[b]);
                copy_dev_field(sectors_fragmented,
                               "sectors_fragmented");
-
-               for_each_possible_cpu(cpu) {
-                       p = per_cpu_ptr(ca->usage[0], cpu);
-                       memset(p, 0, sizeof(*p));
-               }
-
-               preempt_disable();
-               p = this_cpu_ptr(ca->usage[0]);
-               *p = dst;
-               preempt_enable();
        }
 
        {
-               struct bch_fs_usage dst = __bch2_fs_usage_read(c, 0);
-               struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);
-               unsigned r, b;
+               unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
+                       c->replicas.nr;
+               struct bch_fs_usage *dst = (void *)
+                       acc_percpu_u64s((void *) c->usage[0], nr);
+               struct bch_fs_usage *src = (void *)
+                       acc_percpu_u64s((void *) c->usage[1], nr);
 
                copy_fs_field(s.hidden,         "hidden");
                copy_fs_field(s.data,           "data");
@@ -682,27 +688,16 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                copy_fs_field(s.reserved,       "reserved");
                copy_fs_field(s.nr_inodes,      "nr_inodes");
 
-               for (r = 0; r < BCH_REPLICAS_MAX; r++) {
-                       for (b = 0; b < BCH_DATA_NR; b++)
-                               copy_fs_field(replicas[r].data[b],
-                                             "replicas[%i].data[%s]",
-                                             r, bch2_data_types[b]);
-                       copy_fs_field(replicas[r].ec_data,
-                                     "replicas[%i].ec_data", r);
-                       copy_fs_field(replicas[r].persistent_reserved,
-                                     "replicas[%i].persistent_reserved", r);
-               }
-
-               for (b = 0; b < BCH_DATA_NR; b++)
-                       copy_fs_field(buckets[b],
-                                     "buckets[%s]", bch2_data_types[b]);
+               for (i = 0; i < BCH_REPLICAS_MAX; i++)
+                       copy_fs_field(persistent_reserved[i],
+                                     "persistent_reserved[%i]", i);
 
-               for_each_possible_cpu(cpu)
-                       fs_usage_reset(per_cpu_ptr(c->usage[0], cpu));
-
-               preempt_disable();
-               fs_usage_cpy(this_cpu_ptr(c->usage[0]), &dst);
-               preempt_enable();
+               for (i = 0; i < c->replicas.nr; i++) {
+                       /*
+                        * XXX: print out replicas entry
+                        */
+                       copy_fs_field(data[i], "data[%i]", i);
+               }
        }
 out:
        percpu_up_write(&c->mark_lock);
@@ -725,9 +720,15 @@ static int bch2_gc_start(struct bch_fs *c)
         */
        gc_pos_set(c, gc_phase(GC_PHASE_START));
 
+       percpu_down_write(&c->mark_lock);
        BUG_ON(c->usage[1]);
 
-       c->usage[1] = alloc_percpu(struct bch_fs_usage);
+       c->usage[1] = __alloc_percpu_gfp(sizeof(struct bch_fs_usage) +
+                                        sizeof(u64) * c->replicas.nr,
+                                        sizeof(u64),
+                                        GFP_KERNEL);
+       percpu_up_write(&c->mark_lock);
+
        if (!c->usage[1])
                return -ENOMEM;
 
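acc_percpu_u64s() folds every CPU's copy of a counter array into the local CPU's copy, zeroing the others, and returns it; it is only safe while writers are excluded, which bch2_gc_done() guarantees by holding mark_lock for write. The call pattern, as used for the device-usage comparison above:

    unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
    struct bch_dev_usage *dst = (void *) acc_percpu_u64s((void *) ca->usage[0], nr);
    struct bch_dev_usage *src = (void *) acc_percpu_u64s((void *) ca->usage[1], nr);

    /* dst and src are now plain structs that can be compared field by field */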
index 231ace4fe017a897cc66b9dd3b3109118584057d..25aa22a017acd42691d9c5b610df0420b54625f0 100644 (file)
@@ -1330,8 +1330,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
                if (!(old & (1 << BTREE_NODE_dirty)))
                        return;
 
-               if (b->written &&
-                   !btree_node_may_write(b))
+               if (!btree_node_may_write(b))
                        return;
 
                if (old & (1 << BTREE_NODE_write_in_flight)) {
@@ -1347,7 +1346,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        } while (cmpxchg_acquire(&b->flags, old, new) != old);
 
        BUG_ON(btree_node_fake(b));
-       BUG_ON(!list_empty(&b->write_blocked));
        BUG_ON((b->will_make_reachable != 0) != !b->written);
 
        BUG_ON(b->written >= c->opts.btree_node_size);
@@ -1684,15 +1682,13 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
                unsigned long flags = READ_ONCE(b->flags);
                unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0;
 
-               if (//!(flags & (1 << BTREE_NODE_dirty)) &&
-                   !b->writes[0].wait.list.first &&
-                   !b->writes[1].wait.list.first &&
-                   !(b->will_make_reachable & 1))
+               if (!(flags & (1 << BTREE_NODE_dirty)))
                        continue;
 
-               pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n",
+               pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n",
                       b,
                       (flags & (1 << BTREE_NODE_dirty)) != 0,
+                      (flags & (1 << BTREE_NODE_need_write)) != 0,
                       b->level,
                       b->written,
                       !list_empty_careful(&b->write_blocked),
index 4be3221a931d10e9ce611e922deab3a51f164a91..4de1fb736ae59980964f390f3a553e60bb4d177e 100644 (file)
@@ -2,6 +2,7 @@
 #define _BCACHEFS_BTREE_IO_H
 
 #include "bset.h"
+#include "btree_locking.h"
 #include "extents.h"
 #include "io_types.h"
 
@@ -47,7 +48,7 @@ static inline void btree_node_wait_on_io(struct btree *b)
 static inline bool btree_node_may_write(struct btree *b)
 {
        return list_empty_careful(&b->write_blocked) &&
-               !b->will_make_reachable;
+               (!b->written || !b->will_make_reachable);
 }
 
 enum compact_mode {
@@ -99,42 +100,36 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 void bch2_btree_node_write(struct bch_fs *, struct btree *,
                          enum six_lock_type);
 
-/*
- * btree_node_dirty() can be cleared with only a read lock,
- * and for bch2_btree_node_write_cond() we want to set need_write iff it's
- * still dirty:
- */
-static inline void set_btree_node_need_write_if_dirty(struct btree *b)
+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b)
 {
-       unsigned long old, new, v = READ_ONCE(b->flags);
-
-       do {
-               old = new = v;
-
-               if (!(old & (1 << BTREE_NODE_dirty)))
-                       return;
-
-               new |= (1 << BTREE_NODE_need_write);
-       } while ((v = cmpxchg(&b->flags, old, new)) != old);
+       while (b->written &&
+              btree_node_need_write(b) &&
+              btree_node_may_write(b)) {
+               if (!btree_node_write_in_flight(b)) {
+                       bch2_btree_node_write(c, b, SIX_LOCK_read);
+                       break;
+               }
+
+               six_unlock_read(&b->lock);
+               btree_node_wait_on_io(b);
+               btree_node_lock_type(c, b, SIX_LOCK_read);
+       }
 }
 
 #define bch2_btree_node_write_cond(_c, _b, cond)                       \
 do {                                                                   \
-       while ((_b)->written && btree_node_dirty(_b) && (cond)) {       \
-               if (!btree_node_may_write(_b)) {                        \
-                       set_btree_node_need_write_if_dirty(_b);         \
-                       break;                                          \
-               }                                                       \
+       unsigned long old, new, v = READ_ONCE((_b)->flags);             \
+                                                                       \
+       do {                                                            \
+               old = new = v;                                          \
                                                                        \
-               if (!btree_node_write_in_flight(_b)) {                  \
-                       bch2_btree_node_write(_c, _b, SIX_LOCK_read);   \
+               if (!(old & (1 << BTREE_NODE_dirty)) || !(cond))        \
                        break;                                          \
-               }                                                       \
                                                                        \
-               six_unlock_read(&(_b)->lock);                           \
-               btree_node_wait_on_io(_b);                              \
-               btree_node_lock_type(c, b, SIX_LOCK_read);              \
-       }                                                               \
+               new |= (1 << BTREE_NODE_need_write);                    \
+       } while ((v = cmpxchg(&(_b)->flags, old, new)) != old);         \
+                                                                       \
+       btree_node_write_if_need(_c, _b);                               \
 } while (0)
 
 void bch2_btree_flush_all_reads(struct bch_fs *);
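The rewritten bch2_btree_node_write_cond() uses the standard lockless read-modify-write loop on b->flags: snapshot the flags, compute the desired value, and retry the cmpxchg until no concurrent update raced in; only then does it call btree_node_write_if_need(). The flag-setting idiom in isolation:

    unsigned long old, new, v = READ_ONCE(b->flags);

    do {
            old = new = v;

            if (!(old & (1 << BTREE_NODE_dirty)))
                    break;                          /* not dirty: nothing to write */

            new |= (1 << BTREE_NODE_need_write);    /* request a write */
    } while ((v = cmpxchg(&b->flags, old, new)) != old);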
index 1a1ca952c7e5f130668dfd7ec33e3db4157b9e88..873332f78826681edffd2378d8c09a8ca3fce7b8 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <linux/dynamic_fault.h>
 
+#include "bset.h"
 #include "btree_types.h"
 
 static inline void btree_iter_set_dirty(struct btree_iter *iter,
index 33260a99da542e55e76f0fa9b6cfadc815053b89..9054de0dbd2e166e905656586ca8267aa0035572 100644 (file)
@@ -9,9 +9,9 @@
  * updating the iterator state
  */
 
+#include <linux/six.h>
+
 #include "btree_iter.h"
-#include "btree_io.h"
-#include "six.h"
 
 /* matches six lock types */
 enum btree_node_locked_type {
index 0af2a7dcb2c3e37578f4a8bae808abbb88ac9183..dce4ed385e8e3fc8ecef2b3a8c25d45d0124fca8 100644 (file)
@@ -3,10 +3,10 @@
 
 #include <linux/list.h>
 #include <linux/rhashtable.h>
+#include <linux/six.h>
 
 #include "bkey_methods.h"
 #include "journal_types.h"
-#include "six.h"
 
 struct open_bucket;
 struct btree_update;
index e18655e444ce032088d5d66671206a2887c7e417..0f2fa6f72619d150b1b177586a9a9e1358d67141 100644 (file)
@@ -366,6 +366,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 
        set_btree_node_accessed(b);
        set_btree_node_dirty(b);
+       set_btree_node_need_write(b);
 
        bch2_bset_init_first(b, &b->data->keys);
        memset(&b->nr, 0, sizeof(b->nr));
@@ -654,6 +655,12 @@ retry:
                closure_wait(&btree_current_write(b)->wait, cl);
 
                list_del(&as->write_blocked_list);
+
+               /*
+                * for flush_held_btree_writes() waiting on updates to flush or
+                * nodes to be writeable:
+                */
+               closure_wake_up(&c->btree_interior_update_wait);
                mutex_unlock(&c->btree_interior_update_lock);
 
                /*
@@ -957,6 +964,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
        list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
                list_del(&p->write_blocked_list);
                btree_update_reparent(as, p);
+
+               /*
+                * for flush_held_btree_writes() waiting on updates to flush or
+                * nodes to be writeable:
+                */
+               closure_wake_up(&c->btree_interior_update_wait);
        }
 
        clear_btree_node_dirty(b);
@@ -1056,23 +1069,24 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 {
        struct bch_fs *c = as->c;
        struct btree *old = btree_node_root(c, b);
-       struct bch_fs_usage stats = { 0 };
+       struct bch_fs_usage *fs_usage;
 
        __bch2_btree_set_root_inmem(c, b);
 
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read_preempt_disable(&c->mark_lock);
+       fs_usage = bch2_fs_usage_get_scratch(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
                      true, 0,
                      gc_pos_btree_root(b->btree_id),
-                     &stats, 0, 0);
+                     fs_usage, 0, 0);
 
        if (old && !btree_node_fake(old))
                bch2_btree_node_free_index(as, NULL,
                                           bkey_i_to_s_c(&old->key),
-                                          &stats);
-       bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+                                          fs_usage);
+       bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
                            gc_pos_btree_root(b->btree_id));
 
        percpu_up_read_preempt_enable(&c->mark_lock);
@@ -1147,7 +1161,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
                                        struct btree_node_iter *node_iter)
 {
        struct bch_fs *c = as->c;
-       struct bch_fs_usage stats = { 0 };
+       struct bch_fs_usage *fs_usage;
        struct bkey_packed *k;
        struct bkey tmp;
 
@@ -1155,10 +1169,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read_preempt_disable(&c->mark_lock);
+       fs_usage = bch2_fs_usage_get_scratch(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
                             true, 0,
-                            gc_pos_btree_node(b), &stats, 0, 0);
+                            gc_pos_btree_node(b), fs_usage, 0, 0);
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
               bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@@ -1171,9 +1186,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
        if (k && !bkey_cmp_packed(b, k, &insert->k))
                bch2_btree_node_free_index(as, b,
                                           bkey_disassemble(b, k, &tmp),
-                                          &stats);
+                                          fs_usage);
 
-       bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+       bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
                            gc_pos_btree_node(b));
 
        percpu_up_read_preempt_enable(&c->mark_lock);
@@ -1957,7 +1972,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                        bkey_copy(&b->key, &new_key->k_i);
                }
        } else {
-               struct bch_fs_usage stats = { 0 };
+               struct bch_fs_usage *fs_usage;
 
                BUG_ON(btree_node_root(c, b) != b);
 
@@ -1965,15 +1980,16 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
                mutex_lock(&c->btree_interior_update_lock);
                percpu_down_read_preempt_disable(&c->mark_lock);
+               fs_usage = bch2_fs_usage_get_scratch(c);
 
                bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
                              true, 0,
                              gc_pos_btree_root(b->btree_id),
-                             &stats, 0, 0);
+                             fs_usage, 0, 0);
                bch2_btree_node_free_index(as, NULL,
                                           bkey_i_to_s_c(&b->key),
-                                          &stats);
-               bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+                                          fs_usage);
+               bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
                                    gc_pos_btree_root(b->btree_id));
 
                percpu_up_read_preempt_enable(&c->mark_lock);
index d72e5951615f2e65f8581b2553696cca82fc8917..d33d0bf048f07b6cb8acb5be7361e1036b0b152c 100644 (file)
 #include "ec.h"
 #include "error.h"
 #include "movinggc.h"
+#include "replicas.h"
 
 #include <linux/preempt.h>
 #include <trace/events/bcachefs.h>
 
-static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
-
-#ifdef DEBUG_BUCKETS
-
-#define lg_local_lock  lg_global_lock
-#define lg_local_unlock        lg_global_unlock
-
-static void bch2_fs_stats_verify(struct bch_fs *c)
-{
-       struct bch_fs_usage stats =_bch2_fs_usage_read(c);
-       unsigned i, j;
-
-       for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
-               for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++)
-                       if ((s64) stats.replicas[i].data[j] < 0)
-                               panic("replicas %u %s sectors underflow: %lli\n",
-                                     i + 1, bch_data_types[j],
-                                     stats.replicas[i].data[j]);
-
-               if ((s64) stats.replicas[i].persistent_reserved < 0)
-                       panic("replicas %u reserved underflow: %lli\n",
-                             i + 1, stats.replicas[i].persistent_reserved);
-       }
-
-       for (j = 0; j < ARRAY_SIZE(stats.buckets); j++)
-               if ((s64) stats.replicas[i].data_buckets[j] < 0)
-                       panic("%s buckets underflow: %lli\n",
-                             bch_data_types[j],
-                             stats.buckets[j]);
-
-       if ((s64) stats.s.online_reserved < 0)
-               panic("sectors_online_reserved underflow: %lli\n",
-                     stats.s.online_reserved);
-}
-
-static void bch2_dev_stats_verify(struct bch_dev *ca)
-{
-       struct bch_dev_usage stats =
-               __bch2_dev_usage_read(ca);
-       u64 n = ca->mi.nbuckets - ca->mi.first_bucket;
-       unsigned i;
-
-       for (i = 0; i < ARRAY_SIZE(stats.buckets); i++)
-               BUG_ON(stats.buckets[i]         > n);
-       BUG_ON(stats.buckets_alloc              > n);
-       BUG_ON(stats.buckets_unavailable        > n);
-}
-
-static void bch2_disk_reservations_verify(struct bch_fs *c, int flags)
-{
-       if (!(flags & BCH_DISK_RESERVATION_NOFAIL)) {
-               u64 used = __bch2_fs_sectors_used(c);
-               u64 cached = 0;
-               u64 avail = atomic64_read(&c->sectors_available);
-               int cpu;
-
-               for_each_possible_cpu(cpu)
-                       cached += per_cpu_ptr(c->usage_percpu, cpu)->available_cache;
-
-               if (used + avail + cached > c->capacity)
-                       panic("used %llu avail %llu cached %llu capacity %llu\n",
-                             used, avail, cached, c->capacity);
-       }
-}
-
-#else
-
-static void bch2_fs_stats_verify(struct bch_fs *c) {}
-static void bch2_dev_stats_verify(struct bch_dev *ca) {}
-static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {}
-
-#endif
-
 /*
  * Clear journal_seq_valid for buckets for which it's not needed, to prevent
  * wraparound:
@@ -185,46 +113,47 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
        }
 }
 
-#define bch2_usage_add(_acc, _stats)                                   \
-do {                                                                   \
-       typeof(_acc) _a = (_acc), _s = (_stats);                        \
-       unsigned i;                                                     \
-                                                                       \
-       for (i = 0; i < sizeof(*_a) / sizeof(u64); i++)                 \
-               ((u64 *) (_a))[i] += ((u64 *) (_s))[i];                 \
-} while (0)
-
 #define bch2_usage_read_raw(_stats)                                    \
 ({                                                                     \
        typeof(*this_cpu_ptr(_stats)) _acc;                             \
-       int cpu;                                                        \
                                                                        \
        memset(&_acc, 0, sizeof(_acc));                                 \
-                                                                       \
-       for_each_possible_cpu(cpu)                                      \
-               bch2_usage_add(&_acc, per_cpu_ptr((_stats), cpu));      \
+       acc_u64s_percpu((u64 *) &_acc,                                  \
+                       (u64 __percpu *) _stats,                        \
+                       sizeof(_acc) / sizeof(u64));                    \
                                                                        \
        _acc;                                                           \
 })
 
-struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca, bool gc)
-{
-       return bch2_usage_read_raw(ca->usage[gc]);
-}
-
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
        return bch2_usage_read_raw(ca->usage[0]);
 }
 
-struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *c, bool gc)
+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
 {
-       return bch2_usage_read_raw(c->usage[gc]);
-}
+       struct bch_fs_usage *ret;
+       unsigned nr = READ_ONCE(c->replicas.nr);
+retry:
+       ret = kzalloc(sizeof(*ret) + nr * sizeof(u64), GFP_NOFS);
+       if (unlikely(!ret))
+               return NULL;
 
-struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *c)
-{
-       return bch2_usage_read_raw(c->usage[0]);
+       percpu_down_read_preempt_disable(&c->mark_lock);
+
+       if (unlikely(nr < c->replicas.nr)) {
+               nr = c->replicas.nr;
+               percpu_up_read_preempt_enable(&c->mark_lock);
+               kfree(ret);
+               goto retry;
+       }
+
+       acc_u64s_percpu((u64 *) ret,
+                       (u64 __percpu *) c->usage[0],
+                       sizeof(*ret) / sizeof(u64) + nr);
+       percpu_up_read_preempt_enable(&c->mark_lock);
+
+       return ret;
 }
 
 #define RESERVE_FACTOR 6
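bch2_fs_usage_read() sizes its allocation from c->replicas.nr sampled outside the lock, so it revalidates under mark_lock and retries if the replicas table grew in between; allocating first keeps the GFP_NOFS allocation out of the locked region. The allocate/check/retry shape in isolation (error handling trimmed):

    unsigned nr = READ_ONCE(c->replicas.nr);
    retry:
    ret = kzalloc(sizeof(*ret) + nr * sizeof(u64), GFP_NOFS);

    percpu_down_read_preempt_disable(&c->mark_lock);
    if (nr < c->replicas.nr) {                      /* table grew: buffer too small */
            nr = c->replicas.nr;
            percpu_up_read_preempt_enable(&c->mark_lock);
            kfree(ret);
            goto retry;
    }
    /* ... sum the percpu counters into ret, drop the lock, return ret ... */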
@@ -239,17 +168,13 @@ static u64 avail_factor(u64 r)
        return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
 }
 
-static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
-{
-       return fs_usage.s.hidden +
-               fs_usage.s.data +
-               reserve_factor(fs_usage.s.reserved +
-                              fs_usage.s.online_reserved);
-}
-
 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
 {
-       return min(c->capacity, __bch2_fs_sectors_used(c, fs_usage));
+       return min(fs_usage.s.hidden +
+                  fs_usage.s.data +
+                  reserve_factor(fs_usage.s.reserved +
+                                 fs_usage.s.online_reserved),
+                  c->capacity);
 }
 
 struct bch_fs_usage_short
@@ -324,14 +249,16 @@ void bch2_fs_usage_apply(struct bch_fs *c,
                fs_usage->s.online_reserved     -= added;
        }
 
-       bch2_usage_add(this_cpu_ptr(c->usage[0]), fs_usage);
-
-       if (gc_visited(c, gc_pos))
-               bch2_usage_add(this_cpu_ptr(c->usage[1]), fs_usage);
-
-       bch2_fs_stats_verify(c);
+       acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
+                (u64 *) fs_usage,
+                sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr);
 
-       memset(fs_usage, 0, sizeof(*fs_usage));
+       if (gc_visited(c, gc_pos)) {
+               BUG_ON(!c->usage[1]);
+               acc_u64s((u64 *) this_cpu_ptr(c->usage[1]),
+                        (u64 *) fs_usage,
+                        sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr);
+       }
 }
 
 static inline void account_bucket(struct bch_fs_usage *fs_usage,
@@ -342,7 +269,6 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
        if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
                fs_usage->s.hidden      += size;
 
-       fs_usage->buckets[type]         += size;
        dev_usage->buckets[type]        += nr;
 }
 
@@ -387,8 +313,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 
        if (!is_available_bucket(old) && is_available_bucket(new))
                bch2_wake_allocator(ca);
-
-       bch2_dev_stats_verify(ca);
 }
 
 void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
@@ -416,6 +340,37 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
        _old;                                                   \
 })
 
+static inline void update_replicas(struct bch_fs *c,
+                                  struct bch_fs_usage *fs_usage,
+                                  struct bch_replicas_entry *r,
+                                  s64 sectors)
+{
+       int idx = bch2_replicas_entry_idx(c, r);
+
+       BUG_ON(idx < 0);
+       BUG_ON(!sectors);
+
+       if (r->data_type == BCH_DATA_CACHED)
+               fs_usage->s.cached      += sectors;
+       else
+               fs_usage->s.data        += sectors;
+       fs_usage->data[idx]             += sectors;
+}
+
+static inline void update_cached_sectors(struct bch_fs *c,
+                                        struct bch_fs_usage *fs_usage,
+                                        unsigned dev, s64 sectors)
+{
+       struct bch_replicas_padded r;
+
+       r.e.data_type   = BCH_DATA_CACHED;
+       r.e.nr_devs     = 1;
+       r.e.nr_required = 1;
+       r.e.devs[0]     = dev;
+
+       update_replicas(c, fs_usage, &r.e, sectors);
+}
+
 static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
                                     size_t b, struct bucket_mark *old,
                                     bool gc)
@@ -434,8 +389,9 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
                new.gen++;
        }));
 
-       fs_usage->replicas[0].data[BCH_DATA_CACHED]     -= old->cached_sectors;
-       fs_usage->s.cached                              -= old->cached_sectors;
+       if (old->cached_sectors)
+               update_cached_sectors(c, fs_usage, ca->dev_idx,
+                                     -old->cached_sectors);
 }
 
 void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
@@ -502,11 +458,6 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                new.data_type   = type;
                checked_add(new.dirty_sectors, sectors);
        }));
-
-       if (type == BCH_DATA_BTREE ||
-           type == BCH_DATA_USER)
-               fs_usage->s.data                += sectors;
-       fs_usage->replicas[0].data[type]        += sectors;
 }
 
 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
@@ -630,9 +581,9 @@ static void bch2_mark_pointer(struct bch_fs *c,
 
 static int bch2_mark_stripe_ptr(struct bch_fs *c,
                                struct bch_extent_stripe_ptr p,
+                               enum bch_data_type data_type,
+                               struct bch_fs_usage *fs_usage,
                                s64 sectors, unsigned flags,
-                               s64 *adjusted_disk_sectors,
-                               unsigned *redundancy,
                                bool gc)
 {
        struct stripe *m;
@@ -648,16 +599,15 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
                return -1;
        }
 
+       BUG_ON(m->r.e.data_type != data_type);
+
        nr_data = m->nr_blocks - m->nr_redundant;
 
        parity_sectors = DIV_ROUND_UP(abs(sectors) * m->nr_redundant, nr_data);
 
        if (sectors < 0)
                parity_sectors = -parity_sectors;
-
-       *adjusted_disk_sectors += parity_sectors;
-
-       *redundancy = max_t(unsigned, *redundancy, m->nr_redundant + 1);
+       sectors += parity_sectors;
 
        new = atomic_add_return(sectors, &m->block_sectors[p.block]);
        old = new - sectors;
@@ -673,11 +623,14 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
        if (!gc)
                bch2_stripes_heap_update(c, m, p.idx);
 
+       update_replicas(c, fs_usage, &m->r.e, sectors);
+
        return 0;
 }
 
 static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
-                           s64 sectors, enum bch_data_type data_type,
+                           s64 sectors,
+                           enum bch_data_type data_type,
                            struct bch_fs_usage *fs_usage,
                            unsigned journal_seq, unsigned flags,
                            bool gc)
@@ -685,58 +638,46 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
-       s64 cached_sectors      = 0;
-       s64 dirty_sectors       = 0;
-       s64 ec_sectors          = 0;
-       unsigned replicas       = 0;
-       unsigned ec_redundancy  = 0;
+       struct bch_replicas_padded r;
+       s64 dirty_sectors = 0;
        unsigned i;
        int ret;
 
+       r.e.data_type   = data_type;
+       r.e.nr_devs     = 0;
+       r.e.nr_required = 1;
+
        BUG_ON(!sectors);
 
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                s64 disk_sectors = data_type == BCH_DATA_BTREE
                        ? sectors
                        : ptr_disk_sectors_delta(p, sectors);
-               s64 adjusted_disk_sectors = disk_sectors;
 
                bch2_mark_pointer(c, p, disk_sectors, data_type,
                                  fs_usage, journal_seq, flags, gc);
 
-               if (!p.ptr.cached)
+               if (p.ptr.cached) {
+                       update_cached_sectors(c, fs_usage, p.ptr.dev,
+                                             disk_sectors);
+               } else if (!p.ec_nr) {
+                       dirty_sectors          += disk_sectors;
+                       r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+               } else {
                        for (i = 0; i < p.ec_nr; i++) {
                                ret = bch2_mark_stripe_ptr(c, p.ec[i],
-                                               disk_sectors, flags,
-                                               &adjusted_disk_sectors,
-                                               &ec_redundancy, gc);
+                                               data_type, fs_usage,
+                                               disk_sectors, flags, gc);
                                if (ret)
                                        return ret;
                        }
-               if (!p.ptr.cached)
-                       replicas++;
 
-               if (p.ptr.cached)
-                       cached_sectors  += adjusted_disk_sectors;
-               else if (!p.ec_nr)
-                       dirty_sectors   += adjusted_disk_sectors;
-               else
-                       ec_sectors      += adjusted_disk_sectors;
+                       r.e.nr_required = 0;
+               }
        }
 
-       replicas        = clamp_t(unsigned,     replicas,
-                                 1, ARRAY_SIZE(fs_usage->replicas));
-       ec_redundancy   = clamp_t(unsigned,     ec_redundancy,
-                                 1, ARRAY_SIZE(fs_usage->replicas));
-
-       fs_usage->s.cached                                      += cached_sectors;
-       fs_usage->replicas[0].data[BCH_DATA_CACHED]             += cached_sectors;
-
-       fs_usage->s.data                                        += dirty_sectors;
-       fs_usage->replicas[replicas - 1].data[data_type]        += dirty_sectors;
-
-       fs_usage->s.data                                        += ec_sectors;
-       fs_usage->replicas[ec_redundancy - 1].ec_data           += ec_sectors;
+       if (dirty_sectors)
+               update_replicas(c, fs_usage, &r.e, dirty_sectors);
 
        return 0;
 }
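
Note: the rewritten bch2_mark_extent() drops the old fixed-size
replicas[]/ec_data buckets in favour of building one bch_replicas_entry
naming exactly which devices hold the extent's non-cached copies, then
charging all dirty sectors to that single entry via update_replicas()
(the entry is sorted lazily, inside __replicas_entry_idx() further down,
now that replicas_entry_sort() is removed from the constructors). A
minimal userspace model of the entry construction follows; struct
layouts and names here are invented for illustration, this is not the
kernel code:

    #include <stdio.h>
    #include <string.h>

    #define MAX_DEVS 16

    struct replicas_entry {
            unsigned        nr_devs;
            unsigned        nr_required;
            unsigned char   devs[MAX_DEVS];
    };

    struct decoded_ptr {            /* model of struct extent_ptr_decoded */
            unsigned        dev;
            int             cached;
            int             ec_nr;  /* nonzero: pointer into an EC stripe */
    };

    static void extent_to_replicas(const struct decoded_ptr *p, unsigned nr,
                                   struct replicas_entry *e)
    {
            memset(e, 0, sizeof(*e));
            e->nr_required = 1;

            for (unsigned i = 0; i < nr; i++) {
                    if (p[i].cached) {
                            /* cached copies are not replicas */
                    } else if (!p[i].ec_nr) {
                            e->devs[e->nr_devs++] = p[i].dev;
                    } else {
                            /*
                             * striped pointers are charged to the stripe's
                             * own replicas entry; the extent merely stops
                             * requiring a full unstriped copy
                             */
                            e->nr_required = 0;
                    }
            }
    }

    int main(void)
    {
            struct decoded_ptr ptrs[] = {
                    { .dev = 0 }, { .dev = 2 }, { .dev = 5, .cached = 1 },
            };
            struct replicas_entry e;

            extent_to_replicas(ptrs, 3, &e);
            printf("nr_devs %u nr_required %u\n", e.nr_devs, e.nr_required);
            return 0;
    }
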
@@ -804,8 +745,24 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                m->algorithm    = s.v->algorithm;
                m->nr_blocks    = s.v->nr_blocks;
                m->nr_redundant = s.v->nr_redundant;
+
+               memset(&m->r, 0, sizeof(m->r));
+
+               m->r.e.data_type        = BCH_DATA_USER;
+               m->r.e.nr_devs          = s.v->nr_blocks;
+               m->r.e.nr_required      = s.v->nr_blocks - s.v->nr_redundant;
+
+               for (i = 0; i < s.v->nr_blocks; i++)
+                       m->r.e.devs[i] = s.v->ptrs[i].dev;
        }
 
+       /*
+        * XXX: account for stripes somehow here
+        */
+#if 0
+       update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
+#endif
+
        if (!gc) {
                if (inserting)
                        bch2_stripes_heap_insert(c, m, idx);
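
Note: nr_required = nr_blocks - nr_redundant is the usual erasure-coding
threshold. For example, a stripe with nr_blocks = 6 and nr_redundant = 2
yields a replicas entry with nr_devs = 6 and nr_required = 4: any four of
the six blocks are enough to reconstruct the data. Charging sectors to
this entry is still stubbed out under #if 0 above, per the XXX, until
stripe accounting is settled.
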
@@ -853,11 +810,11 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
                sectors *= replicas;
-               replicas = clamp_t(unsigned, replicas,
-                                  1, ARRAY_SIZE(fs_usage->replicas));
+               replicas = clamp_t(unsigned, replicas, 1,
+                                  ARRAY_SIZE(fs_usage->persistent_reserved));
 
-               fs_usage->s.reserved                                    += sectors;
-               fs_usage->replicas[replicas - 1].persistent_reserved    += sectors;
+               fs_usage->s.reserved                            += sectors;
+               fs_usage->persistent_reserved[replicas - 1]     += sectors;
                break;
        }
        default:
@@ -919,7 +876,7 @@ void bch2_mark_update(struct btree_insert *trans,
        struct btree_iter       *iter = insert->iter;
        struct btree            *b = iter->l[0].b;
        struct btree_node_iter  node_iter = iter->l[0].iter;
-       struct bch_fs_usage     fs_usage = { 0 };
+       struct bch_fs_usage     *fs_usage;
        struct gc_pos           pos = gc_pos_btree_node(b);
        struct bkey_packed      *_k;
 
@@ -927,12 +884,13 @@ void bch2_mark_update(struct btree_insert *trans,
                return;
 
        percpu_down_read_preempt_disable(&c->mark_lock);
+       fs_usage = bch2_fs_usage_get_scratch(c);
 
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
                bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
                        bpos_min(insert->k->k.p, b->key.k.p).offset -
                        bkey_start_offset(&insert->k->k),
-                       pos, &fs_usage, trans->journal_res.seq, 0);
+                       pos, fs_usage, trans->journal_res.seq, 0);
 
        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
                                                      KEY_TYPE_discard))) {
@@ -965,7 +923,7 @@ void bch2_mark_update(struct btree_insert *trans,
                                BUG_ON(sectors <= 0);
 
                                bch2_mark_key_locked(c, k, true, sectors,
-                                       pos, &fs_usage, trans->journal_res.seq, 0);
+                                       pos, fs_usage, trans->journal_res.seq, 0);
 
                                sectors = bkey_start_offset(&insert->k->k) -
                                        k.k->p.offset;
@@ -976,12 +934,12 @@ void bch2_mark_update(struct btree_insert *trans,
                }
 
                bch2_mark_key_locked(c, k, false, sectors,
-                       pos, &fs_usage, trans->journal_res.seq, 0);
+                       pos, fs_usage, trans->journal_res.seq, 0);
 
                bch2_btree_node_iter_advance(&node_iter, b);
        }
 
-       bch2_fs_usage_apply(c, &fs_usage, trans->disk_res, pos);
+       bch2_fs_usage_apply(c, fs_usage, trans->disk_res, pos);
 
        percpu_up_read_preempt_enable(&c->mark_lock);
 }
@@ -1003,8 +961,6 @@ void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
        percpu_down_read_preempt_disable(&c->mark_lock);
        this_cpu_sub(c->usage[0]->s.online_reserved,
                     res->sectors);
-
-       bch2_fs_stats_verify(c);
        percpu_up_read_preempt_enable(&c->mark_lock);
 
        res->sectors = 0;
@@ -1045,8 +1001,6 @@ out:
        this_cpu_add(c->usage[0]->s.online_reserved, sectors);
        res->sectors                    += sectors;
 
-       bch2_disk_reservations_verify(c, flags);
-       bch2_fs_stats_verify(c);
        percpu_up_read_preempt_enable(&c->mark_lock);
        return 0;
 
@@ -1078,14 +1032,11 @@ recalculate:
                this_cpu_add(c->usage[0]->s.online_reserved, sectors);
                res->sectors                    += sectors;
                ret = 0;
-
-               bch2_disk_reservations_verify(c, flags);
        } else {
                atomic64_set(&c->sectors_available, sectors_available);
                ret = -ENOSPC;
        }
 
-       bch2_fs_stats_verify(c);
        percpu_up_write(&c->mark_lock);
 
        if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
@@ -1123,7 +1074,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        size_t reserve_none     = max_t(size_t, 1, nbuckets >> 9);
        size_t copygc_reserve   = max_t(size_t, 2, nbuckets >> 7);
        size_t free_inc_nr      = max(max_t(size_t, 1, nbuckets >> 12),
-                                     btree_reserve);
+                                     btree_reserve * 2);
        bool resize = ca->buckets[0] != NULL,
             start_copygc = ca->copygc_thread != NULL;
        int ret = -ENOMEM;
index 84059111c7ca2e795eed725a788ec1fe340da588..ebd39e85fad4c6aeb31d04ab0f5592b34d135f71 100644 (file)
@@ -179,7 +179,6 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
 
 /* Device usage: */
 
-struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool);
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
 
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
@@ -218,8 +217,18 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
 
 /* Filesystem usage: */
 
-struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool);
-struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
+static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
+{
+       struct bch_fs_usage *ret;
+
+       ret = this_cpu_ptr(c->usage_scratch);
+
+       memset(ret, 0, sizeof(*ret) + c->replicas.nr * sizeof(u64));
+
+       return ret;
+}
+
+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
 
 u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
 
index c5537a283b97f0e54a7bc56bec9ce61135327b94..56863c2371265603a2ae8e3e55a66c5475c1b8cf 100644 (file)
@@ -74,16 +74,18 @@ struct bch_fs_usage {
                u64             cached;
                u64             reserved;
                u64             nr_inodes;
+
+               /* XXX: add stats for compression ratio */
+#if 0
+               u64             uncompressed;
+               u64             compressed;
+#endif
        } s;
 
        /* broken out: */
-       struct {
-               u64             data[BCH_DATA_NR];
-               u64             ec_data;
-               u64             persistent_reserved;
-       }                       replicas[BCH_REPLICAS_MAX];
 
-       u64                     buckets[BCH_DATA_NR];
+       u64                     persistent_reserved[BCH_REPLICAS_MAX];
+       u64                     data[];
 };
 
 struct bch_fs_usage_short {
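
Note: struct bch_fs_usage now ends in a flexible array member, one u64
per entry in c->replicas, so every allocation and memset has to add
c->replicas.nr * sizeof(u64) to sizeof(struct bch_fs_usage) (compare
bch2_fs_usage_get_scratch() above and fs_usage_size in super.c below).
A standalone sketch of the sizing idiom, with invented names and sizes:

    #include <stdlib.h>
    #include <string.h>

    struct fs_usage {
            unsigned long long      online_reserved;
            unsigned long long      persistent_reserved[4];
            unsigned long long      data[]; /* one counter per replicas entry */
    };

    static struct fs_usage *fs_usage_alloc(unsigned nr_replicas_entries)
    {
            size_t bytes = sizeof(struct fs_usage) +
                    nr_replicas_entries * sizeof(unsigned long long);
            struct fs_usage *u = malloc(bytes);

            if (u)
                    memset(u, 0, bytes);    /* mirrors the scratch memset */
            return u;
    }

    int main(void)
    {
            struct fs_usage *u = fs_usage_alloc(8);

            if (u)
                    u->data[7] = 1;         /* last valid slot for 8 entries */
            free(u);
            return 0;
    }
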
index ac1ec5f62cea06c4b4bef44ef108af4d4ef03179..56ceb260b2bc6840a8c55135060ec15fb219a343 100644 (file)
@@ -393,21 +393,29 @@ static long bch2_ioctl_usage(struct bch_fs *c,
        }
 
        {
-               struct bch_fs_usage src = bch2_fs_usage_read(c);
+               struct bch_fs_usage *src;
                struct bch_ioctl_fs_usage dst = {
                        .capacity               = c->capacity,
-                       .used                   = bch2_fs_sectors_used(c, src),
-                       .online_reserved        = src.s.online_reserved,
                };
 
+               src = bch2_fs_usage_read(c);
+               if (!src)
+                       return -ENOMEM;
+
+               dst.used                = bch2_fs_sectors_used(c, *src);
+               dst.online_reserved     = src->s.online_reserved;
+
                for (i = 0; i < BCH_REPLICAS_MAX; i++) {
                        dst.persistent_reserved[i] =
-                               src.replicas[i].persistent_reserved;
-
+                               src->persistent_reserved[i];
+#if 0
                        for (j = 0; j < BCH_DATA_NR; j++)
                                dst.sectors[j][i] = src.replicas[i].data[j];
+#endif
                }
 
+               kfree(src);
+
                ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst));
                if (ret)
                        return ret;
index 28d086bc0e6124379503078ebd105ba121586244..dfa2de90fde4511c286f0e769bcf074f0f30bd6d 100644 (file)
 #include <crypto/poly1305.h>
 #include <keys/user-type.h>
 
-/*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
- * use permitted, subject to terms of PostgreSQL license; see.)
-
- * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation. (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
-*/
-
-static const u64 crc_table[256] = {
-       0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
-       0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
-       0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
-       0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
-       0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
-       0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
-       0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
-       0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
-       0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
-       0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
-       0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
-       0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
-       0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
-       0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
-       0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
-       0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
-       0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
-       0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
-       0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
-       0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
-       0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
-       0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
-       0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
-       0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
-       0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
-       0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
-       0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
-       0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
-       0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
-       0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
-       0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
-       0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
-       0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
-       0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
-       0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
-       0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
-       0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
-       0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
-       0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
-       0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
-       0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
-       0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
-       0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
-       0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
-       0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
-       0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
-       0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
-       0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
-       0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
-       0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
-       0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
-       0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
-       0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
-       0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
-       0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
-       0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
-       0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
-       0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
-       0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
-       0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
-       0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
-       0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
-       0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
-       0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
-       0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
-       0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
-       0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
-       0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
-       0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
-       0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
-       0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
-       0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
-       0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
-       0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
-       0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
-       0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
-       0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
-       0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
-       0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
-       0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
-       0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
-       0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
-       0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
-       0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
-       0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
-       0x9AFCE626CE85B507ULL,
-};
-
-u64 bch2_crc64_update(u64 crc, const void *_data, size_t len)
-{
-       const unsigned char *data = _data;
-
-       while (len--) {
-               int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
-               crc = crc_table[i] ^ (crc << 8);
-       }
-
-       return crc;
-}
-
 static u64 bch2_checksum_init(unsigned type)
 {
        switch (type) {
@@ -188,21 +66,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t
        }
 }
 
-static inline void do_encrypt_sg(struct crypto_skcipher *tfm,
+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
                                 struct nonce nonce,
                                 struct scatterlist *sg, size_t len)
 {
-       SKCIPHER_REQUEST_ON_STACK(req, tfm);
+       SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
        int ret;
 
-       skcipher_request_set_tfm(req, tfm);
+       skcipher_request_set_sync_tfm(req, tfm);
        skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
 
        ret = crypto_skcipher_encrypt(req);
        BUG_ON(ret);
 }
 
-static inline void do_encrypt(struct crypto_skcipher *tfm,
+static inline void do_encrypt(struct crypto_sync_skcipher *tfm,
                              struct nonce nonce,
                              void *buf, size_t len)
 {
@@ -213,10 +91,10 @@ static inline void do_encrypt(struct crypto_skcipher *tfm,
 }
 
 int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
-                          void *buf, size_t len)
+                           void *buf, size_t len)
 {
-       struct crypto_skcipher *chacha20 =
-               crypto_alloc_skcipher("chacha20", 0, 0);
+       struct crypto_sync_skcipher *chacha20 =
+               crypto_alloc_sync_skcipher("chacha20", 0, 0);
        int ret;
 
        if (!chacha20) {
@@ -224,7 +102,8 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
                return PTR_ERR(chacha20);
        }
 
-       ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key));
+       ret = crypto_skcipher_setkey(&chacha20->base,
+                                    (void *) key, sizeof(*key));
        if (ret) {
                pr_err("crypto_skcipher_setkey() error: %i", ret);
                goto err;
@@ -232,7 +111,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
 
        do_encrypt(chacha20, nonce, buf, len);
 err:
-       crypto_free_skcipher(chacha20);
+       crypto_free_sync_skcipher(chacha20);
        return ret;
 }
 
@@ -597,7 +476,7 @@ err:
 static int bch2_alloc_ciphers(struct bch_fs *c)
 {
        if (!c->chacha20)
-               c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0);
+               c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
        if (IS_ERR(c->chacha20)) {
                bch_err(c, "error requesting chacha20 module: %li",
                        PTR_ERR(c->chacha20));
@@ -680,7 +559,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
                        goto err;
        }
 
-       ret = crypto_skcipher_setkey(c->chacha20,
+       ret = crypto_skcipher_setkey(&c->chacha20->base,
                        (void *) &key.key, sizeof(key.key));
        if (ret)
                goto err;
@@ -708,7 +587,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c)
        if (!IS_ERR_OR_NULL(c->poly1305))
                crypto_free_shash(c->poly1305);
        if (!IS_ERR_OR_NULL(c->chacha20))
-               crypto_free_skcipher(c->chacha20);
+               crypto_free_sync_skcipher(c->chacha20);
        if (!IS_ERR_OR_NULL(c->sha256))
                crypto_free_shash(c->sha256);
 }
@@ -740,7 +619,7 @@ int bch2_fs_encryption_init(struct bch_fs *c)
        if (ret)
                goto out;
 
-       ret = crypto_skcipher_setkey(c->chacha20,
+       ret = crypto_skcipher_setkey(&c->chacha20->base,
                        (void *) &key.key, sizeof(key.key));
        if (ret)
                goto out;
index 031b36f3f36ec94fd483c47b3b2e892e380e49bb..fb72c6a4e3f408b262a3c3f2b8789900f70037df 100644 (file)
@@ -5,9 +5,13 @@
 #include "extents_types.h"
 #include "super-io.h"
 
+#include <linux/crc64.h>
 #include <crypto/chacha20.h>
 
-u64 bch2_crc64_update(u64, const void *, size_t);
+static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
+{
+       return crc64_be(crc, p, len);
+}
 
 #define BCH_NONCE_EXTENT       cpu_to_le32(1 << 28)
 #define BCH_NONCE_BTREE                cpu_to_le32(2 << 28)
index d04298106fa5422a3ce48b70994de381f9a5420f..44c5d3821a3868066f45975c595637d3ecc38a90 100644 (file)
@@ -5,6 +5,11 @@
 
 #define EC_STRIPE_MAX  16
 
+struct bch_replicas_padded {
+       struct bch_replicas_entry       e;
+       u8                              pad[EC_STRIPE_MAX];
+};
+
 struct stripe {
        size_t                  heap_idx;
 
@@ -17,6 +22,8 @@ struct stripe {
        u8                      alive;
        atomic_t                blocks_nonempty;
        atomic_t                block_sectors[EC_STRIPE_MAX];
+
+       struct bch_replicas_padded r;
 };
 
 struct ec_stripe_heap_entry {
index 41ac5d4849b89de77a2bd64adfaeb491cc7f1bbc..fdc24be18f6675cb5bd5193b1d7e4d73edf44960 100644 (file)
@@ -899,11 +899,8 @@ static void readpage_bio_extend(struct readpages_iter *iter,
                        if (!get_more)
                                break;
 
-                       rcu_read_lock();
-                       page = radix_tree_lookup(&iter->mapping->i_pages, page_offset);
-                       rcu_read_unlock();
-
-                       if (page && !radix_tree_exceptional_entry(page))
+                       page = xa_load(&iter->mapping->i_pages, page_offset);
+                       if (page && !xa_is_value(page))
                                break;
 
                        page = __page_cache_alloc(readahead_gfp_mask(iter->mapping));
@@ -2705,7 +2702,7 @@ static bool page_slot_is_data(struct address_space *mapping, pgoff_t index)
        bool ret;
 
        page = find_lock_entry(mapping, index);
-       if (!page || radix_tree_exception(page))
+       if (!page || xa_is_value(page))
                return false;
 
        ret = page_is_data(page);
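
Note: this is the XArray conversion: xa_load() subsumes the
rcu_read_lock()/radix_tree_lookup()/rcu_read_unlock() sequence (it takes
the RCU read lock internally), and xa_is_value() replaces
radix_tree_exceptional_entry()/radix_tree_exception() for detecting
non-page value entries such as shadow entries.
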
index d6cb21d690732b030442788866c7caca22f33d26..26d5f348c4f7507392808504374edc3723744ff3 100644 (file)
@@ -183,7 +183,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
        if (unlikely(ret < 0))
                goto err1;
 
-       qstr.hash_len   = ret;
+       qstr.len        = ret;
        qstr.name       = kname;
 
        ret = -ENOENT;
index 1d3161ee13fbd6cbc64494b17d92f92410f3ae36..66ca13aab4e88d12f5a0065828333c03a2ee1666 100644 (file)
@@ -3,11 +3,6 @@
 #include "replicas.h"
 #include "super-io.h"
 
-struct bch_replicas_entry_padded {
-       struct bch_replicas_entry       e;
-       u8                              pad[BCH_SB_MEMBERS_MAX];
-};
-
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
                                            struct bch_replicas_cpu *);
 
@@ -124,8 +119,6 @@ static void bkey_to_replicas(struct bkey_s_c k,
                stripe_to_replicas(k, e);
                break;
        }
-
-       replicas_entry_sort(e);
 }
 
 static inline void devlist_to_replicas(struct bch_devs_list devs,
@@ -144,8 +137,6 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
 
        for (i = 0; i < devs.nr; i++)
                e->devs[e->nr_devs++] = devs.devs[i];
-
-       replicas_entry_sort(e);
 }
 
 static struct bch_replicas_cpu
@@ -176,13 +167,35 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
        return new;
 }
 
+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
+                                      struct bch_replicas_entry *search)
+{
+       int idx, entry_size = replicas_entry_bytes(search);
+
+       if (unlikely(entry_size > r->entry_size))
+               return -1;
+
+       replicas_entry_sort(search);
+
+       while (entry_size < r->entry_size)
+               ((char *) search)[entry_size++] = 0;
+
+       idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
+                             memcmp, search);
+
+       return idx < r->nr ? idx : -1;
+}
+
+int bch2_replicas_entry_idx(struct bch_fs *c,
+                           struct bch_replicas_entry *search)
+{
+       return __replicas_entry_idx(&c->replicas, search);
+}
+
 static bool __replicas_has_entry(struct bch_replicas_cpu *r,
                                 struct bch_replicas_entry *search)
 {
-       return replicas_entry_bytes(search) <= r->entry_size &&
-               eytzinger0_find(r->entries, r->nr,
-                               r->entry_size,
-                               memcmp, search) < r->nr;
+       return __replicas_entry_idx(r, search) >= 0;
 }
 
 static bool replicas_has_entry(struct bch_fs *c,
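
Note: __replicas_entry_idx() first sorts the search key, then zero-pads
it out to the table's fixed entry_size so a plain memcmp can serve as
the comparison function, and finally binary-searches the
eytzinger-ordered table. In eytzinger (BFS/heap) order the children of
slot i live at slots 2i+1 and 2i+2, and an in-order walk visits the
elements in sorted order, which keeps the lookup a tight, cache-friendly
loop. A minimal standalone model of the search (note the in-tree
eytzinger0_find() reports a miss by returning nr, not -1):

    #include <stdio.h>

    /* values 1..7 in eytzinger order: an in-order walk yields 1..7 sorted */
    static const int tbl[] = { 4, 2, 6, 1, 3, 5, 7 };

    static int eytzinger0_search(const int *a, unsigned n, int key)
    {
            unsigned i = 0;

            while (i < n) {
                    if (key == a[i])
                            return (int) i;
                    i = 2 * i + 1 + (key > a[i]);   /* left or right child */
            }
            return -1;                              /* not found */
    }

    int main(void)
    {
            printf("5 -> slot %d, 8 -> slot %d\n",
                   eytzinger0_search(tbl, 7, 5),
                   eytzinger0_search(tbl, 7, 8));
            return 0;
    }
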
@@ -201,6 +214,80 @@ static bool replicas_has_entry(struct bch_fs *c,
        return marked;
 }
 
+static void __replicas_table_update(struct bch_fs_usage __percpu *dst,
+                                   struct bch_replicas_cpu *dst_r,
+                                   struct bch_fs_usage __percpu *src,
+                                   struct bch_replicas_cpu *src_r)
+{
+       int src_idx, dst_idx, cpu;
+
+       for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
+               u64 *dst_v, src_v = 0;
+
+               for_each_possible_cpu(cpu)
+                       src_v += *per_cpu_ptr(&src->data[src_idx], cpu);
+
+               dst_idx = __replicas_entry_idx(dst_r,
+                               cpu_replicas_entry(src_r, src_idx));
+
+               if (dst_idx < 0) {
+                       BUG_ON(src_v);
+                       continue;
+               }
+
+               preempt_disable();
+
+               dst_v = this_cpu_ptr(&dst->data[dst_idx]);
+               BUG_ON(*dst_v);
+
+               *dst_v = src_v;
+
+               preempt_enable();
+       }
+}
+
+/*
+ * Resize filesystem accounting:
+ */
+static int replicas_table_update(struct bch_fs *c,
+                                struct bch_replicas_cpu *new_r)
+{
+       struct bch_fs_usage __percpu *new_usage[3] = { NULL, NULL, NULL };
+       unsigned bytes = sizeof(struct bch_fs_usage) +
+               sizeof(u64) * new_r->nr;
+       unsigned i;
+       int ret = -ENOMEM;
+
+       for (i = 0; i < 3; i++) {
+               if (i < 2 && !c->usage[i])
+                       continue;
+
+               new_usage[i] = __alloc_percpu_gfp(bytes, sizeof(u64),
+                                                 GFP_NOIO);
+               if (!new_usage[i])
+                       goto err;
+       }
+
+       for (i = 0; i < 2; i++) {
+               if (!c->usage[i])
+                       continue;
+
+               __replicas_table_update(new_usage[i],   new_r,
+                                       c->usage[i],    &c->replicas);
+
+               swap(c->usage[i], new_usage[i]);
+       }
+
+       swap(c->usage_scratch, new_usage[2]);
+
+       swap(c->replicas, *new_r);
+       ret = 0;
+err:
+       for (i = 0; i < 3; i++)
+               free_percpu(new_usage[i]);
+       return ret;
+}
+
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
                                struct bch_replicas_entry *new_entry)
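
Note: replicas_table_update() is what lets the accounting arrays grow
when an entry is added: for each entry of the old table it sums the old
percpu counter across all CPUs (the new acc_u64s_percpu() in util.h does
the same job generically), finds that entry's slot in the resized table
via __replicas_entry_idx(), and deposits the sum on the current CPU; an
old entry with no slot in the new table must have a zero count, which
the BUG_ON(src_v) asserts. Three allocations are involved because
c->usage[0], c->usage[1] (the gc copy) and c->usage_scratch all carry
the per-entry counters and must be resized together.
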
@@ -242,7 +329,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
        /* don't update in memory replicas until changes are persistent */
        percpu_down_write(&c->mark_lock);
        if (new_r.entries)
-               swap(new_r, c->replicas);
+               ret = replicas_table_update(c, &new_r);
        if (new_gc.entries)
                swap(new_gc, c->replicas_gc);
        percpu_up_write(&c->mark_lock);
@@ -269,7 +356,7 @@ int bch2_mark_replicas(struct bch_fs *c,
                       enum bch_data_type data_type,
                       struct bch_devs_list devs)
 {
-       struct bch_replicas_entry_padded search;
+       struct bch_replicas_padded search;
 
        if (!devs.nr)
                return 0;
@@ -285,7 +372,7 @@ int bch2_mark_replicas(struct bch_fs *c,
 
 int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 {
-       struct bch_replicas_entry_padded search;
+       struct bch_replicas_padded search;
        struct bch_devs_list cached = bch2_bkey_cached_devs(k);
        unsigned i;
        int ret;
@@ -306,6 +393,8 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 
 int bch2_replicas_gc_end(struct bch_fs *c, int ret)
 {
+       unsigned i;
+
        lockdep_assert_held(&c->replicas_gc_lock);
 
        mutex_lock(&c->sb_lock);
@@ -313,6 +402,39 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
        if (ret)
                goto err;
 
+       /*
+        * this is kind of crappy; the replicas gc mechanism needs to be ripped
+        * out
+        */
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+               struct bch_replicas_cpu n;
+               u64 v = 0;
+               int cpu;
+
+               if (__replicas_has_entry(&c->replicas_gc, e))
+                       continue;
+
+               for_each_possible_cpu(cpu)
+                       v += *per_cpu_ptr(&c->usage[0]->data[i], cpu);
+               if (!v)
+                       continue;
+
+               n = cpu_replicas_add_entry(&c->replicas_gc, e);
+               if (!n.entries) {
+                       ret = -ENOSPC;
+                       goto err;
+               }
+
+               percpu_down_write(&c->mark_lock);
+               swap(n, c->replicas_gc);
+               percpu_up_write(&c->mark_lock);
+
+               kfree(n.entries);
+       }
+
        if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
                ret = -ENOSPC;
                goto err;
@@ -324,7 +446,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
 err:
        percpu_down_write(&c->mark_lock);
        if (!ret)
-               swap(c->replicas, c->replicas_gc);
+               ret = replicas_table_update(c, &c->replicas_gc);
 
        kfree(c->replicas_gc.entries);
        c->replicas_gc.entries = NULL;
@@ -460,7 +582,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
        bch2_cpu_replicas_sort(&new_r);
 
        percpu_down_write(&c->mark_lock);
-       swap(c->replicas, new_r);
+       ret = replicas_table_update(c, &new_r);
        percpu_up_write(&c->mark_lock);
 
        kfree(new_r.entries);
@@ -681,7 +803,7 @@ bool bch2_replicas_marked(struct bch_fs *c,
                          struct bch_devs_list devs,
                          bool check_gc_replicas)
 {
-       struct bch_replicas_entry_padded search;
+       struct bch_replicas_padded search;
 
        if (!devs.nr)
                return true;
@@ -697,7 +819,7 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
                               struct bkey_s_c k,
                               bool check_gc_replicas)
 {
-       struct bch_replicas_entry_padded search;
+       struct bch_replicas_padded search;
        struct bch_devs_list cached = bch2_bkey_cached_devs(k);
        unsigned i;
 
index 87246a04ccc40f118940598e54ecf2f669bd64f0..fc833653b8590399d5be56a93e06a2e1c75dc48d 100644 (file)
@@ -1,8 +1,11 @@
 #ifndef _BCACHEFS_REPLICAS_H
 #define _BCACHEFS_REPLICAS_H
 
+#include "eytzinger.h"
 #include "replicas_types.h"
 
+int bch2_replicas_entry_idx(struct bch_fs *,
+                           struct bch_replicas_entry *);
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
                          struct bch_devs_list, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *,
index 032b34a1ac44d5f99254a6b85e8b28de4f885973..1f343e64ca283d14fd3429856ca70be095130f3a 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/crc32c.h>
 #include <crypto/hash.h>
+#include <crypto/sha.h>
 
 struct bch_hash_info {
        u8                      type;
@@ -37,7 +38,7 @@ bch2_hash_info_init(struct bch_fs *c,
                break;
        case BCH_STR_HASH_SIPHASH: {
                SHASH_DESC_ON_STACK(desc, c->sha256);
-               u8 digest[crypto_shash_digestsize(c->sha256)];
+               u8 digest[SHA256_DIGEST_SIZE];
 
                desc->tfm = c->sha256;
                desc->flags = 0;
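
Note: the digest buffer changes from a variable-length array sized by
crypto_shash_digestsize() at runtime to the compile-time
SHA256_DIGEST_SIZE constant that the newly added include/crypto/sha.h
provides; the tfm here is always sha256, so the sizes agree, and the
change matches the kernel-wide effort to keep VLAs off the stack.
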
index 82a0bf0729b92de8303989ace335778b6547fdc0..a539f2a82a7325ad7c3c51161110a55d8de7c437 100644 (file)
@@ -406,6 +406,7 @@ static void bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
        percpu_free_rwsem(&c->mark_lock);
+       free_percpu(c->usage_scratch);
        free_percpu(c->usage[0]);
        free_percpu(c->pcpu);
        mempool_exit(&c->btree_iters_pool);
@@ -536,7 +537,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 {
        struct bch_sb_field_members *mi;
        struct bch_fs *c;
-       unsigned i, iter_size;
+       unsigned i, iter_size, fs_usage_size;
        const char *err;
 
        pr_verbose_init(opts, "");
@@ -630,6 +631,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                (btree_blocks(c) + 1) * 2 *
                sizeof(struct btree_node_iter_set);
 
+       fs_usage_size = sizeof(struct bch_fs_usage) +
+               sizeof(u64) * c->replicas.nr;
+
        if (!(c->wq = alloc_workqueue("bcachefs",
                                WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
            !(c->copygc_wq = alloc_workqueue("bcache_copygc",
@@ -644,7 +648,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                        max(offsetof(struct btree_read_bio, bio),
                            offsetof(struct btree_write_bio, wbio.bio)),
                        BIOSET_NEED_BVECS) ||
-           !(c->usage[0] = alloc_percpu(struct bch_fs_usage)) ||
+           !(c->usage[0] = __alloc_percpu(fs_usage_size, sizeof(u64))) ||
+           !(c->usage_scratch = __alloc_percpu(fs_usage_size, sizeof(u64))) ||
            !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
            mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                        btree_bytes(c)) ||
index 316f827fa4907a4ff7fd771062b14638e807e6be..2e6e9bd587ee4cf5a7408be0e237c2e25debdc59 100644 (file)
@@ -233,33 +233,34 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 {
        struct printbuf out = _PBUF(buf, PAGE_SIZE);
-       struct bch_fs_usage stats = bch2_fs_usage_read(c);
-       unsigned replicas, type;
+       struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
+       unsigned replicas;
+
+       if (!fs_usage)
+               return -ENOMEM;
 
        pr_buf(&out, "capacity:\t\t%llu\n", c->capacity);
 
-       for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
+       for (replicas = 0;
+            replicas < ARRAY_SIZE(fs_usage->persistent_reserved);
+            replicas++) {
                pr_buf(&out, "%u replicas:\n", replicas + 1);
-
+#if 0
                for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
                        pr_buf(&out, "\t%s:\t\t%llu\n",
                               bch2_data_types[type],
                               stats.replicas[replicas].data[type]);
                pr_buf(&out, "\terasure coded:\t%llu\n",
                       stats.replicas[replicas].ec_data);
+#endif
                pr_buf(&out, "\treserved:\t%llu\n",
-                      stats.replicas[replicas].persistent_reserved);
+                      fs_usage->persistent_reserved[replicas]);
        }
 
-       pr_buf(&out, "bucket usage\n");
-
-       for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
-               pr_buf(&out, "\t%s:\t\t%llu\n",
-                      bch2_data_types[type],
-                      stats.buckets[type]);
-
        pr_buf(&out, "online reserved:\t%llu\n",
-              stats.s.online_reserved);
+              fs_usage->s.online_reserved);
+
+       kfree(fs_usage);
 
        return out.pos - buf;
 }
index 80d081861387fce850775422e33338571d9a9145..5c060e77fe0fef0cdb24f8381e8a12fe19dbcc49 100644 (file)
@@ -24,9 +24,6 @@
 #include "eytzinger.h"
 #include "util.h"
 
-#define simple_strtoint(c, end, base)  simple_strtol(c, end, base)
-#define simple_strtouint(c, end, base) simple_strtoul(c, end, base)
-
 static const char si_units[] = "?kMGTPEZY";
 
 static int __bch2_strtoh(const char *cp, u64 *res,
index 905b24f29d1d1ab3b52320eaab0b2b8de8ca074a..25d6750915e30661d7c588b29491e1fda99a54e3 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/sched/clock.h>
 #include <linux/llist.h>
 #include <linux/log2.h>
+#include <linux/percpu.h>
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -61,13 +62,6 @@ struct closure;
 
 #endif
 
-#ifndef __CHECKER__
-#define __flatten __attribute__((flatten))
-#else
-/* sparse doesn't know about attribute((flatten)) */
-#define __flatten
-#endif
-
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define CPU_BIG_ENDIAN         0
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -704,4 +698,21 @@ do {                                                                       \
        }                                                               \
 } while (0)
 
+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
+{
+       unsigned i;
+
+       for (i = 0; i < nr; i++)
+               acc[i] += src[i];
+}
+
+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
+                                  unsigned nr)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
+}
+
 #endif /* _BCACHEFS_UTIL_H */
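
Note: the two helpers added at the end capture a recurring pattern:
treat a struct full of u64 counters as a flat array and sum it across
CPUs. A userspace model of how a reader such as bch2_fs_usage_read()
can use them, with the percpu machinery faked as one array per CPU
(invented scaffolding; the kernel versions walk real percpu pointers):

    #include <stdio.h>

    typedef unsigned long long u64;

    #define NR_CPUS         2
    #define NR_COUNTERS     4

    static void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
    {
            for (unsigned i = 0; i < nr; i++)
                    acc[i] += src[i];
    }

    int main(void)
    {
            /* fake percpu data: each CPU owns a private copy of the counters */
            u64 pcpu[NR_CPUS][NR_COUNTERS] = {
                    {  1,  2,  3,  4 },
                    { 10, 20, 30, 40 },
            };
            u64 sum[NR_COUNTERS] = { 0 };

            for (int cpu = 0; cpu < NR_CPUS; cpu++) /* acc_u64s_percpu() analogue */
                    acc_u64s(sum, pcpu[cpu], NR_COUNTERS);

            printf("%llu %llu %llu %llu\n", sum[0], sum[1], sum[2], sum[3]);
            return 0;
    }
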
diff --git a/linux/crc64.c b/linux/crc64.c
new file mode 100644 (file)
index 0000000..0ef8ae6
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ *     or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+       size_t i, t;
+
+       const unsigned char *_p = p;
+
+       for (i = 0; i < len; i++) {
+               t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+               crc = crc64table[t] ^ (crc << 8);
+       }
+
+       return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
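
Note: since crc64table[] is machine-generated, a bit-at-a-time reference
is handy for sanity checking: for an MSB-first (non-reflected) CRC,
xoring each byte into the top of the register and shifting out eight
bits against the ECMA-182 polynomial must agree with the table-driven
crc64_be() above. In particular, a single 0x01 byte from a zero seed
must produce the polynomial itself, which is exactly crc64table[1]
below. A standalone check (not part of the commit):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL

    static uint64_t crc64_be_bitwise(uint64_t crc, const void *p, size_t len)
    {
            const unsigned char *d = p;

            while (len--) {
                    crc ^= (uint64_t) *d++ << 56;
                    for (int i = 0; i < 8; i++)     /* one polynomial step per bit */
                            crc = (crc & (1ULL << 63))
                                    ? (crc << 1) ^ CRC64_ECMA182_POLY
                                    : crc << 1;
            }
            return crc;
    }

    int main(void)
    {
            unsigned char byte = 0x01;

            /* expect 42f0e1eba9ea3693, i.e. crc64table[1] */
            printf("%016llx\n",
                   (unsigned long long) crc64_be_bitwise(0, &byte, 1));
            return 0;
    }
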
diff --git a/linux/crc64table.h b/linux/crc64table.h
new file mode 100644 (file)
index 0000000..9964164
--- /dev/null
@@ -0,0 +1,135 @@
+/* this file is generated - do not edit */
+
+#include <linux/types.h>
+#include <linux/cache.h>
+
+static const u64 ____cacheline_aligned crc64table[256] = {
+       0x0000000000000000ULL,  0x42f0e1eba9ea3693ULL,
+       0x85e1c3d753d46d26ULL,  0xc711223cfa3e5bb5ULL,
+       0x493366450e42ecdfULL,  0x0bc387aea7a8da4cULL,
+       0xccd2a5925d9681f9ULL,  0x8e224479f47cb76aULL,
+       0x9266cc8a1c85d9beULL,  0xd0962d61b56fef2dULL,
+       0x17870f5d4f51b498ULL,  0x5577eeb6e6bb820bULL,
+       0xdb55aacf12c73561ULL,  0x99a54b24bb2d03f2ULL,
+       0x5eb4691841135847ULL,  0x1c4488f3e8f96ed4ULL,
+       0x663d78ff90e185efULL,  0x24cd9914390bb37cULL,
+       0xe3dcbb28c335e8c9ULL,  0xa12c5ac36adfde5aULL,
+       0x2f0e1eba9ea36930ULL,  0x6dfeff5137495fa3ULL,
+       0xaaefdd6dcd770416ULL,  0xe81f3c86649d3285ULL,
+       0xf45bb4758c645c51ULL,  0xb6ab559e258e6ac2ULL,
+       0x71ba77a2dfb03177ULL,  0x334a9649765a07e4ULL,
+       0xbd68d2308226b08eULL,  0xff9833db2bcc861dULL,
+       0x388911e7d1f2dda8ULL,  0x7a79f00c7818eb3bULL,
+       0xcc7af1ff21c30bdeULL,  0x8e8a101488293d4dULL,
+       0x499b3228721766f8ULL,  0x0b6bd3c3dbfd506bULL,
+       0x854997ba2f81e701ULL,  0xc7b97651866bd192ULL,
+       0x00a8546d7c558a27ULL,  0x4258b586d5bfbcb4ULL,
+       0x5e1c3d753d46d260ULL,  0x1cecdc9e94ace4f3ULL,
+       0xdbfdfea26e92bf46ULL,  0x990d1f49c77889d5ULL,
+       0x172f5b3033043ebfULL,  0x55dfbadb9aee082cULL,
+       0x92ce98e760d05399ULL,  0xd03e790cc93a650aULL,
+       0xaa478900b1228e31ULL,  0xe8b768eb18c8b8a2ULL,
+       0x2fa64ad7e2f6e317ULL,  0x6d56ab3c4b1cd584ULL,
+       0xe374ef45bf6062eeULL,  0xa1840eae168a547dULL,
+       0x66952c92ecb40fc8ULL,  0x2465cd79455e395bULL,
+       0x3821458aada7578fULL,  0x7ad1a461044d611cULL,
+       0xbdc0865dfe733aa9ULL,  0xff3067b657990c3aULL,
+       0x711223cfa3e5bb50ULL,  0x33e2c2240a0f8dc3ULL,
+       0xf4f3e018f031d676ULL,  0xb60301f359dbe0e5ULL,
+       0xda050215ea6c212fULL,  0x98f5e3fe438617bcULL,
+       0x5fe4c1c2b9b84c09ULL,  0x1d14202910527a9aULL,
+       0x93366450e42ecdf0ULL,  0xd1c685bb4dc4fb63ULL,
+       0x16d7a787b7faa0d6ULL,  0x5427466c1e109645ULL,
+       0x4863ce9ff6e9f891ULL,  0x0a932f745f03ce02ULL,
+       0xcd820d48a53d95b7ULL,  0x8f72eca30cd7a324ULL,
+       0x0150a8daf8ab144eULL,  0x43a04931514122ddULL,
+       0x84b16b0dab7f7968ULL,  0xc6418ae602954ffbULL,
+       0xbc387aea7a8da4c0ULL,  0xfec89b01d3679253ULL,
+       0x39d9b93d2959c9e6ULL,  0x7b2958d680b3ff75ULL,
+       0xf50b1caf74cf481fULL,  0xb7fbfd44dd257e8cULL,
+       0x70eadf78271b2539ULL,  0x321a3e938ef113aaULL,
+       0x2e5eb66066087d7eULL,  0x6cae578bcfe24bedULL,
+       0xabbf75b735dc1058ULL,  0xe94f945c9c3626cbULL,
+       0x676dd025684a91a1ULL,  0x259d31cec1a0a732ULL,
+       0xe28c13f23b9efc87ULL,  0xa07cf2199274ca14ULL,
+       0x167ff3eacbaf2af1ULL,  0x548f120162451c62ULL,
+       0x939e303d987b47d7ULL,  0xd16ed1d631917144ULL,
+       0x5f4c95afc5edc62eULL,  0x1dbc74446c07f0bdULL,
+       0xdaad56789639ab08ULL,  0x985db7933fd39d9bULL,
+       0x84193f60d72af34fULL,  0xc6e9de8b7ec0c5dcULL,
+       0x01f8fcb784fe9e69ULL,  0x43081d5c2d14a8faULL,
+       0xcd2a5925d9681f90ULL,  0x8fdab8ce70822903ULL,
+       0x48cb9af28abc72b6ULL,  0x0a3b7b1923564425ULL,
+       0x70428b155b4eaf1eULL,  0x32b26afef2a4998dULL,
+       0xf5a348c2089ac238ULL,  0xb753a929a170f4abULL,
+       0x3971ed50550c43c1ULL,  0x7b810cbbfce67552ULL,
+       0xbc902e8706d82ee7ULL,  0xfe60cf6caf321874ULL,
+       0xe224479f47cb76a0ULL,  0xa0d4a674ee214033ULL,
+       0x67c58448141f1b86ULL,  0x253565a3bdf52d15ULL,
+       0xab1721da49899a7fULL,  0xe9e7c031e063acecULL,
+       0x2ef6e20d1a5df759ULL,  0x6c0603e6b3b7c1caULL,
+       0xf6fae5c07d3274cdULL,  0xb40a042bd4d8425eULL,
+       0x731b26172ee619ebULL,  0x31ebc7fc870c2f78ULL,
+       0xbfc9838573709812ULL,  0xfd39626eda9aae81ULL,
+       0x3a28405220a4f534ULL,  0x78d8a1b9894ec3a7ULL,
+       0x649c294a61b7ad73ULL,  0x266cc8a1c85d9be0ULL,
+       0xe17dea9d3263c055ULL,  0xa38d0b769b89f6c6ULL,
+       0x2daf4f0f6ff541acULL,  0x6f5faee4c61f773fULL,
+       0xa84e8cd83c212c8aULL,  0xeabe6d3395cb1a19ULL,
+       0x90c79d3fedd3f122ULL,  0xd2377cd44439c7b1ULL,
+       0x15265ee8be079c04ULL,  0x57d6bf0317edaa97ULL,
+       0xd9f4fb7ae3911dfdULL,  0x9b041a914a7b2b6eULL,
+       0x5c1538adb04570dbULL,  0x1ee5d94619af4648ULL,
+       0x02a151b5f156289cULL,  0x4051b05e58bc1e0fULL,
+       0x87409262a28245baULL,  0xc5b073890b687329ULL,
+       0x4b9237f0ff14c443ULL,  0x0962d61b56fef2d0ULL,
+       0xce73f427acc0a965ULL,  0x8c8315cc052a9ff6ULL,
+       0x3a80143f5cf17f13ULL,  0x7870f5d4f51b4980ULL,
+       0xbf61d7e80f251235ULL,  0xfd913603a6cf24a6ULL,
+       0x73b3727a52b393ccULL,  0x31439391fb59a55fULL,
+       0xf652b1ad0167feeaULL,  0xb4a25046a88dc879ULL,
+       0xa8e6d8b54074a6adULL,  0xea16395ee99e903eULL,
+       0x2d071b6213a0cb8bULL,  0x6ff7fa89ba4afd18ULL,
+       0xe1d5bef04e364a72ULL,  0xa3255f1be7dc7ce1ULL,
+       0x64347d271de22754ULL,  0x26c49cccb40811c7ULL,
+       0x5cbd6cc0cc10fafcULL,  0x1e4d8d2b65facc6fULL,
+       0xd95caf179fc497daULL,  0x9bac4efc362ea149ULL,
+       0x158e0a85c2521623ULL,  0x577eeb6e6bb820b0ULL,
+       0x906fc95291867b05ULL,  0xd29f28b9386c4d96ULL,
+       0xcedba04ad0952342ULL,  0x8c2b41a1797f15d1ULL,
+       0x4b3a639d83414e64ULL,  0x09ca82762aab78f7ULL,
+       0x87e8c60fded7cf9dULL,  0xc51827e4773df90eULL,
+       0x020905d88d03a2bbULL,  0x40f9e43324e99428ULL,
+       0x2cffe7d5975e55e2ULL,  0x6e0f063e3eb46371ULL,
+       0xa91e2402c48a38c4ULL,  0xebeec5e96d600e57ULL,
+       0x65cc8190991cb93dULL,  0x273c607b30f68faeULL,
+       0xe02d4247cac8d41bULL,  0xa2dda3ac6322e288ULL,
+       0xbe992b5f8bdb8c5cULL,  0xfc69cab42231bacfULL,
+       0x3b78e888d80fe17aULL,  0x7988096371e5d7e9ULL,
+       0xf7aa4d1a85996083ULL,  0xb55aacf12c735610ULL,
+       0x724b8ecdd64d0da5ULL,  0x30bb6f267fa73b36ULL,
+       0x4ac29f2a07bfd00dULL,  0x08327ec1ae55e69eULL,
+       0xcf235cfd546bbd2bULL,  0x8dd3bd16fd818bb8ULL,
+       0x03f1f96f09fd3cd2ULL,  0x41011884a0170a41ULL,
+       0x86103ab85a2951f4ULL,  0xc4e0db53f3c36767ULL,
+       0xd8a453a01b3a09b3ULL,  0x9a54b24bb2d03f20ULL,
+       0x5d45907748ee6495ULL,  0x1fb5719ce1045206ULL,
+       0x919735e51578e56cULL,  0xd367d40ebc92d3ffULL,
+       0x1476f63246ac884aULL,  0x568617d9ef46bed9ULL,
+       0xe085162ab69d5e3cULL,  0xa275f7c11f7768afULL,
+       0x6564d5fde549331aULL,  0x279434164ca30589ULL,
+       0xa9b6706fb8dfb2e3ULL,  0xeb46918411358470ULL,
+       0x2c57b3b8eb0bdfc5ULL,  0x6ea7525342e1e956ULL,
+       0x72e3daa0aa188782ULL,  0x30133b4b03f2b111ULL,
+       0xf7021977f9cceaa4ULL,  0xb5f2f89c5026dc37ULL,
+       0x3bd0bce5a45a6b5dULL,  0x79205d0e0db05dceULL,
+       0xbe317f32f78e067bULL,  0xfcc19ed95e6430e8ULL,
+       0x86b86ed5267cdbd3ULL,  0xc4488f3e8f96ed40ULL,
+       0x0359ad0275a8b6f5ULL,  0x41a94ce9dc428066ULL,
+       0xcf8b0890283e370cULL,  0x8d7be97b81d4019fULL,
+       0x4a6acb477bea5a2aULL,  0x089a2aacd2006cb9ULL,
+       0x14dea25f3af9026dULL,  0x562e43b4931334feULL,
+       0x913f6188692d6f4bULL,  0xd3cf8063c0c759d8ULL,
+       0x5dedc41a34bbeeb2ULL,  0x1f1d25f19d51d821ULL,
+       0xd80c07cd676f8394ULL,  0x9afce626ce85b507ULL,
+};
similarity index 94%
rename from libbcachefs/six.c
rename to linux/six.c
index afa59a476a704d81679635613fad24f1d49519da..aceeabb039206f2a54c615e66bfe95520a9e9780 100644 (file)
@@ -1,11 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
 
+#include <linux/export.h>
 #include <linux/log2.h>
 #include <linux/preempt.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
+#include <linux/six.h>
 
-#include "six.h"
+#ifdef DEBUG
+#define EBUG_ON(cond)          BUG_ON(cond)
+#else
+#define EBUG_ON(cond)          do {} while (0)
+#endif
 
 #define six_acquire(l, t)      lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
 #define six_release(l)         lock_release(l, 0, _RET_IP_)
@@ -401,21 +408,25 @@ bool six_trylock_##type(struct six_lock *lock)                            \
 {                                                                      \
        return __six_trylock_type(lock, SIX_LOCK_##type);               \
 }                                                                      \
+EXPORT_SYMBOL_GPL(six_trylock_##type);                                 \
                                                                        \
 bool six_relock_##type(struct six_lock *lock, u32 seq)                 \
 {                                                                      \
        return __six_relock_type(lock, SIX_LOCK_##type, seq);           \
 }                                                                      \
+EXPORT_SYMBOL_GPL(six_relock_##type);                                  \
                                                                        \
 void six_lock_##type(struct six_lock *lock)                            \
 {                                                                      \
        __six_lock_type(lock, SIX_LOCK_##type);                         \
 }                                                                      \
+EXPORT_SYMBOL_GPL(six_lock_##type);                                    \
                                                                        \
 void six_unlock_##type(struct six_lock *lock)                          \
 {                                                                      \
        __six_unlock_type(lock, SIX_LOCK_##type);                       \
-}
+}                                                                      \
+EXPORT_SYMBOL_GPL(six_unlock_##type);
 
 __SIX_LOCK(read)
 __SIX_LOCK(intent)
@@ -429,6 +440,7 @@ bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
 {
        return __six_trylock_type(lock, type);
 }
+EXPORT_SYMBOL_GPL(six_trylock_type);
 
 bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
                     unsigned seq)
@@ -436,16 +448,19 @@ bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
        return __six_relock_type(lock, type, seq);
 
 }
+EXPORT_SYMBOL_GPL(six_relock_type);
 
 void six_lock_type(struct six_lock *lock, enum six_lock_type type)
 {
        __six_lock_type(lock, type);
 }
+EXPORT_SYMBOL_GPL(six_lock_type);
 
 void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
 {
        __six_unlock_type(lock, type);
 }
+EXPORT_SYMBOL_GPL(six_unlock_type);
 
 #endif
 
@@ -455,6 +470,7 @@ void six_lock_downgrade(struct six_lock *lock)
        six_lock_increment(lock, SIX_LOCK_read);
        six_unlock_intent(lock);
 }
+EXPORT_SYMBOL_GPL(six_lock_downgrade);
 
 bool six_lock_tryupgrade(struct six_lock *lock)
 {
@@ -481,6 +497,7 @@ bool six_lock_tryupgrade(struct six_lock *lock)
 
        return true;
 }
+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
 
 bool six_trylock_convert(struct six_lock *lock,
                         enum six_lock_type from,
@@ -498,6 +515,7 @@ bool six_trylock_convert(struct six_lock *lock,
                return six_lock_tryupgrade(lock);
        }
 }
+EXPORT_SYMBOL_GPL(six_trylock_convert);
 
 /*
  * Increment read/intent lock count, assuming we already have it read or intent
@@ -514,3 +532,4 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
 
        atomic64_add(l[type].lock_val, &lock->state.counter);
 }
+EXPORT_SYMBOL_GPL(six_lock_increment);
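
Note: with six.c moved from libbcachefs/ into linux/ (and six.h into
include/linux/), the shared/intent/exclusive lock is being treated as
generic kernel-style infrastructure rather than bcachefs-private code,
and the EXPORT_SYMBOL_GPL() markers added throughout make every entry
point usable when bcachefs is built as a module.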