From 47bd483d27ec13418978b24ec5951661d564ba35 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Sun, 13 Jan 2019 20:36:38 -0500
Subject: [PATCH] Update bcachefs sources to ef60854e99 bcachefs: More
 allocator startup improvements

---
 .bcachefs_revision                   |   2 +-
 cmd_migrate.c                        |   3 +-
 include/crypto/sha.h                 | 115 ++++++++++
 include/crypto/skcipher.h            |  32 ++-
 include/linux/compiler.h             |   1 +
 include/linux/crc64.h                |  11 +
 {libbcachefs => include/linux}/six.h |  10 +-
 libbcachefs/alloc_background.c       | 138 +++++++-----
 libbcachefs/alloc_background.h       |   2 +-
 libbcachefs/alloc_foreground.c       |   1 +
 libbcachefs/bcachefs.h               |   7 +-
 libbcachefs/btree_cache.c            |   4 +
 libbcachefs/btree_gc.c               | 143 ++++++-------
 libbcachefs/btree_io.c               |  12 +-
 libbcachefs/btree_io.h               |  53 +++--
 libbcachefs/btree_iter.h             |   1 +
 libbcachefs/btree_locking.h          |   4 +-
 libbcachefs/btree_types.h            |   2 +-
 libbcachefs/btree_update_interior.c  |  40 ++--
 libbcachefs/buckets.c                | 307 +++++++++++----------------
 libbcachefs/buckets.h                |  15 +-
 libbcachefs/buckets_types.h          |  14 +-
 libbcachefs/chardev.c                |  18 +-
 libbcachefs/checksum.c               | 149 ++-----------
 libbcachefs/checksum.h               |   6 +-
 libbcachefs/ec_types.h               |   7 +
 libbcachefs/fs-io.c                  |   9 +-
 libbcachefs/fs-ioctl.c               |   2 +-
 libbcachefs/replicas.c               | 162 ++++++++++++--
 libbcachefs/replicas.h               |   3 +
 libbcachefs/str_hash.h               |   3 +-
 libbcachefs/super.c                  |   9 +-
 libbcachefs/sysfs.c                  |  27 +--
 libbcachefs/util.c                   |   3 -
 libbcachefs/util.h                   |  25 ++-
 linux/crc64.c                        |  56 +++++
 linux/crc64table.h                   | 135 ++++++++++++
 {libbcachefs => linux}/six.c         |  23 +-
 38 files changed, 975 insertions(+), 579 deletions(-)
 create mode 100644 include/crypto/sha.h
 create mode 100644 include/linux/crc64.h
 rename {libbcachefs => include/linux}/six.h (98%)
 create mode 100644 linux/crc64.c
 create mode 100644 linux/crc64table.h
 rename {libbcachefs => linux}/six.c (94%)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 66897e5..3bcc585 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-2724e115d243043ee62d78883bec4035651d74ab
+ef60854e9912d24c0ba83e0760552c98257d2b07
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 2d82d15..e9594ab 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -614,7 +614,8 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
 	darray_free(s.extents);
 	genradix_free(&s.hardlinks);
 
-	bch2_alloc_write(c);
+	bool wrote;
+	bch2_alloc_write(c, false, &wrote);
 }
 
 static void find_superblock_space(ranges extents, struct dev_opts *dev)
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
new file mode 100644
index 0000000..8a46202
--- /dev/null
+++ b/include/crypto/sha.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for SHA algorithms
+ */
+
+#ifndef _CRYPTO_SHA_H
+#define _CRYPTO_SHA_H
+
+#include <linux/types.h>
+
+#define SHA1_DIGEST_SIZE	20
+#define SHA1_BLOCK_SIZE		64
+
+#define SHA224_DIGEST_SIZE	28
+#define SHA224_BLOCK_SIZE	64
+
+#define SHA256_DIGEST_SIZE	32
+#define SHA256_BLOCK_SIZE	64
+
+#define SHA384_DIGEST_SIZE	48
+#define SHA384_BLOCK_SIZE	128
+
+#define SHA512_DIGEST_SIZE	64
+#define SHA512_BLOCK_SIZE	128
+
+#define SHA1_H0		0x67452301UL
+#define SHA1_H1		0xefcdab89UL
+#define SHA1_H2		0x98badcfeUL
+#define SHA1_H3		0x10325476UL
+#define SHA1_H4		0xc3d2e1f0UL
+
+#define SHA224_H0	0xc1059ed8UL
+#define SHA224_H1	0x367cd507UL
+#define SHA224_H2	0x3070dd17UL
+#define SHA224_H3	0xf70e5939UL
+#define SHA224_H4	0xffc00b31UL
+#define SHA224_H5	0x68581511UL
+#define SHA224_H6	0x64f98fa7UL
+#define SHA224_H7	0xbefa4fa4UL
+
+#define SHA256_H0	0x6a09e667UL
+#define SHA256_H1	0xbb67ae85UL
+#define SHA256_H2	0x3c6ef372UL
+#define SHA256_H3	0xa54ff53aUL
+#define SHA256_H4	0x510e527fUL
+#define SHA256_H5	0x9b05688cUL
+#define SHA256_H6	0x1f83d9abUL
+#define SHA256_H7	0x5be0cd19UL
+
+#define SHA384_H0	0xcbbb9d5dc1059ed8ULL
+#define SHA384_H1	0x629a292a367cd507ULL
+#define SHA384_H2	0x9159015a3070dd17ULL
+#define SHA384_H3	0x152fecd8f70e5939ULL
+#define SHA384_H4	0x67332667ffc00b31ULL
+#define SHA384_H5	0x8eb44a8768581511ULL
+#define SHA384_H6	0xdb0c2e0d64f98fa7ULL
+#define SHA384_H7	0x47b5481dbefa4fa4ULL
+
+#define SHA512_H0	0x6a09e667f3bcc908ULL
+#define SHA512_H1	0xbb67ae8584caa73bULL
+#define SHA512_H2	0x3c6ef372fe94f82bULL
+#define SHA512_H3	0xa54ff53a5f1d36f1ULL
+#define SHA512_H4	0x510e527fade682d1ULL
+#define SHA512_H5	0x9b05688c2b3e6c1fULL
+#define SHA512_H6	0x1f83d9abfb41bd6bULL
+#define SHA512_H7	0x5be0cd19137e2179ULL
+
+extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE];
+
+extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
+
+extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
+
+extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
+
+extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
+
+struct sha1_state {
+	u32 state[SHA1_DIGEST_SIZE / 4];
+	u64 count;
+	u8 buffer[SHA1_BLOCK_SIZE];
+};
+
+struct sha256_state {
+	u32 state[SHA256_DIGEST_SIZE / 4];
+	u64 count;
+	u8 buf[SHA256_BLOCK_SIZE];
+};
+
+struct sha512_state {
+	u64 state[SHA512_DIGEST_SIZE / 8];
+	u64 count[2];
+	u8 buf[SHA512_BLOCK_SIZE];
+};
+
+struct shash_desc;
+
+extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int len);
+
+extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, u8 *hash);
+
+extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len);
+
+extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *hash);
+
+extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len);
+
+extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *hash);
+#endif
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index c9e887c..5989855 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -36,14 +36,29 @@ struct crypto_skcipher {
 	struct crypto_tfm	base;
 };
 
+struct crypto_sync_skcipher {
+	struct crypto_skcipher base;
+};
+
 struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 					      u32 type, u32 mask);
 
+static inline struct crypto_sync_skcipher *
+crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask)
+{
+	return (void *) crypto_alloc_skcipher(alg_name, type, mask);
+}
+
 static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
 {
 	kfree(tfm);
 }
 
+static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
+{
+	crypto_free_skcipher(&tfm->base);
+}
+
 struct skcipher_request {
 	unsigned		cryptlen;
 	u8			*iv;
@@ -54,9 +69,14 @@ struct skcipher_request {
 	struct crypto_tfm	*tfm;
 };
 
-#define SKCIPHER_REQUEST_ON_STACK(name, tfm) \
-	struct skcipher_request __##name##_desc; \
-	struct skcipher_request *name = &__##name##_desc
+#define MAX_SYNC_SKCIPHER_REQSIZE	384
+#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
+	char __##name##_desc[sizeof(struct skcipher_request) + \
+			     MAX_SYNC_SKCIPHER_REQSIZE + \
+			     (!(sizeof((struct crypto_sync_skcipher *)1 == \
+				       (typeof(tfm))1))) \
+			] CRYPTO_MINALIGN_ATTR; \
+	struct skcipher_request *name = (void *)__##name##_desc
 
 static inline int
 crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			unsigned int keylen)
@@ -86,6 +106,12 @@ static inline void skcipher_request_set_tfm(struct skcipher_request *req,
 	req->tfm = &tfm->base;
 }
 
+static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
+					struct crypto_sync_skcipher *tfm)
+{
+	skcipher_request_set_tfm(req, &tfm->base);
+}
+
 static inline void skcipher_request_set_crypt(
 	struct skcipher_request *req,
 	struct scatterlist *src, struct scatterlist *dst,
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 915a6f8..2bfbfad 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -34,6 +34,7 @@
 #define __maybe_unused	__attribute__((unused))
 #define __always_unused	__attribute__((unused))
 #define __packed	__attribute__((__packed__))
+#define __flatten	__attribute__((flatten))
 #define __force
 #define __nocast
 #define __iomem
diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 0000000..c756e65
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/libbcachefs/six.h b/include/linux/six.h
similarity index 98%
rename from libbcachefs/six.h
rename to include/linux/six.h
index 999c49d..40e213f 100644
--- a/libbcachefs/six.h
+++ b/include/linux/six.h
@@ -1,5 +1,7 @@
-#ifndef _BCACHEFS_SIX_H
-#define _BCACHEFS_SIX_H
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _LINUX_SIX_H
+#define _LINUX_SIX_H
 
 /*
  * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
@@ -61,8 +63,6 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 
-#include "util.h"
-
 #define SIX_LOCK_SEPARATE_LOCKFNS
 
 union six_lock_state {
@@ -227,4 +227,4 @@ bool six_trylock_convert(struct six_lock *, enum six_lock_type,
 
 void six_lock_increment(struct six_lock *, enum six_lock_type);
 
-#endif /* _BCACHEFS_SIX_H */
+#endif /* _LINUX_SIX_H */
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 955caa2..6de6e26 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -272,12 +272,19 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 			size_t b, struct btree_iter *iter,
 			u64 *journal_seq, unsigned flags)
 {
+#if 0
 	__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+	/* hack: */
+	__BKEY_PADDED(k, 8) alloc_key;
+#endif
 	struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
 	struct bucket *g;
 	struct bucket_mark m;
 	int ret;
 
+	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
 	a->k.p = POS(ca->dev_idx, b);
 
 	percpu_down_read_preempt_disable(&c->mark_lock);
@@ -339,12 +346,14 @@ err:
 	return ret;
 }
 
-int bch2_alloc_write(struct bch_fs *c)
+int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
 {
 	struct bch_dev *ca;
 	unsigned i;
 	int ret = 0;
 
+	*wrote = false;
+
 	for_each_rw_member(ca, c, i) {
 		struct btree_iter iter;
 		struct bucket_array *buckets;
@@ -362,9 +371,14 @@ int bch2_alloc_write(struct bch_fs *c)
 			if (!buckets->b[b].mark.dirty)
 				continue;
 
-			ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0);
+			ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
+						     nowait
+						     ?
BTREE_INSERT_NOWAIT + : 0); if (ret) break; + + *wrote = true; } up_read(&ca->bucket_lock); bch2_btree_iter_unlock(&iter); @@ -1262,20 +1276,23 @@ static void flush_held_btree_writes(struct bch_fs *c) struct bucket_table *tbl; struct rhash_head *pos; struct btree *b; - bool flush_updates; - size_t i, nr_pending_updates; + bool nodes_blocked; + size_t i; + struct closure cl; + + closure_init_stack(&cl); clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); again: pr_debug("flushing dirty btree nodes"); cond_resched(); + closure_wait(&c->btree_interior_update_wait, &cl); - flush_updates = false; - nr_pending_updates = bch2_btree_interior_updates_nr_pending(c); + nodes_blocked = false; rcu_read_lock(); for_each_cached_btree(b, c, tbl, i, pos) - if (btree_node_dirty(b) && (!b->written || b->level)) { + if (btree_node_need_write(b)) { if (btree_node_may_write(b)) { rcu_read_unlock(); btree_node_lock_type(c, b, SIX_LOCK_read); @@ -1283,7 +1300,7 @@ again: six_unlock_read(&b->lock); goto again; } else { - flush_updates = true; + nodes_blocked = true; } } rcu_read_unlock(); @@ -1291,17 +1308,16 @@ again: if (c->btree_roots_dirty) bch2_journal_meta(&c->journal); - /* - * This is ugly, but it's needed to flush btree node writes - * without spinning... - */ - if (flush_updates) { - closure_wait_event(&c->btree_interior_update_wait, - bch2_btree_interior_updates_nr_pending(c) < - nr_pending_updates); + if (nodes_blocked) { + closure_sync(&cl); goto again; } + closure_wake_up(&c->btree_interior_update_wait); + closure_sync(&cl); + + closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); } static void allocator_start_issue_discards(struct bch_fs *c) @@ -1323,13 +1339,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) unsigned dev_iter; u64 journal_seq = 0; long bu; - bool invalidating_data = false; int ret = 0; - if (test_alloc_startup(c)) { - invalidating_data = true; + if (test_alloc_startup(c)) goto not_enough; - } /* Scan for buckets that are already invalidated: */ for_each_rw_member(ca, c, dev_iter) { @@ -1376,21 +1389,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) not_enough: pr_debug("not enough empty buckets; scanning for reclaimable buckets"); - for_each_rw_member(ca, c, dev_iter) { - find_reclaimable_buckets(c, ca); - - while (!fifo_full(&ca->free[RESERVE_BTREE]) && - (bu = next_alloc_bucket(ca)) >= 0) { - invalidating_data |= - bch2_invalidate_one_bucket(c, ca, bu, &journal_seq); - - fifo_push(&ca->free[RESERVE_BTREE], bu); - bucket_set_dirty(ca, bu); - } - } - - pr_debug("done scanning for reclaimable buckets"); - /* * We're moving buckets to freelists _before_ they've been marked as * invalidated on disk - we have to so that we can allocate new btree @@ -1400,38 +1398,59 @@ not_enough: * have cached data in them, which is live until they're marked as * invalidated on disk: */ - if (invalidating_data) { - pr_debug("invalidating existing data"); - set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); - } else { - pr_debug("issuing discards"); - allocator_start_issue_discards(c); - } + set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); - /* - * XXX: it's possible for this to deadlock waiting on journal reclaim, - * since we're holding btree writes. What then? 
- */ - ret = bch2_alloc_write(c); - if (ret) - return ret; + while (1) { + bool wrote = false; - if (invalidating_data) { - pr_debug("flushing journal"); + for_each_rw_member(ca, c, dev_iter) { + find_reclaimable_buckets(c, ca); - ret = bch2_journal_flush_seq(&c->journal, journal_seq); - if (ret) - return ret; + while (!fifo_full(&ca->free[RESERVE_BTREE]) && + (bu = next_alloc_bucket(ca)) >= 0) { + bch2_invalidate_one_bucket(c, ca, bu, + &journal_seq); + + fifo_push(&ca->free[RESERVE_BTREE], bu); + bucket_set_dirty(ca, bu); + } + } + + pr_debug("done scanning for reclaimable buckets"); + + /* + * XXX: it's possible for this to deadlock waiting on journal reclaim, + * since we're holding btree writes. What then? + */ + ret = bch2_alloc_write(c, true, &wrote); - pr_debug("issuing discards"); - allocator_start_issue_discards(c); + /* + * If bch2_alloc_write() did anything, it may have used some + * buckets, and we need the RESERVE_BTREE freelist full - so we + * need to loop and scan again. + * And if it errored, it may have been because there weren't + * enough buckets, so just scan and loop again as long as it + * made some progress: + */ + if (!wrote && ret) + return ret; + if (!wrote && !ret) + break; } + pr_debug("flushing journal"); + + ret = bch2_journal_flush(&c->journal); + if (ret) + return ret; + + pr_debug("issuing discards"); + allocator_start_issue_discards(c); + set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); /* now flush dirty btree nodes: */ - if (invalidating_data) - flush_held_btree_writes(c); + flush_held_btree_writes(c); return 0; } @@ -1440,6 +1459,7 @@ int bch2_fs_allocator_start(struct bch_fs *c) { struct bch_dev *ca; unsigned i; + bool wrote; int ret; down_read(&c->gc_lock); @@ -1457,7 +1477,7 @@ int bch2_fs_allocator_start(struct bch_fs *c) } } - return bch2_alloc_write(c); + return bch2_alloc_write(c, false, &wrote); } void bch2_fs_allocator_background_init(struct bch_fs *c) diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index b382c8b..a0c08e3 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -54,7 +54,7 @@ void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_stop(struct bch_dev *); int bch2_dev_allocator_start(struct bch_dev *); -int bch2_alloc_write(struct bch_fs *); +int bch2_alloc_write(struct bch_fs *, bool, bool *); int bch2_fs_allocator_start(struct bch_fs *); void bch2_fs_allocator_background_init(struct bch_fs *); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 596d3bc..14e6453 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -288,6 +288,7 @@ out: ob->valid = true; ob->sectors_free = ca->mi.bucket_size; ob->ptr = (struct bch_extent_ptr) { + .type = 1 << BCH_EXTENT_ENTRY_ptr, .gen = buckets->b[bucket].mark.gen, .offset = bucket_to_sector(ca, bucket), .dev = ca->dev_idx, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 3056f3b..449eb0c 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -618,10 +618,11 @@ struct bch_fs { struct bch_fs_pcpu __percpu *pcpu; - struct bch_fs_usage __percpu *usage[2]; - struct percpu_rw_semaphore mark_lock; + struct bch_fs_usage __percpu *usage[2]; + struct bch_fs_usage __percpu *usage_scratch; + /* * When we invalidate buckets, we use both the priority and the amount * of good data to determine which buckets to reuse first - to weight @@ -685,7 +686,7 @@ struct bch_fs { ZSTD_parameters zstd_params; struct crypto_shash *sha256; - 
struct crypto_skcipher *chacha20; + struct crypto_sync_skcipher *chacha20; struct crypto_shash *poly1305; atomic64_t key_version; diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index d99441a..f77dc20 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -169,6 +169,10 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) if (!btree_node_may_write(b)) goto out_unlock; + if (btree_node_dirty(b) && + test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) + goto out_unlock; + if (btree_node_dirty(b) || btree_node_write_in_flight(b) || btree_node_read_in_flight(b)) { diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 9f5a79a..23013fb 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -475,29 +475,43 @@ static void bch2_gc_free(struct bch_fs *c) ca->usage[1] = NULL; } + percpu_down_write(&c->mark_lock); + free_percpu(c->usage[1]); c->usage[1] = NULL; -} -static void fs_usage_reset(struct bch_fs_usage *fs_usage) -{ - memset(&fs_usage->s.gc_start[0], 0, - sizeof(*fs_usage) - offsetof(typeof(*fs_usage), s.gc_start)); + percpu_up_write(&c->mark_lock); } -static void fs_usage_cpy(struct bch_fs_usage *dst, - struct bch_fs_usage *src) +/* + * Accumulate percpu counters onto one cpu's copy - only valid when access + * against any percpu counter is guarded against + */ +static u64 *acc_percpu_u64s(u64 __percpu *p, unsigned nr) { - memcpy(&dst->s.gc_start[0], - &src->s.gc_start[0], - sizeof(*dst) - offsetof(typeof(*dst), s.gc_start)); + u64 *ret; + int cpu; + + preempt_disable(); + ret = this_cpu_ptr(p); + preempt_enable(); + + for_each_possible_cpu(cpu) { + u64 *i = per_cpu_ptr(p, cpu); + + if (i != ret) { + acc_u64s(ret, i, nr); + memset(i, 0, nr * sizeof(u64)); + } + } + + return ret; } static void bch2_gc_done_nocheck(struct bch_fs *c) { struct bch_dev *ca; unsigned i; - int cpu; { struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); @@ -527,42 +541,39 @@ static void bch2_gc_done_nocheck(struct bch_fs *c) }; for_each_member_device(ca, c, i) { - struct bch_dev_usage *p; + unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64); + struct bch_dev_usage *dst = (void *) + acc_percpu_u64s((void *) ca->usage[0], nr); + struct bch_dev_usage *src = (void *) + acc_percpu_u64s((void *) ca->usage[1], nr); - for_each_possible_cpu(cpu) { - p = per_cpu_ptr(ca->usage[0], cpu); - memset(p, 0, sizeof(*p)); - } - - preempt_disable(); - *this_cpu_ptr(ca->usage[0]) = __bch2_dev_usage_read(ca, 1); - preempt_enable(); + *dst = *src; } { - struct bch_fs_usage src = __bch2_fs_usage_read(c, 1); - - for_each_possible_cpu(cpu) - fs_usage_reset(per_cpu_ptr(c->usage[0], cpu)); - - preempt_disable(); - fs_usage_cpy(this_cpu_ptr(c->usage[0]), &src); - preempt_enable(); + unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) + + c->replicas.nr; + struct bch_fs_usage *dst = (void *) + acc_percpu_u64s((void *) c->usage[0], nr); + struct bch_fs_usage *src = (void *) + acc_percpu_u64s((void *) c->usage[1], nr); + + memcpy(&dst->s.gc_start[0], + &src->s.gc_start[0], + nr * sizeof(u64) - offsetof(typeof(*dst), s.gc_start)); } - } static void bch2_gc_done(struct bch_fs *c, bool initial) { struct bch_dev *ca; unsigned i; - int cpu; #define copy_field(_f, _msg, ...) 
\ - if (dst._f != src._f) { \ - bch_err(c, _msg ": got %llu, should be %llu, fixing"\ - , ##__VA_ARGS__, dst._f, src._f); \ - dst._f = src._f; \ + if (dst->_f != src->_f) { \ + bch_err(c, _msg ": got %llu, should be %llu, fixing" \ + , ##__VA_ARGS__, dst->_f, src->_f); \ + dst->_f = src->_f; \ } #define copy_stripe_field(_f, _msg, ...) \ if (dst->_f != src->_f) { \ @@ -643,9 +654,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) }; for_each_member_device(ca, c, i) { - struct bch_dev_usage dst = __bch2_dev_usage_read(ca, 0); - struct bch_dev_usage src = __bch2_dev_usage_read(ca, 1); - struct bch_dev_usage *p; + unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64); + struct bch_dev_usage *dst = (void *) + acc_percpu_u64s((void *) ca->usage[0], nr); + struct bch_dev_usage *src = (void *) + acc_percpu_u64s((void *) ca->usage[1], nr); unsigned b; for (b = 0; b < BCH_DATA_NR; b++) @@ -659,22 +672,15 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) "sectors[%s]", bch2_data_types[b]); copy_dev_field(sectors_fragmented, "sectors_fragmented"); - - for_each_possible_cpu(cpu) { - p = per_cpu_ptr(ca->usage[0], cpu); - memset(p, 0, sizeof(*p)); - } - - preempt_disable(); - p = this_cpu_ptr(ca->usage[0]); - *p = dst; - preempt_enable(); } { - struct bch_fs_usage dst = __bch2_fs_usage_read(c, 0); - struct bch_fs_usage src = __bch2_fs_usage_read(c, 1); - unsigned r, b; + unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) + + c->replicas.nr; + struct bch_fs_usage *dst = (void *) + acc_percpu_u64s((void *) c->usage[0], nr); + struct bch_fs_usage *src = (void *) + acc_percpu_u64s((void *) c->usage[1], nr); copy_fs_field(s.hidden, "hidden"); copy_fs_field(s.data, "data"); @@ -682,27 +688,16 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) copy_fs_field(s.reserved, "reserved"); copy_fs_field(s.nr_inodes, "nr_inodes"); - for (r = 0; r < BCH_REPLICAS_MAX; r++) { - for (b = 0; b < BCH_DATA_NR; b++) - copy_fs_field(replicas[r].data[b], - "replicas[%i].data[%s]", - r, bch2_data_types[b]); - copy_fs_field(replicas[r].ec_data, - "replicas[%i].ec_data", r); - copy_fs_field(replicas[r].persistent_reserved, - "replicas[%i].persistent_reserved", r); - } - - for (b = 0; b < BCH_DATA_NR; b++) - copy_fs_field(buckets[b], - "buckets[%s]", bch2_data_types[b]); + for (i = 0; i < BCH_REPLICAS_MAX; i++) + copy_fs_field(persistent_reserved[i], + "persistent_reserved[%i]", i); - for_each_possible_cpu(cpu) - fs_usage_reset(per_cpu_ptr(c->usage[0], cpu)); - - preempt_disable(); - fs_usage_cpy(this_cpu_ptr(c->usage[0]), &dst); - preempt_enable(); + for (i = 0; i < c->replicas.nr; i++) { + /* + * XXX: print out replicas entry + */ + copy_fs_field(data[i], "data[%i]", i); + } } out: percpu_up_write(&c->mark_lock); @@ -725,9 +720,15 @@ static int bch2_gc_start(struct bch_fs *c) */ gc_pos_set(c, gc_phase(GC_PHASE_START)); + percpu_down_write(&c->mark_lock); BUG_ON(c->usage[1]); - c->usage[1] = alloc_percpu(struct bch_fs_usage); + c->usage[1] = __alloc_percpu_gfp(sizeof(struct bch_fs_usage) + + sizeof(u64) * c->replicas.nr, + sizeof(u64), + GFP_KERNEL); + percpu_up_write(&c->mark_lock); + if (!c->usage[1]) return -ENOMEM; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 231ace4..25aa22a 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1330,8 +1330,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, if (!(old & (1 << BTREE_NODE_dirty))) return; - if (b->written && - !btree_node_may_write(b)) + if (!btree_node_may_write(b)) return; if (old & (1 
<< BTREE_NODE_write_in_flight)) { @@ -1347,7 +1346,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, } while (cmpxchg_acquire(&b->flags, old, new) != old); BUG_ON(btree_node_fake(b)); - BUG_ON(!list_empty(&b->write_blocked)); BUG_ON((b->will_make_reachable != 0) != !b->written); BUG_ON(b->written >= c->opts.btree_node_size); @@ -1684,15 +1682,13 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) unsigned long flags = READ_ONCE(b->flags); unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0; - if (//!(flags & (1 << BTREE_NODE_dirty)) && - !b->writes[0].wait.list.first && - !b->writes[1].wait.list.first && - !(b->will_make_reachable & 1)) + if (!(flags & (1 << BTREE_NODE_dirty))) continue; - pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n", + pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n", b, (flags & (1 << BTREE_NODE_dirty)) != 0, + (flags & (1 << BTREE_NODE_need_write)) != 0, b->level, b->written, !list_empty_careful(&b->write_blocked), diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 4be3221..4de1fb7 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -2,6 +2,7 @@ #define _BCACHEFS_BTREE_IO_H #include "bset.h" +#include "btree_locking.h" #include "extents.h" #include "io_types.h" @@ -47,7 +48,7 @@ static inline void btree_node_wait_on_io(struct btree *b) static inline bool btree_node_may_write(struct btree *b) { return list_empty_careful(&b->write_blocked) && - !b->will_make_reachable; + (!b->written || !b->will_make_reachable); } enum compact_mode { @@ -99,42 +100,36 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); void bch2_btree_node_write(struct bch_fs *, struct btree *, enum six_lock_type); -/* - * btree_node_dirty() can be cleared with only a read lock, - * and for bch2_btree_node_write_cond() we want to set need_write iff it's - * still dirty: - */ -static inline void set_btree_node_need_write_if_dirty(struct btree *b) +static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b) { - unsigned long old, new, v = READ_ONCE(b->flags); - - do { - old = new = v; - - if (!(old & (1 << BTREE_NODE_dirty))) - return; - - new |= (1 << BTREE_NODE_need_write); - } while ((v = cmpxchg(&b->flags, old, new)) != old); + while (b->written && + btree_node_need_write(b) && + btree_node_may_write(b)) { + if (!btree_node_write_in_flight(b)) { + bch2_btree_node_write(c, b, SIX_LOCK_read); + break; + } + + six_unlock_read(&b->lock); + btree_node_wait_on_io(b); + btree_node_lock_type(c, b, SIX_LOCK_read); + } } #define bch2_btree_node_write_cond(_c, _b, cond) \ do { \ - while ((_b)->written && btree_node_dirty(_b) && (cond)) { \ - if (!btree_node_may_write(_b)) { \ - set_btree_node_need_write_if_dirty(_b); \ - break; \ - } \ + unsigned long old, new, v = READ_ONCE((_b)->flags); \ + \ + do { \ + old = new = v; \ \ - if (!btree_node_write_in_flight(_b)) { \ - bch2_btree_node_write(_c, _b, SIX_LOCK_read); \ + if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ break; \ - } \ \ - six_unlock_read(&(_b)->lock); \ - btree_node_wait_on_io(_b); \ - btree_node_lock_type(c, b, SIX_LOCK_read); \ - } \ + new |= (1 << BTREE_NODE_need_write); \ + } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ + \ + btree_node_write_if_need(_c, _b); \ } while (0) void bch2_btree_flush_all_reads(struct bch_fs *); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 1a1ca95..873332f 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -3,6 +3,7 @@ 
#include +#include "bset.h" #include "btree_types.h" static inline void btree_iter_set_dirty(struct btree_iter *iter, diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 33260a9..9054de0 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -9,9 +9,9 @@ * updating the iterator state */ +#include + #include "btree_iter.h" -#include "btree_io.h" -#include "six.h" /* matches six lock types */ enum btree_node_locked_type { diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 0af2a7d..dce4ed3 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -3,10 +3,10 @@ #include #include +#include #include "bkey_methods.h" #include "journal_types.h" -#include "six.h" struct open_bucket; struct btree_update; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index e18655e..0f2fa6f 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -366,6 +366,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev set_btree_node_accessed(b); set_btree_node_dirty(b); + set_btree_node_need_write(b); bch2_bset_init_first(b, &b->data->keys); memset(&b->nr, 0, sizeof(b->nr)); @@ -654,6 +655,12 @@ retry: closure_wait(&btree_current_write(b)->wait, cl); list_del(&as->write_blocked_list); + + /* + * for flush_held_btree_writes() waiting on updates to flush or + * nodes to be writeable: + */ + closure_wake_up(&c->btree_interior_update_wait); mutex_unlock(&c->btree_interior_update_lock); /* @@ -957,6 +964,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { list_del(&p->write_blocked_list); btree_update_reparent(as, p); + + /* + * for flush_held_btree_writes() waiting on updates to flush or + * nodes to be writeable: + */ + closure_wake_up(&c->btree_interior_update_wait); } clear_btree_node_dirty(b); @@ -1056,23 +1069,24 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; struct btree *old = btree_node_root(c, b); - struct bch_fs_usage stats = { 0 }; + struct bch_fs_usage *fs_usage; __bch2_btree_set_root_inmem(c, b); mutex_lock(&c->btree_interior_update_lock); percpu_down_read_preempt_disable(&c->mark_lock); + fs_usage = bch2_fs_usage_get_scratch(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, gc_pos_btree_root(b->btree_id), - &stats, 0, 0); + fs_usage, 0, 0); if (old && !btree_node_fake(old)) bch2_btree_node_free_index(as, NULL, bkey_i_to_s_c(&old->key), - &stats); - bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, + fs_usage); + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, gc_pos_btree_root(b->btree_id)); percpu_up_read_preempt_enable(&c->mark_lock); @@ -1147,7 +1161,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b struct btree_node_iter *node_iter) { struct bch_fs *c = as->c; - struct bch_fs_usage stats = { 0 }; + struct bch_fs_usage *fs_usage; struct bkey_packed *k; struct bkey tmp; @@ -1155,10 +1169,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b mutex_lock(&c->btree_interior_update_lock); percpu_down_read_preempt_disable(&c->mark_lock); + fs_usage = bch2_fs_usage_get_scratch(c); bch2_mark_key_locked(c, bkey_i_to_s_c(insert), true, 0, - gc_pos_btree_node(b), &stats, 0, 0); + gc_pos_btree_node(b), fs_usage, 0, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && 
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0) @@ -1171,9 +1186,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b if (k && !bkey_cmp_packed(b, k, &insert->k)) bch2_btree_node_free_index(as, b, bkey_disassemble(b, k, &tmp), - &stats); + fs_usage); - bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, gc_pos_btree_node(b)); percpu_up_read_preempt_enable(&c->mark_lock); @@ -1957,7 +1972,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bkey_copy(&b->key, &new_key->k_i); } } else { - struct bch_fs_usage stats = { 0 }; + struct bch_fs_usage *fs_usage; BUG_ON(btree_node_root(c, b) != b); @@ -1965,15 +1980,16 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, mutex_lock(&c->btree_interior_update_lock); percpu_down_read_preempt_disable(&c->mark_lock); + fs_usage = bch2_fs_usage_get_scratch(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, gc_pos_btree_root(b->btree_id), - &stats, 0, 0); + fs_usage, 0, 0); bch2_btree_node_free_index(as, NULL, bkey_i_to_s_c(&b->key), - &stats); - bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, + fs_usage); + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, gc_pos_btree_root(b->btree_id)); percpu_up_read_preempt_enable(&c->mark_lock); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index d72e595..d33d0bf 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -71,83 +71,11 @@ #include "ec.h" #include "error.h" #include "movinggc.h" +#include "replicas.h" #include #include -static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); - -#ifdef DEBUG_BUCKETS - -#define lg_local_lock lg_global_lock -#define lg_local_unlock lg_global_unlock - -static void bch2_fs_stats_verify(struct bch_fs *c) -{ - struct bch_fs_usage stats =_bch2_fs_usage_read(c); - unsigned i, j; - - for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) { - for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++) - if ((s64) stats.replicas[i].data[j] < 0) - panic("replicas %u %s sectors underflow: %lli\n", - i + 1, bch_data_types[j], - stats.replicas[i].data[j]); - - if ((s64) stats.replicas[i].persistent_reserved < 0) - panic("replicas %u reserved underflow: %lli\n", - i + 1, stats.replicas[i].persistent_reserved); - } - - for (j = 0; j < ARRAY_SIZE(stats.buckets); j++) - if ((s64) stats.replicas[i].data_buckets[j] < 0) - panic("%s buckets underflow: %lli\n", - bch_data_types[j], - stats.buckets[j]); - - if ((s64) stats.s.online_reserved < 0) - panic("sectors_online_reserved underflow: %lli\n", - stats.s.online_reserved); -} - -static void bch2_dev_stats_verify(struct bch_dev *ca) -{ - struct bch_dev_usage stats = - __bch2_dev_usage_read(ca); - u64 n = ca->mi.nbuckets - ca->mi.first_bucket; - unsigned i; - - for (i = 0; i < ARRAY_SIZE(stats.buckets); i++) - BUG_ON(stats.buckets[i] > n); - BUG_ON(stats.buckets_alloc > n); - BUG_ON(stats.buckets_unavailable > n); -} - -static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) -{ - if (!(flags & BCH_DISK_RESERVATION_NOFAIL)) { - u64 used = __bch2_fs_sectors_used(c); - u64 cached = 0; - u64 avail = atomic64_read(&c->sectors_available); - int cpu; - - for_each_possible_cpu(cpu) - cached += per_cpu_ptr(c->usage_percpu, cpu)->available_cache; - - if (used + avail + cached > c->capacity) - panic("used %llu avail %llu cached %llu capacity %llu\n", - used, avail, cached, c->capacity); - } -} - -#else - -static void bch2_fs_stats_verify(struct bch_fs *c) {} -static 
void bch2_dev_stats_verify(struct bch_dev *ca) {} -static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {} - -#endif - /* * Clear journal_seq_valid for buckets for which it's not needed, to prevent * wraparound: @@ -185,46 +113,47 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c) } } -#define bch2_usage_add(_acc, _stats) \ -do { \ - typeof(_acc) _a = (_acc), _s = (_stats); \ - unsigned i; \ - \ - for (i = 0; i < sizeof(*_a) / sizeof(u64); i++) \ - ((u64 *) (_a))[i] += ((u64 *) (_s))[i]; \ -} while (0) - #define bch2_usage_read_raw(_stats) \ ({ \ typeof(*this_cpu_ptr(_stats)) _acc; \ - int cpu; \ \ memset(&_acc, 0, sizeof(_acc)); \ - \ - for_each_possible_cpu(cpu) \ - bch2_usage_add(&_acc, per_cpu_ptr((_stats), cpu)); \ + acc_u64s_percpu((u64 *) &_acc, \ + (u64 __percpu *) _stats, \ + sizeof(_acc) / sizeof(u64)); \ \ _acc; \ }) -struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca, bool gc) -{ - return bch2_usage_read_raw(ca->usage[gc]); -} - struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) { return bch2_usage_read_raw(ca->usage[0]); } -struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *c, bool gc) +struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) { - return bch2_usage_read_raw(c->usage[gc]); -} + struct bch_fs_usage *ret; + unsigned nr = READ_ONCE(c->replicas.nr); +retry: + ret = kzalloc(sizeof(*ret) + nr * sizeof(u64), GFP_NOFS); + if (unlikely(!ret)) + return NULL; -struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *c) -{ - return bch2_usage_read_raw(c->usage[0]); + percpu_down_read_preempt_disable(&c->mark_lock); + + if (unlikely(nr < c->replicas.nr)) { + nr = c->replicas.nr; + percpu_up_read_preempt_enable(&c->mark_lock); + kfree(ret); + goto retry; + } + + acc_u64s_percpu((u64 *) ret, + (u64 __percpu *) c->usage[0], + sizeof(*ret) / sizeof(u64) + nr); + percpu_up_read_preempt_enable(&c->mark_lock); + + return ret; } #define RESERVE_FACTOR 6 @@ -239,17 +168,13 @@ static u64 avail_factor(u64 r) return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); } -static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage) -{ - return fs_usage.s.hidden + - fs_usage.s.data + - reserve_factor(fs_usage.s.reserved + - fs_usage.s.online_reserved); -} - u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage) { - return min(c->capacity, __bch2_fs_sectors_used(c, fs_usage)); + return min(fs_usage.s.hidden + + fs_usage.s.data + + reserve_factor(fs_usage.s.reserved + + fs_usage.s.online_reserved), + c->capacity); } struct bch_fs_usage_short @@ -324,14 +249,16 @@ void bch2_fs_usage_apply(struct bch_fs *c, fs_usage->s.online_reserved -= added; } - bch2_usage_add(this_cpu_ptr(c->usage[0]), fs_usage); - - if (gc_visited(c, gc_pos)) - bch2_usage_add(this_cpu_ptr(c->usage[1]), fs_usage); - - bch2_fs_stats_verify(c); + acc_u64s((u64 *) this_cpu_ptr(c->usage[0]), + (u64 *) fs_usage, + sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr); - memset(fs_usage, 0, sizeof(*fs_usage)); + if (gc_visited(c, gc_pos)) { + BUG_ON(!c->usage[1]); + acc_u64s((u64 *) this_cpu_ptr(c->usage[1]), + (u64 *) fs_usage, + sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr); + } } static inline void account_bucket(struct bch_fs_usage *fs_usage, @@ -342,7 +269,6 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage, if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL) fs_usage->s.hidden += size; - fs_usage->buckets[type] += size; dev_usage->buckets[type] += nr; } @@ -387,8 +313,6 @@ static void 
bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, if (!is_available_bucket(old) && is_available_bucket(new)) bch2_wake_allocator(ca); - - bch2_dev_stats_verify(ca); } void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca) @@ -416,6 +340,37 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca) _old; \ }) +static inline void update_replicas(struct bch_fs *c, + struct bch_fs_usage *fs_usage, + struct bch_replicas_entry *r, + s64 sectors) +{ + int idx = bch2_replicas_entry_idx(c, r); + + BUG_ON(idx < 0); + BUG_ON(!sectors); + + if (r->data_type == BCH_DATA_CACHED) + fs_usage->s.cached += sectors; + else + fs_usage->s.data += sectors; + fs_usage->data[idx] += sectors; +} + +static inline void update_cached_sectors(struct bch_fs *c, + struct bch_fs_usage *fs_usage, + unsigned dev, s64 sectors) +{ + struct bch_replicas_padded r; + + r.e.data_type = BCH_DATA_CACHED; + r.e.nr_devs = 1; + r.e.nr_required = 1; + r.e.devs[0] = dev; + + update_replicas(c, fs_usage, &r.e, sectors); +} + static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, struct bucket_mark *old, bool gc) @@ -434,8 +389,9 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, new.gen++; })); - fs_usage->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors; - fs_usage->s.cached -= old->cached_sectors; + if (old->cached_sectors) + update_cached_sectors(c, fs_usage, ca->dev_idx, + -old->cached_sectors); } void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, @@ -502,11 +458,6 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, new.data_type = type; checked_add(new.dirty_sectors, sectors); })); - - if (type == BCH_DATA_BTREE || - type == BCH_DATA_USER) - fs_usage->s.data += sectors; - fs_usage->replicas[0].data[type] += sectors; } void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, @@ -630,9 +581,9 @@ static void bch2_mark_pointer(struct bch_fs *c, static int bch2_mark_stripe_ptr(struct bch_fs *c, struct bch_extent_stripe_ptr p, + enum bch_data_type data_type, + struct bch_fs_usage *fs_usage, s64 sectors, unsigned flags, - s64 *adjusted_disk_sectors, - unsigned *redundancy, bool gc) { struct stripe *m; @@ -648,16 +599,15 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, return -1; } + BUG_ON(m->r.e.data_type != data_type); + nr_data = m->nr_blocks - m->nr_redundant; parity_sectors = DIV_ROUND_UP(abs(sectors) * m->nr_redundant, nr_data); if (sectors < 0) parity_sectors = -parity_sectors; - - *adjusted_disk_sectors += parity_sectors; - - *redundancy = max_t(unsigned, *redundancy, m->nr_redundant + 1); + sectors += parity_sectors; new = atomic_add_return(sectors, &m->block_sectors[p.block]); old = new - sectors; @@ -673,11 +623,14 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, if (!gc) bch2_stripes_heap_update(c, m, p.idx); + update_replicas(c, fs_usage, &m->r.e, sectors); + return 0; } static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, - s64 sectors, enum bch_data_type data_type, + s64 sectors, + enum bch_data_type data_type, struct bch_fs_usage *fs_usage, unsigned journal_seq, unsigned flags, bool gc) @@ -685,58 +638,46 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - s64 cached_sectors = 0; - s64 dirty_sectors = 0; - s64 ec_sectors = 0; - unsigned replicas = 0; - unsigned ec_redundancy = 0; + struct bch_replicas_padded r; + 
s64 dirty_sectors = 0; unsigned i; int ret; + r.e.data_type = data_type; + r.e.nr_devs = 0; + r.e.nr_required = 1; + BUG_ON(!sectors); bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors = data_type == BCH_DATA_BTREE ? sectors : ptr_disk_sectors_delta(p, sectors); - s64 adjusted_disk_sectors = disk_sectors; bch2_mark_pointer(c, p, disk_sectors, data_type, fs_usage, journal_seq, flags, gc); - if (!p.ptr.cached) + if (p.ptr.cached) { + update_cached_sectors(c, fs_usage, p.ptr.dev, + disk_sectors); + } else if (!p.ec_nr) { + dirty_sectors += disk_sectors; + r.e.devs[r.e.nr_devs++] = p.ptr.dev; + } else { for (i = 0; i < p.ec_nr; i++) { ret = bch2_mark_stripe_ptr(c, p.ec[i], - disk_sectors, flags, - &adjusted_disk_sectors, - &ec_redundancy, gc); + data_type, fs_usage, + disk_sectors, flags, gc); if (ret) return ret; } - if (!p.ptr.cached) - replicas++; - if (p.ptr.cached) - cached_sectors += adjusted_disk_sectors; - else if (!p.ec_nr) - dirty_sectors += adjusted_disk_sectors; - else - ec_sectors += adjusted_disk_sectors; + r.e.nr_required = 0; + } } - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(fs_usage->replicas)); - ec_redundancy = clamp_t(unsigned, ec_redundancy, - 1, ARRAY_SIZE(fs_usage->replicas)); - - fs_usage->s.cached += cached_sectors; - fs_usage->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; - - fs_usage->s.data += dirty_sectors; - fs_usage->replicas[replicas - 1].data[data_type] += dirty_sectors; - - fs_usage->s.data += ec_sectors; - fs_usage->replicas[ec_redundancy - 1].ec_data += ec_sectors; + if (dirty_sectors) + update_replicas(c, fs_usage, &r.e, dirty_sectors); return 0; } @@ -804,8 +745,24 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, m->algorithm = s.v->algorithm; m->nr_blocks = s.v->nr_blocks; m->nr_redundant = s.v->nr_redundant; + + memset(&m->r, 0, sizeof(m->r)); + + m->r.e.data_type = BCH_DATA_USER; + m->r.e.nr_devs = s.v->nr_blocks; + m->r.e.nr_required = s.v->nr_blocks - s.v->nr_redundant; + + for (i = 0; i < s.v->nr_blocks; i++) + m->r.e.devs[i] = s.v->ptrs[i].dev; } + /* + * XXX: account for stripes somehow here + */ +#if 0 + update_replicas(c, fs_usage, &m->r.e, stripe_sectors); +#endif + if (!gc) { if (inserting) bch2_stripes_heap_insert(c, m, idx); @@ -853,11 +810,11 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; sectors *= replicas; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(fs_usage->replicas)); + replicas = clamp_t(unsigned, replicas, 1, + ARRAY_SIZE(fs_usage->persistent_reserved)); - fs_usage->s.reserved += sectors; - fs_usage->replicas[replicas - 1].persistent_reserved += sectors; + fs_usage->s.reserved += sectors; + fs_usage->persistent_reserved[replicas - 1] += sectors; break; } default: @@ -919,7 +876,7 @@ void bch2_mark_update(struct btree_insert *trans, struct btree_iter *iter = insert->iter; struct btree *b = iter->l[0].b; struct btree_node_iter node_iter = iter->l[0].iter; - struct bch_fs_usage fs_usage = { 0 }; + struct bch_fs_usage *fs_usage; struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; @@ -927,12 +884,13 @@ void bch2_mark_update(struct btree_insert *trans, return; percpu_down_read_preempt_disable(&c->mark_lock); + fs_usage = bch2_fs_usage_get_scratch(c); if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - bkey_start_offset(&insert->k->k), - pos, &fs_usage, trans->journal_res.seq, 0); 
+ pos, fs_usage, trans->journal_res.seq, 0); while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, KEY_TYPE_discard))) { @@ -965,7 +923,7 @@ void bch2_mark_update(struct btree_insert *trans, BUG_ON(sectors <= 0); bch2_mark_key_locked(c, k, true, sectors, - pos, &fs_usage, trans->journal_res.seq, 0); + pos, fs_usage, trans->journal_res.seq, 0); sectors = bkey_start_offset(&insert->k->k) - k.k->p.offset; @@ -976,12 +934,12 @@ void bch2_mark_update(struct btree_insert *trans, } bch2_mark_key_locked(c, k, false, sectors, - pos, &fs_usage, trans->journal_res.seq, 0); + pos, fs_usage, trans->journal_res.seq, 0); bch2_btree_node_iter_advance(&node_iter, b); } - bch2_fs_usage_apply(c, &fs_usage, trans->disk_res, pos); + bch2_fs_usage_apply(c, fs_usage, trans->disk_res, pos); percpu_up_read_preempt_enable(&c->mark_lock); } @@ -1003,8 +961,6 @@ void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) percpu_down_read_preempt_disable(&c->mark_lock); this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors); - - bch2_fs_stats_verify(c); percpu_up_read_preempt_enable(&c->mark_lock); res->sectors = 0; @@ -1045,8 +1001,6 @@ out: this_cpu_add(c->usage[0]->s.online_reserved, sectors); res->sectors += sectors; - bch2_disk_reservations_verify(c, flags); - bch2_fs_stats_verify(c); percpu_up_read_preempt_enable(&c->mark_lock); return 0; @@ -1078,14 +1032,11 @@ recalculate: this_cpu_add(c->usage[0]->s.online_reserved, sectors); res->sectors += sectors; ret = 0; - - bch2_disk_reservations_verify(c, flags); } else { atomic64_set(&c->sectors_available, sectors_available); ret = -ENOSPC; } - bch2_fs_stats_verify(c); percpu_up_write(&c->mark_lock); if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) @@ -1123,7 +1074,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), - btree_reserve); + btree_reserve * 2); bool resize = ca->buckets[0] != NULL, start_copygc = ca->copygc_thread != NULL; int ret = -ENOMEM; diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 8405911..ebd39e8 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -179,7 +179,6 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m, /* Device usage: */ -struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool); struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *); static inline u64 __dev_buckets_available(struct bch_dev *ca, @@ -218,8 +217,18 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca) /* Filesystem usage: */ -struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool); -struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *); +static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c) +{ + struct bch_fs_usage *ret; + + ret = this_cpu_ptr(c->usage_scratch); + + memset(ret, 0, sizeof(*ret) + c->replicas.nr * sizeof(u64)); + + return ret; +} + +struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index c5537a2..56863c2 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -74,16 +74,18 @@ struct bch_fs_usage { u64 cached; u64 reserved; u64 nr_inodes; + + /* XXX: add stats for compression ratio */ +#if 0 + u64 uncompressed; + u64 compressed; 
+#endif } s; /* broken out: */ - struct { - u64 data[BCH_DATA_NR]; - u64 ec_data; - u64 persistent_reserved; - } replicas[BCH_REPLICAS_MAX]; - u64 buckets[BCH_DATA_NR]; + u64 persistent_reserved[BCH_REPLICAS_MAX]; + u64 data[]; }; struct bch_fs_usage_short { diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index ac1ec5f..56ceb26 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -393,21 +393,29 @@ static long bch2_ioctl_usage(struct bch_fs *c, } { - struct bch_fs_usage src = bch2_fs_usage_read(c); + struct bch_fs_usage *src; struct bch_ioctl_fs_usage dst = { .capacity = c->capacity, - .used = bch2_fs_sectors_used(c, src), - .online_reserved = src.s.online_reserved, }; + src = bch2_fs_usage_read(c); + if (!src) + return -ENOMEM; + + dst.used = bch2_fs_sectors_used(c, *src); + dst.online_reserved = src->s.online_reserved; + for (i = 0; i < BCH_REPLICAS_MAX; i++) { dst.persistent_reserved[i] = - src.replicas[i].persistent_reserved; - + src->persistent_reserved[i]; +#if 0 for (j = 0; j < BCH_DATA_NR; j++) dst.sectors[j][i] = src.replicas[i].data[j]; +#endif } + kfree(src); + ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst)); if (ret) return ret; diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 28d086b..dfa2de9 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -14,128 +14,6 @@ #include #include -/* - * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any - * use permitted, subject to terms of PostgreSQL license; see.) - - * If we have a 64-bit integer type, then a 64-bit CRC looks just like the - * usual sort of implementation. (See Ross Williams' excellent introduction - * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from - * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) - * If we have no working 64-bit type, then fake it with two 32-bit registers. - * - * The present implementation is a normal (not "reflected", in Williams' - * terms) 64-bit CRC, using initial all-ones register contents and a final - * bit inversion. 
The chosen polynomial is borrowed from the DLT1 spec - * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): - * - * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + - * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + - * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + - * x^7 + x^4 + x + 1 -*/ - -static const u64 crc_table[256] = { - 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL, - 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL, - 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL, - 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL, - 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL, - 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL, - 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL, - 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL, - 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL, - 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL, - 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL, - 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL, - 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL, - 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL, - 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL, - 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL, - 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL, - 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL, - 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL, - 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL, - 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL, - 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL, - 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL, - 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL, - 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL, - 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL, - 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL, - 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL, - 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL, - 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL, - 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL, - 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL, - 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL, - 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL, - 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL, - 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL, - 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL, - 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL, - 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL, - 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL, - 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL, - 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL, - 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL, - 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL, - 
0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL, - 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL, - 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL, - 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL, - 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL, - 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL, - 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL, - 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL, - 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL, - 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL, - 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL, - 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL, - 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL, - 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL, - 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL, - 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL, - 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL, - 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL, - 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL, - 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL, - 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL, - 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL, - 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL, - 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL, - 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL, - 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL, - 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL, - 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL, - 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL, - 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL, - 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL, - 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL, - 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL, - 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL, - 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL, - 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL, - 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL, - 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL, - 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL, - 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL, - 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL, - 0x9AFCE626CE85B507ULL, -}; - -u64 bch2_crc64_update(u64 crc, const void *_data, size_t len) -{ - const unsigned char *data = _data; - - while (len--) { - int i = ((int) (crc >> 56) ^ *data++) & 0xFF; - crc = crc_table[i] ^ (crc << 8); - } - - return crc; -} - static u64 bch2_checksum_init(unsigned type) { switch (type) { @@ -188,21 +66,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t } } -static inline void do_encrypt_sg(struct crypto_skcipher *tfm, +static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, struct nonce nonce, struct scatterlist *sg, size_t len) { - 
SKCIPHER_REQUEST_ON_STACK(req, tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); int ret; - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_crypt(req, sg, sg, len, nonce.d); ret = crypto_skcipher_encrypt(req); BUG_ON(ret); } -static inline void do_encrypt(struct crypto_skcipher *tfm, +static inline void do_encrypt(struct crypto_sync_skcipher *tfm, struct nonce nonce, void *buf, size_t len) { @@ -213,10 +91,10 @@ static inline void do_encrypt(struct crypto_skcipher *tfm, } int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, - void *buf, size_t len) + void *buf, size_t len) { - struct crypto_skcipher *chacha20 = - crypto_alloc_skcipher("chacha20", 0, 0); + struct crypto_sync_skcipher *chacha20 = + crypto_alloc_sync_skcipher("chacha20", 0, 0); int ret; if (!chacha20) { @@ -224,7 +102,8 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, return PTR_ERR(chacha20); } - ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key)); + ret = crypto_skcipher_setkey(&chacha20->base, + (void *) key, sizeof(*key)); if (ret) { pr_err("crypto_skcipher_setkey() error: %i", ret); goto err; @@ -232,7 +111,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, do_encrypt(chacha20, nonce, buf, len); err: - crypto_free_skcipher(chacha20); + crypto_free_sync_skcipher(chacha20); return ret; } @@ -597,7 +476,7 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { if (!c->chacha20) - c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0); + c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); if (IS_ERR(c->chacha20)) { bch_err(c, "error requesting chacha20 module: %li", PTR_ERR(c->chacha20)); @@ -680,7 +559,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) goto err; } - ret = crypto_skcipher_setkey(c->chacha20, + ret = crypto_skcipher_setkey(&c->chacha20->base, (void *) &key.key, sizeof(key.key)); if (ret) goto err; @@ -708,7 +587,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c) if (!IS_ERR_OR_NULL(c->poly1305)) crypto_free_shash(c->poly1305); if (!IS_ERR_OR_NULL(c->chacha20)) - crypto_free_skcipher(c->chacha20); + crypto_free_sync_skcipher(c->chacha20); if (!IS_ERR_OR_NULL(c->sha256)) crypto_free_shash(c->sha256); } @@ -740,7 +619,7 @@ int bch2_fs_encryption_init(struct bch_fs *c) if (ret) goto out; - ret = crypto_skcipher_setkey(c->chacha20, + ret = crypto_skcipher_setkey(&c->chacha20->base, (void *) &key.key, sizeof(key.key)); if (ret) goto out; diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h index 031b36f..fb72c6a 100644 --- a/libbcachefs/checksum.h +++ b/libbcachefs/checksum.h @@ -5,9 +5,13 @@ #include "extents_types.h" #include "super-io.h" +#include <linux/crc64.h> #include -u64 bch2_crc64_update(u64, const void *, size_t); +static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len) +{ + return crc64_be(crc, p, len); +} #define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) #define BCH_NONCE_BTREE cpu_to_le32(2 << 28) diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h index d042981..44c5d38 100644 --- a/libbcachefs/ec_types.h +++ b/libbcachefs/ec_types.h @@ -5,6 +5,11 @@ #define EC_STRIPE_MAX 16 +struct bch_replicas_padded { + struct bch_replicas_entry e; + u8 pad[EC_STRIPE_MAX]; +}; + struct stripe { size_t heap_idx; @@ -17,6 +22,8 @@ struct stripe { u8 alive; atomic_t blocks_nonempty; atomic_t block_sectors[EC_STRIPE_MAX]; + + struct bch_replicas_padded r; }; struct ec_stripe_heap_entry { diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index
41ac5d4..fdc24be 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -899,11 +899,8 @@ static void readpage_bio_extend(struct readpages_iter *iter, if (!get_more) break; - rcu_read_lock(); - page = radix_tree_lookup(&iter->mapping->i_pages, page_offset); - rcu_read_unlock(); - - if (page && !radix_tree_exceptional_entry(page)) + page = xa_load(&iter->mapping->i_pages, page_offset); + if (page && !xa_is_value(page)) break; page = __page_cache_alloc(readahead_gfp_mask(iter->mapping)); @@ -2705,7 +2702,7 @@ static bool page_slot_is_data(struct address_space *mapping, pgoff_t index) bool ret; page = find_lock_entry(mapping, index); - if (!page || radix_tree_exception(page)) + if (!page || xa_is_value(page)) return false; ret = page_is_data(page); diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index d6cb21d..26d5f34 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -183,7 +183,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, if (unlikely(ret < 0)) goto err1; - qstr.hash_len = ret; + qstr.len = ret; qstr.name = kname; ret = -ENOENT; diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 1d3161e..66ca13a 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -3,11 +3,6 @@ #include "replicas.h" #include "super-io.h" -struct bch_replicas_entry_padded { - struct bch_replicas_entry e; - u8 pad[BCH_SB_MEMBERS_MAX]; -}; - static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, struct bch_replicas_cpu *); @@ -124,8 +119,6 @@ static void bkey_to_replicas(struct bkey_s_c k, stripe_to_replicas(k, e); break; } - - replicas_entry_sort(e); } static inline void devlist_to_replicas(struct bch_devs_list devs, @@ -144,8 +137,6 @@ static inline void devlist_to_replicas(struct bch_devs_list devs, for (i = 0; i < devs.nr; i++) e->devs[e->nr_devs++] = devs.devs[i]; - - replicas_entry_sort(e); } static struct bch_replicas_cpu @@ -176,13 +167,35 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old, return new; } +static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, + struct bch_replicas_entry *search) +{ + int idx, entry_size = replicas_entry_bytes(search); + + if (unlikely(entry_size > r->entry_size)) + return -1; + + replicas_entry_sort(search); + + while (entry_size < r->entry_size) + ((char *) search)[entry_size++] = 0; + + idx = eytzinger0_find(r->entries, r->nr, r->entry_size, + memcmp, search); + + return idx < r->nr ? 
idx : -1; +} + +int bch2_replicas_entry_idx(struct bch_fs *c, + struct bch_replicas_entry *search) +{ + return __replicas_entry_idx(&c->replicas, search); +} + static bool __replicas_has_entry(struct bch_replicas_cpu *r, struct bch_replicas_entry *search) { - return replicas_entry_bytes(search) <= r->entry_size && - eytzinger0_find(r->entries, r->nr, - r->entry_size, - memcmp, search) < r->nr; + return __replicas_entry_idx(r, search) >= 0; } static bool replicas_has_entry(struct bch_fs *c, @@ -201,6 +214,80 @@ static bool replicas_has_entry(struct bch_fs *c, return marked; } +static void __replicas_table_update(struct bch_fs_usage __percpu *dst, + struct bch_replicas_cpu *dst_r, + struct bch_fs_usage __percpu *src, + struct bch_replicas_cpu *src_r) +{ + int src_idx, dst_idx, cpu; + + for (src_idx = 0; src_idx < src_r->nr; src_idx++) { + u64 *dst_v, src_v = 0; + + for_each_possible_cpu(cpu) + src_v += *per_cpu_ptr(&src->data[src_idx], cpu); + + dst_idx = __replicas_entry_idx(dst_r, + cpu_replicas_entry(src_r, src_idx)); + + if (dst_idx < 0) { + BUG_ON(src_v); + continue; + } + + preempt_disable(); + + dst_v = this_cpu_ptr(&dst->data[dst_idx]); + BUG_ON(*dst_v); + + *dst_v = src_v; + + preempt_enable(); + } +} + +/* + * Resize filesystem accounting: + */ +static int replicas_table_update(struct bch_fs *c, + struct bch_replicas_cpu *new_r) +{ + struct bch_fs_usage __percpu *new_usage[3] = { NULL, NULL, NULL }; + unsigned bytes = sizeof(struct bch_fs_usage) + + sizeof(u64) * new_r->nr; + unsigned i; + int ret = -ENOMEM; + + for (i = 0; i < 3; i++) { + if (i < 2 && !c->usage[i]) + continue; + + new_usage[i] = __alloc_percpu_gfp(bytes, sizeof(u64), + GFP_NOIO); + if (!new_usage[i]) + goto err; + } + + for (i = 0; i < 2; i++) { + if (!c->usage[i]) + continue; + + __replicas_table_update(new_usage[i], new_r, + c->usage[i], &c->replicas); + + swap(c->usage[i], new_usage[i]); + } + + swap(c->usage_scratch, new_usage[2]); + + swap(c->replicas, *new_r); + ret = 0; +err: + for (i = 0; i < 3; i++) + free_percpu(new_usage[i]); + return ret; +} + noinline static int bch2_mark_replicas_slowpath(struct bch_fs *c, struct bch_replicas_entry *new_entry) @@ -242,7 +329,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, /* don't update in memory replicas until changes are persistent */ percpu_down_write(&c->mark_lock); if (new_r.entries) - swap(new_r, c->replicas); + ret = replicas_table_update(c, &new_r); if (new_gc.entries) swap(new_gc, c->replicas_gc); percpu_up_write(&c->mark_lock); @@ -269,7 +356,7 @@ int bch2_mark_replicas(struct bch_fs *c, enum bch_data_type data_type, struct bch_devs_list devs) { - struct bch_replicas_entry_padded search; + struct bch_replicas_padded search; if (!devs.nr) return 0; @@ -285,7 +372,7 @@ int bch2_mark_replicas(struct bch_fs *c, int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) { - struct bch_replicas_entry_padded search; + struct bch_replicas_padded search; struct bch_devs_list cached = bch2_bkey_cached_devs(k); unsigned i; int ret; @@ -306,6 +393,8 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) int bch2_replicas_gc_end(struct bch_fs *c, int ret) { + unsigned i; + lockdep_assert_held(&c->replicas_gc_lock); mutex_lock(&c->sb_lock); @@ -313,6 +402,39 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) if (ret) goto err; + /* + * this is kind of crappy; the replicas gc mechanism needs to be ripped + * out + */ + + for (i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + 
struct bch_replicas_cpu n; + u64 v = 0; + int cpu; + + if (__replicas_has_entry(&c->replicas_gc, e)) + continue; + + for_each_possible_cpu(cpu) + v += *per_cpu_ptr(&c->usage[0]->data[i], cpu); + if (!v) + continue; + + n = cpu_replicas_add_entry(&c->replicas_gc, e); + if (!n.entries) { + ret = -ENOSPC; + goto err; + } + + percpu_down_write(&c->mark_lock); + swap(n, c->replicas_gc); + percpu_up_write(&c->mark_lock); + + kfree(n.entries); + } + if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { ret = -ENOSPC; goto err; @@ -324,7 +446,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) err: percpu_down_write(&c->mark_lock); if (!ret) - swap(c->replicas, c->replicas_gc); + ret = replicas_table_update(c, &c->replicas_gc); kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; @@ -460,7 +582,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) bch2_cpu_replicas_sort(&new_r); percpu_down_write(&c->mark_lock); - swap(c->replicas, new_r); + ret = replicas_table_update(c, &new_r); percpu_up_write(&c->mark_lock); kfree(new_r.entries); @@ -681,7 +803,7 @@ bool bch2_replicas_marked(struct bch_fs *c, struct bch_devs_list devs, bool check_gc_replicas) { - struct bch_replicas_entry_padded search; + struct bch_replicas_padded search; if (!devs.nr) return true; @@ -697,7 +819,7 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c, struct bkey_s_c k, bool check_gc_replicas) { - struct bch_replicas_entry_padded search; + struct bch_replicas_padded search; struct bch_devs_list cached = bch2_bkey_cached_devs(k); unsigned i; diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 87246a0..fc83365 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -1,8 +1,11 @@ #ifndef _BCACHEFS_REPLICAS_H #define _BCACHEFS_REPLICAS_H +#include "eytzinger.h" #include "replicas_types.h" +int bch2_replicas_entry_idx(struct bch_fs *, + struct bch_replicas_entry *); bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type, struct bch_devs_list, bool); bool bch2_bkey_replicas_marked(struct bch_fs *, diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 032b34a..1f343e6 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -11,6 +11,7 @@ #include #include +#include <crypto/sha.h> struct bch_hash_info { u8 type; @@ -37,7 +38,7 @@ bch2_hash_info_init(struct bch_fs *c, break; case BCH_STR_HASH_SIPHASH: { SHASH_DESC_ON_STACK(desc, c->sha256); - u8 digest[crypto_shash_digestsize(c->sha256)]; + u8 digest[SHA256_DIGEST_SIZE]; desc->tfm = c->sha256; desc->flags = 0; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 82a0bf0..a539f2a 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -406,6 +406,7 @@ static void bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); percpu_free_rwsem(&c->mark_lock); + free_percpu(c->usage_scratch); free_percpu(c->usage[0]); free_percpu(c->pcpu); mempool_exit(&c->btree_iters_pool); @@ -536,7 +537,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { struct bch_sb_field_members *mi; struct bch_fs *c; - unsigned i, iter_size; + unsigned i, iter_size, fs_usage_size; const char *err; pr_verbose_init(opts, ""); @@ -630,6 +631,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) (btree_blocks(c) + 1) * 2 * sizeof(struct btree_node_iter_set); + fs_usage_size = sizeof(struct bch_fs_usage) + + sizeof(u64) * c->replicas.nr; + if (!(c->wq = alloc_workqueue("bcachefs", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || !(c->copygc_wq =
alloc_workqueue("bcache_copygc", @@ -644,7 +648,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) max(offsetof(struct btree_read_bio, bio), offsetof(struct btree_write_bio, wbio.bio)), BIOSET_NEED_BVECS) || - !(c->usage[0] = alloc_percpu(struct bch_fs_usage)) || + !(c->usage[0] = __alloc_percpu(fs_usage_size, sizeof(u64))) || + !(c->usage_scratch = __alloc_percpu(fs_usage_size, sizeof(u64))) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 316f827..2e6e9bd 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -233,33 +233,34 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) { struct printbuf out = _PBUF(buf, PAGE_SIZE); - struct bch_fs_usage stats = bch2_fs_usage_read(c); - unsigned replicas, type; + struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c); + unsigned replicas; + + if (!fs_usage) + return -ENOMEM; pr_buf(&out, "capacity:\t\t%llu\n", c->capacity); - for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) { + for (replicas = 0; + replicas < ARRAY_SIZE(fs_usage->persistent_reserved); + replicas++) { pr_buf(&out, "%u replicas:\n", replicas + 1); - +#if 0 for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) pr_buf(&out, "\t%s:\t\t%llu\n", bch2_data_types[type], stats.replicas[replicas].data[type]); pr_buf(&out, "\terasure coded:\t%llu\n", stats.replicas[replicas].ec_data); +#endif pr_buf(&out, "\treserved:\t%llu\n", - stats.replicas[replicas].persistent_reserved); + fs_usage->persistent_reserved[replicas]); } - pr_buf(&out, "bucket usage\n"); - - for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) - pr_buf(&out, "\t%s:\t\t%llu\n", - bch2_data_types[type], - stats.buckets[type]); - pr_buf(&out, "online reserved:\t%llu\n", - stats.s.online_reserved); + fs_usage->s.online_reserved); + + kfree(fs_usage); return out.pos - buf; } diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 80d0818..5c060e7 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -24,9 +24,6 @@ #include "eytzinger.h" #include "util.h" -#define simple_strtoint(c, end, base) simple_strtol(c, end, base) -#define simple_strtouint(c, end, base) simple_strtoul(c, end, base) - static const char si_units[] = "?kMGTPEZY"; static int __bch2_strtoh(const char *cp, u64 *res, diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 905b24f..25d6750 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -10,6 +10,7 @@ #include #include #include +#include <linux/percpu.h> #include #include #include @@ -61,13 +62,6 @@ struct closure; #endif -#ifndef __CHECKER__ -#define __flatten __attribute__((flatten)) -#else -/* sparse doesn't know about attribute((flatten)) */ -#define __flatten -#endif - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CPU_BIG_ENDIAN 0 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -704,4 +698,21 @@ do { \ } \ } while (0) +static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) +{ + unsigned i; + + for (i = 0; i < nr; i++) + acc[i] += src[i]; +} + +static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, + unsigned nr) +{ + int cpu; + + for_each_possible_cpu(cpu) + acc_u64s(acc, per_cpu_ptr(src, cpu), nr); +} + #endif /* _BCACHEFS_UTIL_H */ diff --git a/linux/crc64.c b/linux/crc64.c new file mode 100644 index 0000000..0ef8ae6 --- /dev/null +++ b/linux/crc64.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + *
Normal 64-bit CRC calculation. + * + * This is a basic crc64 implementation following ECMA-182 specification, + * which can be found from, + * http://www.ecma-international.org/publications/standards/Ecma-182.htm + * + * Dr. Ross N. Williams has a great document to introduce the idea of CRC + * algorithm, here the CRC64 code is also inspired by the table-driven + * algorithm and detail example from this paper. This paper can be found + * from, + * http://www.ross.net/crc/download/crc_v3.txt + * + * crc64table[256] is the lookup table of a table-driven 64-bit CRC + * calculation, which is generated by gen_crc64table.c in kernel build + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification + * as well, which is defined as, + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 + * + * Copyright 2018 SUSE Linux. + * Author: Coly Li <colyli@suse.de> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include "crc64table.h" + +MODULE_DESCRIPTION("CRC64 calculations"); +MODULE_LICENSE("GPL v2"); + +/** + * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 + * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, or the previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + */ +u64 __pure crc64_be(u64 crc, const void *p, size_t len) +{ + size_t i, t; + + const unsigned char *_p = p; + + for (i = 0; i < len; i++) { + t = ((crc >> 56) ^ (*_p++)) & 0xFF; + crc = crc64table[t] ^ (crc << 8); + } + + return crc; +} +EXPORT_SYMBOL_GPL(crc64_be); diff --git a/linux/crc64table.h b/linux/crc64table.h new file mode 100644 index 0000000..9964164 --- /dev/null +++ b/linux/crc64table.h @@ -0,0 +1,135 @@ +/* this file is generated - do not edit */ + +#include <linux/types.h> +#include <linux/cache.h> + +static const u64 ____cacheline_aligned crc64table[256] = { + 0x0000000000000000ULL, 0x42f0e1eba9ea3693ULL, + 0x85e1c3d753d46d26ULL, 0xc711223cfa3e5bb5ULL, + 0x493366450e42ecdfULL, 0x0bc387aea7a8da4cULL, + 0xccd2a5925d9681f9ULL, 0x8e224479f47cb76aULL, + 0x9266cc8a1c85d9beULL, 0xd0962d61b56fef2dULL, + 0x17870f5d4f51b498ULL, 0x5577eeb6e6bb820bULL, + 0xdb55aacf12c73561ULL, 0x99a54b24bb2d03f2ULL, + 0x5eb4691841135847ULL, 0x1c4488f3e8f96ed4ULL, + 0x663d78ff90e185efULL, 0x24cd9914390bb37cULL, + 0xe3dcbb28c335e8c9ULL, 0xa12c5ac36adfde5aULL, + 0x2f0e1eba9ea36930ULL, 0x6dfeff5137495fa3ULL, + 0xaaefdd6dcd770416ULL, 0xe81f3c86649d3285ULL, + 0xf45bb4758c645c51ULL, 0xb6ab559e258e6ac2ULL, + 0x71ba77a2dfb03177ULL, 0x334a9649765a07e4ULL, + 0xbd68d2308226b08eULL, 0xff9833db2bcc861dULL, + 0x388911e7d1f2dda8ULL, 0x7a79f00c7818eb3bULL, + 0xcc7af1ff21c30bdeULL, 0x8e8a101488293d4dULL, + 0x499b3228721766f8ULL, 0x0b6bd3c3dbfd506bULL, + 0x854997ba2f81e701ULL, 0xc7b97651866bd192ULL, + 0x00a8546d7c558a27ULL, 0x4258b586d5bfbcb4ULL, + 0x5e1c3d753d46d260ULL, 0x1cecdc9e94ace4f3ULL, + 0xdbfdfea26e92bf46ULL, 0x990d1f49c77889d5ULL, + 0x172f5b3033043ebfULL, 0x55dfbadb9aee082cULL, + 0x92ce98e760d05399ULL, 0xd03e790cc93a650aULL, + 0xaa478900b1228e31ULL, 0xe8b768eb18c8b8a2ULL, + 0x2fa64ad7e2f6e317ULL, 0x6d56ab3c4b1cd584ULL, + 0xe374ef45bf6062eeULL, 0xa1840eae168a547dULL, + 0x66952c92ecb40fc8ULL, 0x2465cd79455e395bULL, + 0x3821458aada7578fULL, 0x7ad1a461044d611cULL, + 0xbdc0865dfe733aa9ULL, 0xff3067b657990c3aULL, + 0x711223cfa3e5bb50ULL, 0x33e2c2240a0f8dc3ULL, + 0xf4f3e018f031d676ULL, 0xb60301f359dbe0e5ULL, +
0xda050215ea6c212fULL, 0x98f5e3fe438617bcULL, + 0x5fe4c1c2b9b84c09ULL, 0x1d14202910527a9aULL, + 0x93366450e42ecdf0ULL, 0xd1c685bb4dc4fb63ULL, + 0x16d7a787b7faa0d6ULL, 0x5427466c1e109645ULL, + 0x4863ce9ff6e9f891ULL, 0x0a932f745f03ce02ULL, + 0xcd820d48a53d95b7ULL, 0x8f72eca30cd7a324ULL, + 0x0150a8daf8ab144eULL, 0x43a04931514122ddULL, + 0x84b16b0dab7f7968ULL, 0xc6418ae602954ffbULL, + 0xbc387aea7a8da4c0ULL, 0xfec89b01d3679253ULL, + 0x39d9b93d2959c9e6ULL, 0x7b2958d680b3ff75ULL, + 0xf50b1caf74cf481fULL, 0xb7fbfd44dd257e8cULL, + 0x70eadf78271b2539ULL, 0x321a3e938ef113aaULL, + 0x2e5eb66066087d7eULL, 0x6cae578bcfe24bedULL, + 0xabbf75b735dc1058ULL, 0xe94f945c9c3626cbULL, + 0x676dd025684a91a1ULL, 0x259d31cec1a0a732ULL, + 0xe28c13f23b9efc87ULL, 0xa07cf2199274ca14ULL, + 0x167ff3eacbaf2af1ULL, 0x548f120162451c62ULL, + 0x939e303d987b47d7ULL, 0xd16ed1d631917144ULL, + 0x5f4c95afc5edc62eULL, 0x1dbc74446c07f0bdULL, + 0xdaad56789639ab08ULL, 0x985db7933fd39d9bULL, + 0x84193f60d72af34fULL, 0xc6e9de8b7ec0c5dcULL, + 0x01f8fcb784fe9e69ULL, 0x43081d5c2d14a8faULL, + 0xcd2a5925d9681f90ULL, 0x8fdab8ce70822903ULL, + 0x48cb9af28abc72b6ULL, 0x0a3b7b1923564425ULL, + 0x70428b155b4eaf1eULL, 0x32b26afef2a4998dULL, + 0xf5a348c2089ac238ULL, 0xb753a929a170f4abULL, + 0x3971ed50550c43c1ULL, 0x7b810cbbfce67552ULL, + 0xbc902e8706d82ee7ULL, 0xfe60cf6caf321874ULL, + 0xe224479f47cb76a0ULL, 0xa0d4a674ee214033ULL, + 0x67c58448141f1b86ULL, 0x253565a3bdf52d15ULL, + 0xab1721da49899a7fULL, 0xe9e7c031e063acecULL, + 0x2ef6e20d1a5df759ULL, 0x6c0603e6b3b7c1caULL, + 0xf6fae5c07d3274cdULL, 0xb40a042bd4d8425eULL, + 0x731b26172ee619ebULL, 0x31ebc7fc870c2f78ULL, + 0xbfc9838573709812ULL, 0xfd39626eda9aae81ULL, + 0x3a28405220a4f534ULL, 0x78d8a1b9894ec3a7ULL, + 0x649c294a61b7ad73ULL, 0x266cc8a1c85d9be0ULL, + 0xe17dea9d3263c055ULL, 0xa38d0b769b89f6c6ULL, + 0x2daf4f0f6ff541acULL, 0x6f5faee4c61f773fULL, + 0xa84e8cd83c212c8aULL, 0xeabe6d3395cb1a19ULL, + 0x90c79d3fedd3f122ULL, 0xd2377cd44439c7b1ULL, + 0x15265ee8be079c04ULL, 0x57d6bf0317edaa97ULL, + 0xd9f4fb7ae3911dfdULL, 0x9b041a914a7b2b6eULL, + 0x5c1538adb04570dbULL, 0x1ee5d94619af4648ULL, + 0x02a151b5f156289cULL, 0x4051b05e58bc1e0fULL, + 0x87409262a28245baULL, 0xc5b073890b687329ULL, + 0x4b9237f0ff14c443ULL, 0x0962d61b56fef2d0ULL, + 0xce73f427acc0a965ULL, 0x8c8315cc052a9ff6ULL, + 0x3a80143f5cf17f13ULL, 0x7870f5d4f51b4980ULL, + 0xbf61d7e80f251235ULL, 0xfd913603a6cf24a6ULL, + 0x73b3727a52b393ccULL, 0x31439391fb59a55fULL, + 0xf652b1ad0167feeaULL, 0xb4a25046a88dc879ULL, + 0xa8e6d8b54074a6adULL, 0xea16395ee99e903eULL, + 0x2d071b6213a0cb8bULL, 0x6ff7fa89ba4afd18ULL, + 0xe1d5bef04e364a72ULL, 0xa3255f1be7dc7ce1ULL, + 0x64347d271de22754ULL, 0x26c49cccb40811c7ULL, + 0x5cbd6cc0cc10fafcULL, 0x1e4d8d2b65facc6fULL, + 0xd95caf179fc497daULL, 0x9bac4efc362ea149ULL, + 0x158e0a85c2521623ULL, 0x577eeb6e6bb820b0ULL, + 0x906fc95291867b05ULL, 0xd29f28b9386c4d96ULL, + 0xcedba04ad0952342ULL, 0x8c2b41a1797f15d1ULL, + 0x4b3a639d83414e64ULL, 0x09ca82762aab78f7ULL, + 0x87e8c60fded7cf9dULL, 0xc51827e4773df90eULL, + 0x020905d88d03a2bbULL, 0x40f9e43324e99428ULL, + 0x2cffe7d5975e55e2ULL, 0x6e0f063e3eb46371ULL, + 0xa91e2402c48a38c4ULL, 0xebeec5e96d600e57ULL, + 0x65cc8190991cb93dULL, 0x273c607b30f68faeULL, + 0xe02d4247cac8d41bULL, 0xa2dda3ac6322e288ULL, + 0xbe992b5f8bdb8c5cULL, 0xfc69cab42231bacfULL, + 0x3b78e888d80fe17aULL, 0x7988096371e5d7e9ULL, + 0xf7aa4d1a85996083ULL, 0xb55aacf12c735610ULL, + 0x724b8ecdd64d0da5ULL, 0x30bb6f267fa73b36ULL, + 0x4ac29f2a07bfd00dULL, 0x08327ec1ae55e69eULL, + 0xcf235cfd546bbd2bULL, 0x8dd3bd16fd818bb8ULL, + 
0x03f1f96f09fd3cd2ULL, 0x41011884a0170a41ULL, + 0x86103ab85a2951f4ULL, 0xc4e0db53f3c36767ULL, + 0xd8a453a01b3a09b3ULL, 0x9a54b24bb2d03f20ULL, + 0x5d45907748ee6495ULL, 0x1fb5719ce1045206ULL, + 0x919735e51578e56cULL, 0xd367d40ebc92d3ffULL, + 0x1476f63246ac884aULL, 0x568617d9ef46bed9ULL, + 0xe085162ab69d5e3cULL, 0xa275f7c11f7768afULL, + 0x6564d5fde549331aULL, 0x279434164ca30589ULL, + 0xa9b6706fb8dfb2e3ULL, 0xeb46918411358470ULL, + 0x2c57b3b8eb0bdfc5ULL, 0x6ea7525342e1e956ULL, + 0x72e3daa0aa188782ULL, 0x30133b4b03f2b111ULL, + 0xf7021977f9cceaa4ULL, 0xb5f2f89c5026dc37ULL, + 0x3bd0bce5a45a6b5dULL, 0x79205d0e0db05dceULL, + 0xbe317f32f78e067bULL, 0xfcc19ed95e6430e8ULL, + 0x86b86ed5267cdbd3ULL, 0xc4488f3e8f96ed40ULL, + 0x0359ad0275a8b6f5ULL, 0x41a94ce9dc428066ULL, + 0xcf8b0890283e370cULL, 0x8d7be97b81d4019fULL, + 0x4a6acb477bea5a2aULL, 0x089a2aacd2006cb9ULL, + 0x14dea25f3af9026dULL, 0x562e43b4931334feULL, + 0x913f6188692d6f4bULL, 0xd3cf8063c0c759d8ULL, + 0x5dedc41a34bbeeb2ULL, 0x1f1d25f19d51d821ULL, + 0xd80c07cd676f8394ULL, 0x9afce626ce85b507ULL, +}; diff --git a/libbcachefs/six.c b/linux/six.c similarity index 94% rename from libbcachefs/six.c rename to linux/six.c index afa59a4..aceeabb 100644 --- a/libbcachefs/six.c +++ b/linux/six.c @@ -1,11 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> #include #include #include #include #include +#include <linux/six.h> -#include "six.h" +#ifdef DEBUG +#define EBUG_ON(cond) BUG_ON(cond) +#else +#define EBUG_ON(cond) do {} while (0) +#endif #define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) #define six_release(l) lock_release(l, 0, _RET_IP_) @@ -401,21 +408,25 @@ bool six_trylock_##type(struct six_lock *lock) \ { \ return __six_trylock_type(lock, SIX_LOCK_##type); \ } \ +EXPORT_SYMBOL_GPL(six_trylock_##type); \ \ bool six_relock_##type(struct six_lock *lock, u32 seq) \ { \ return __six_relock_type(lock, SIX_LOCK_##type, seq); \ } \ +EXPORT_SYMBOL_GPL(six_relock_##type); \ \ void six_lock_##type(struct six_lock *lock) \ { \ __six_lock_type(lock, SIX_LOCK_##type); \ } \ +EXPORT_SYMBOL_GPL(six_lock_##type); \ \ void six_unlock_##type(struct six_lock *lock) \ { \ __six_unlock_type(lock, SIX_LOCK_##type); \ -} +} \ +EXPORT_SYMBOL_GPL(six_unlock_##type); __SIX_LOCK(read) __SIX_LOCK(intent) @@ -429,6 +440,7 @@ bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) { return __six_trylock_type(lock, type); } +EXPORT_SYMBOL_GPL(six_trylock_type); bool six_relock_type(struct six_lock *lock, enum six_lock_type type, unsigned seq) @@ -436,16 +448,19 @@ bool six_relock_type(struct six_lock *lock, enum six_lock_type type, return __six_relock_type(lock, type, seq); } +EXPORT_SYMBOL_GPL(six_relock_type); void six_lock_type(struct six_lock *lock, enum six_lock_type type) { __six_lock_type(lock, type); } +EXPORT_SYMBOL_GPL(six_lock_type); void six_unlock_type(struct six_lock *lock, enum six_lock_type type) { __six_unlock_type(lock, type); } +EXPORT_SYMBOL_GPL(six_unlock_type); #endif @@ -455,6 +470,7 @@ void six_lock_downgrade(struct six_lock *lock) six_lock_increment(lock, SIX_LOCK_read); six_unlock_intent(lock); } +EXPORT_SYMBOL_GPL(six_lock_downgrade); bool six_lock_tryupgrade(struct six_lock *lock) { @@ -481,6 +497,7 @@ return true; } +EXPORT_SYMBOL_GPL(six_lock_tryupgrade); bool six_trylock_convert(struct six_lock *lock, enum six_lock_type from, @@ -498,6 +515,7 @@ bool six_trylock_convert(struct six_lock *lock, return six_lock_tryupgrade(lock); } } +EXPORT_SYMBOL_GPL(six_trylock_convert); /* *
Increment read/intent lock count, assuming we already have it read or intent @@ -514,3 +532,4 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type) atomic64_add(l[type].lock_val, &lock->state.counter); } +EXPORT_SYMBOL_GPL(six_lock_increment); -- 2.39.2
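A quick way to sanity-check the crc64_be() conversion above: both the table-driven loop and the new one-line bch2_crc64_update() wrapper rely on crc64_be() accepting the previous CRC as its seed, so hashing a buffer in two calls must give the same result as one call over the whole buffer. The standalone program below is a minimal sketch for review purposes, not part of the patch: it reimplements the same big-endian ECMA-182 CRC bit by bit (so no lookup table is needed), assuming nothing beyond the polynomial quoted in the crc64.c header comment; crc64_ref() is a name invented here for illustration.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* ECMA-182 polynomial, with the implicit x^64 term dropped */
#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL

/* Bit-at-a-time reference for the table-driven crc64_be() above */
static uint64_t crc64_ref(uint64_t crc, const void *p, size_t len)
{
	const unsigned char *d = p;
	int i;

	while (len--) {
		crc ^= (uint64_t)*d++ << 56;
		for (i = 0; i < 8; i++)
			crc = (crc & (1ULL << 63))
				? (crc << 1) ^ CRC64_ECMA182_POLY
				: crc << 1;
	}
	return crc;
}

int main(void)
{
	const char buf[] = "123456789";

	/* One-shot vs. incremental: the second computation seeds the
	 * tail with the CRC of the first four bytes, which is how a
	 * caller would use bch2_crc64_update() across split buffers. */
	uint64_t whole = crc64_ref(0, buf, sizeof(buf) - 1);
	uint64_t split = crc64_ref(crc64_ref(0, buf, 4),
				   buf + 4, sizeof(buf) - 1 - 4);

	/* Both values should print as 6c40df5f0b497347, the published
	 * CRC-64/ECMA-182 check value for "123456789". */
	printf("%016llx %016llx\n",
	       (unsigned long long) whole,
	       (unsigned long long) split);

	return whole == split ? 0 : 1;
}

Splitting the input threads the same 64-bit CRC register through both calls, which is exactly why checksum.h can shrink bch2_crc64_update() to a one-line wrapper around crc64_be() and drop its private copy of the table.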