From 188b6d0c8ef1c02462a744b176557c27220112c9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 22 Oct 2022 13:25:25 -0400
Subject: [PATCH] Update bcachefs sources to cd779e0cc5 bcachefs: Skip inode
 unpack/pack in bch2_extent_update()

---
 .bcachefs_revision                  |   2 +-
 cmd_migrate.c                       |   2 +-
 include/linux/bug.h                 |   1 +
 include/linux/prefetch.h            |   3 +
 include/linux/string.h              |   1 +
 libbcachefs/acl.c                   |   2 +-
 libbcachefs/alloc_background.c      |  98 ++++++++++-------
 libbcachefs/alloc_foreground.c      |  46 +++++---
 libbcachefs/backpointers.c          |  10 +-
 libbcachefs/bbpos.h                 |   2 +
 libbcachefs/bcachefs.h              |   3 +-
 libbcachefs/bcachefs_format.h       |  75 ++++++++++---
 libbcachefs/bkey.c                  | 108 +-----------------
 libbcachefs/bkey.h                  |  99 ++++++++++++++++-
 libbcachefs/bkey_buf.h              |   1 +
 libbcachefs/bkey_cmp.h              | 129 ++++++++++++++++++++++
 libbcachefs/bkey_methods.c          |   1 +
 libbcachefs/bkey_sort.c             |   3 +-
 libbcachefs/bset.c                  |   9 +-
 libbcachefs/bset.h                  |  94 ----------------
 libbcachefs/btree_cache.c           |  97 +++++++++++-----
 libbcachefs/btree_cache.h           |   3 +-
 libbcachefs/btree_gc.c              |   2 +-
 libbcachefs/btree_iter.c            | 106 ++++++++++++------
 libbcachefs/btree_iter.h            |  30 ++++-
 libbcachefs/btree_key_cache.c       |  41 +++++--
 libbcachefs/btree_locking.c         |   4 +-
 libbcachefs/btree_types.h           |  13 ++-
 libbcachefs/btree_update_interior.c |   2 +-
 libbcachefs/btree_update_leaf.c     |  64 ++++++-----
 libbcachefs/buckets.c               |  21 ++--
 libbcachefs/buckets.h               |  12 +-
 libbcachefs/checksum.c              |   2 +-
 libbcachefs/compress.c              |   2 +-
 libbcachefs/data_update.c           |   2 +-
 libbcachefs/debug.c                 |   8 +-
 libbcachefs/dirent.c                |   2 +-
 libbcachefs/extents.c               |   4 +-
 libbcachefs/fs-common.c             |   4 +-
 libbcachefs/fs-io.c                 |   8 +-
 libbcachefs/fs.c                    |   7 +-
 libbcachefs/fsck.c                  |   3 +-
 libbcachefs/inode.c                 | 165 ++++++++++++++++++++++++----
 libbcachefs/inode.h                 |  25 ++++-
 libbcachefs/io.c                    |  84 ++++++++++----
 libbcachefs/journal.c               |  12 +-
 libbcachefs/journal.h               |  23 +++-
 libbcachefs/journal_reclaim.c       |   4 +-
 libbcachefs/journal_sb.c            |   4 +-
 libbcachefs/keylist.c               |   1 +
 libbcachefs/move.c                  |   4 +-
 libbcachefs/recovery.c              |  11 +-
 libbcachefs/replicas.h              |   1 +
 libbcachefs/siphash.c               |   2 +-
 libbcachefs/super-io.c              |   6 +-
 libbcachefs/super.c                 |  37 +++----
 libbcachefs/sysfs.c                 |  17 ++-
 libbcachefs/util.c                  |   4 +-
 linux/string.c                      |  27 +++++
 linux/string_helpers.c              |   2 +-
 60 files changed, 1033 insertions(+), 522 deletions(-)
 create mode 100644 libbcachefs/bkey_cmp.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index da204b9..e8327f5 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-44be8c1da2e1d4edb23d5dcf3b522971c245c3f6
+cd779e0cc51cb232d17eec4537cb4769af202b5f

diff --git a/cmd_migrate.c b/cmd_migrate.c
index 9e8ceef..3ba51c0 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -122,7 +122,7 @@ static void update_inode(struct bch_fs *c,
 	struct bkey_inode_buf packed;
 	int ret;
 
-	bch2_inode_pack(c, &packed, inode);
+	bch2_inode_pack(&packed, inode);
 	packed.inode.k.p.snapshot = U32_MAX;
 	ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
 				NULL, NULL, 0);

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 957d408..1a10f7e 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -2,6 +2,7 @@
 #define __TOOLS_LINUX_BUG_H
 
 #include
+#include
 #include
 
 #ifdef CONFIG_VALGRIND

diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 13cb826..b14fbe9 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -4,4 +4,7 @@
 #define prefetch(p) \
 	({ __maybe_unused typeof(p) __var = (p); })
 
+#define prefetchw(p) \
+	({ __maybe_unused typeof(p) __var = (p); })
+
 #endif /* _LINUX_PREFETCH_H */
diff --git a/include/linux/string.h b/include/linux/string.h
index b5e00a0..3ceda3a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -6,6 +6,7 @@
 #include	/* for size_t */
 
 extern size_t strlcpy(char *dest, const char *src, size_t size);
+extern ssize_t strscpy(char *dest, const char *src, size_t count);
 extern char *strim(char *);
 extern void memzero_explicit(void *, size_t);
 int match_string(const char * const *, size_t, const char *);

diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 5c6ccf6..9592541 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -173,7 +173,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
 	bkey_xattr_init(&xattr->k_i);
 	xattr->k.u64s		= u64s;
 	xattr->v.x_type		= acl_to_xattr_type(type);
-	xattr->v.x_name_len	= 0,
+	xattr->v.x_name_len	= 0;
 	xattr->v.x_val_len	= cpu_to_le16(acl_len);
 
 	acl_header = xattr_val(&xattr->v);

diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index d0d7690..796b9f5 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -210,31 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 	return ret;
 }
 
-struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
-			      struct bpos pos)
-{
-	struct bkey_s_c k;
-	struct bkey_i_alloc_v4 *a;
-	int ret;
-
-	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
-			     BTREE_ITER_WITH_UPDATES|
-			     BTREE_ITER_CACHED|
-			     BTREE_ITER_INTENT);
-	k = bch2_btree_iter_peek_slot(iter);
-	ret = bkey_err(k);
-	if (ret) {
-		bch2_trans_iter_exit(trans, iter);
-		return ERR_PTR(ret);
-	}
-
-	a = bch2_alloc_to_v4_mut(trans, k);
-	if (IS_ERR(a))
-		bch2_trans_iter_exit(trans, iter);
-	return a;
-}
-
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
 {
 	unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -475,12 +450,13 @@ void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
 	}
 }
 
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+static noinline struct bkey_i_alloc_v4 *
+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
 {
+	struct bkey_i_alloc_v4 *ret;
 	unsigned bytes = k.k->type == KEY_TYPE_alloc_v4 ?
 		bkey_bytes(k.k) : sizeof(struct bkey_i_alloc_v4);
-	struct bkey_i_alloc_v4 *ret;
 
 	/*
 	 * Reserve space for one more backpointer here:
@@ -491,20 +467,18 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
 		return ret;
 
 	if (k.k->type == KEY_TYPE_alloc_v4) {
-		bkey_reassemble(&ret->k_i, k);
+		struct bch_backpointer *src, *dst;
 
-		if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
-			struct bch_backpointer *src, *dst;
+		bkey_reassemble(&ret->k_i, k);
 
-			src = alloc_v4_backpointers(&ret->v);
-			SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
-			dst = alloc_v4_backpointers(&ret->v);
+		src = alloc_v4_backpointers(&ret->v);
+		SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+		dst = alloc_v4_backpointers(&ret->v);
 
-			memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
-				sizeof(struct bch_backpointer));
-			memset(src, 0, dst - src);
-			set_alloc_v4_u64s(ret);
-		}
+		memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
+			sizeof(struct bch_backpointer));
+		memset(src, 0, dst - src);
+		set_alloc_v4_u64s(ret);
 	} else {
 		bkey_alloc_v4_init(&ret->k_i);
 		ret->k.p = k.k->p;
@@ -513,6 +487,54 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
 	return ret;
 }
 
+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
+{
+	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
+	    BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
+		/*
+		 * Reserve space for one more backpointer here:
+		 * Not sketchy at doing it this way, nope...
+		 */
+		struct bkey_i_alloc_v4 *ret =
+			bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer));
+		if (!IS_ERR(ret))
+			bkey_reassemble(&ret->k_i, k);
+		return ret;
+	}
+
+	return __bch2_alloc_to_v4_mut(trans, k);
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+	return bch2_alloc_to_v4_mut_inlined(trans, k);
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
+			      struct bpos pos)
+{
+	struct bkey_s_c k;
+	struct bkey_i_alloc_v4 *a;
+	int ret;
+
+	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+			     BTREE_ITER_WITH_UPDATES|
+			     BTREE_ITER_CACHED|
+			     BTREE_ITER_INTENT);
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
+	if (ret) {
+		bch2_trans_iter_exit(trans, iter);
+		return ERR_PTR(ret);
+	}
+
+	a = bch2_alloc_to_v4_mut_inlined(trans, k);
+	if (IS_ERR(a))
+		bch2_trans_iter_exit(trans, iter);
+	return a;
+}
+
 int bch2_alloc_read(struct bch_fs *c)
 {
 	struct btree_trans trans;

diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e89999c..a9e0c73 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -489,16 +489,16 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
 * bch_bucket_alloc - allocate a single bucket from a specific device
 *
 * Returns index of bucket on success, 0 on failure
- * */
+ */
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
				      struct bch_dev *ca,
				      enum alloc_reserve reserve,
				      bool may_alloc_partial,
-				      struct closure *cl)
+				      struct closure *cl,
+				      struct bch_dev_usage *usage)
 {
 	struct bch_fs *c = trans->c;
 	struct open_bucket *ob = NULL;
-	struct bch_dev_usage usage;
 	bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
 	u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
 	u64 avail;
@@ -509,16 +509,16 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
 	u64 skipped_nouse = 0;
 	bool waiting = false;
again:
-	usage = bch2_dev_usage_read(ca);
-	avail = dev_buckets_free(ca, usage, reserve);
+	bch2_dev_usage_read_fast(ca, usage);
+	avail = dev_buckets_free(ca, *usage, reserve);
 
-	if (usage.d[BCH_DATA_need_discard].buckets > avail)
+	if (usage->d[BCH_DATA_need_discard].buckets > avail)
 		bch2_do_discards(c);
 
-	if (usage.d[BCH_DATA_need_gc_gens].buckets > avail)
+	if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
 		bch2_do_gc_gens(c);
 
-	if (should_invalidate_buckets(ca, usage))
+	if (should_invalidate_buckets(ca, *usage))
 		bch2_do_invalidates(c);
 
 	if (!avail) {
@@ -577,10 +577,10 @@ err:
 	if (!IS_ERR(ob))
 		trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
				may_alloc_partial, ob->bucket);
-	else
+	else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
 		trace_and_count(c, bucket_alloc_fail,
				ca, bch2_alloc_reserves[reserve],
-				usage.d[BCH_DATA_free].buckets,
+				usage->d[BCH_DATA_free].buckets,
				avail,
				bch2_copygc_wait_amount(c),
				c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
@@ -599,11 +599,12 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
				      bool may_alloc_partial,
				      struct closure *cl)
 {
+	struct bch_dev_usage usage;
 	struct open_bucket *ob;
 
 	bch2_trans_do(c, NULL, NULL, 0,
		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
-							may_alloc_partial, cl)));
+							may_alloc_partial, cl, &usage)));
 	return ob;
 }
 
@@ -630,8 +631,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
 	return ret;
 }
 
-void bch2_dev_stripe_increment(struct bch_dev *ca,
-			       struct dev_stripe_state *stripe)
+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
+			       struct dev_stripe_state *stripe,
+			       struct bch_dev_usage *usage)
 {
 	u64 *v = stripe->next_alloc + ca->dev_idx;
 	u64 free_space = dev_buckets_available(ca, RESERVE_none);
@@ -650,6 +652,15 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
 	*v = *v < scale ? 0 : *v - scale;
 }
 
+void bch2_dev_stripe_increment(struct bch_dev *ca,
+			       struct dev_stripe_state *stripe)
+{
+	struct bch_dev_usage usage;
+
+	bch2_dev_usage_read_fast(ca, &usage);
+	bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
+}
+
 #define BUCKET_MAY_ALLOC_PARTIAL	(1 << 0)
 #define BUCKET_ALLOC_USE_DURABILITY	(1 << 1)
 
@@ -694,6 +705,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 	BUG_ON(*nr_effective >= nr_replicas);
 
 	for (i = 0; i < devs_sorted.nr; i++) {
+		struct bch_dev_usage usage;
 		struct open_bucket *ob;
 
 		dev = devs_sorted.devs[i];
@@ -713,9 +725,9 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 		}
 
 		ob = bch2_bucket_alloc_trans(trans, ca, reserve,
-				flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
+				flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
 		if (!IS_ERR(ob))
-			bch2_dev_stripe_increment(ca, stripe);
+			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 		percpu_ref_put(&ca->ref);
 
 		if (IS_ERR(ob)) {
@@ -1110,7 +1122,7 @@ restart_find_oldest:
 	hlist_add_head_rcu(&wp->node, head);
 	mutex_unlock(&c->write_points_hash_lock);
out:
-	wp->last_used = sched_clock();
+	wp->last_used = local_clock();
 	return wp;
 }
 
@@ -1356,7 +1368,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
	     wp < c->write_points + c->write_points_nr;
	     wp++) {
 		writepoint_init(wp, BCH_DATA_user);
-		wp->last_used = sched_clock();
+		wp->last_used = local_clock();
 		wp->write_point = (unsigned long) wp;
 		hlist_add_head_rcu(&wp->node,
				   writepoint_hash(c, wp->write_point));

diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index a537768..d74de1d 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -535,7 +535,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
 	if (bp.level == c->btree_roots[bp.btree_id].level + 1)
 		k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
 
-	if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+	if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
 		return k;
 
 	bch2_trans_iter_exit(trans, iter);
@@ -585,12 +585,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
 	if (IS_ERR(b))
 		goto err;
 
-	if (extent_matches_bp(c, bp.btree_id, bp.level,
-			      bkey_i_to_s_c(&b->key),
-			      bucket, bp))
+	if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+				   bkey_i_to_s_c(&b->key),
+				   bucket, bp))
 		return b;
 
-	if (btree_node_will_make_reachable(b)) {
+	if (b && btree_node_will_make_reachable(b)) {
 		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
 	} else {
 		backpointer_not_found(trans, bucket, bp_offset, bp,

diff --git a/libbcachefs/bbpos.h b/libbcachefs/bbpos.h
index eaf3dbf..1fbed1f 100644
--- a/libbcachefs/bbpos.h
+++ b/libbcachefs/bbpos.h
@@ -2,6 +2,8 @@
 #ifndef _BCACHEFS_BBPOS_H
 #define _BCACHEFS_BBPOS_H
 
+#include "bkey_methods.h"
+
 struct bbpos {
 	enum btree_id		btree;
 	struct bpos		pos;

diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index ccac2a3..33186fa 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -107,7 +107,7 @@
 *
 * BTREE NODES:
 *
- * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
 * free smaller than a bucket - so, that's how big our btree nodes are.
 *
 * (If buckets are really big we'll only use part of the bucket for a btree node
@@ -930,7 +930,6 @@ struct bch_fs {
 
 	struct time_stats	times[BCH_TIME_STAT_NR];
 
-	const char		*btree_transaction_fns[BCH_TRANSACTIONS_NR];
 	struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
 };

diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9e10fc8..bfcb75a 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -336,7 +336,7 @@ static inline void bkey_init(struct bkey *k)
 * number.
 *
 * - WHITEOUT: for hash table btrees
-*/
+ */
 #define BCH_BKEY_TYPES()				\
 	x(deleted,		0)			\
 	x(whiteout,		1)			\
@@ -366,7 +366,8 @@ static inline void bkey_init(struct bkey *k)
 	x(set,			25)			\
 	x(lru,			26)			\
 	x(alloc_v4,		27)			\
-	x(backpointer,		28)
+	x(backpointer,		28)			\
+	x(inode_v3,		29)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name	= nr,
@@ -717,6 +718,21 @@ struct bch_inode_v2 {
 	__u8		fields[0];
 } __attribute__((packed, aligned(8)));
 
+struct bch_inode_v3 {
+	struct bch_val		v;
+
+	__le64			bi_journal_seq;
+	__le64			bi_hash_seed;
+	__le64			bi_flags;
+	__le64			bi_sectors;
+	__le64			bi_size;
+	__le64			bi_version;
+	__u8			fields[0];
+} __attribute__((packed, aligned(8)));
+
+#define INODEv3_FIELDS_START_INITIAL	6
+#define INODEv3_FIELDS_START_CUR	(offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+
 struct bch_inode_generation {
 	struct bch_val		v;
 
@@ -728,7 +744,7 @@ struct bch_inode_generation {
 * bi_subvol and bi_parent_subvol are only set for subvolume roots:
 */
 
-#define BCH_INODE_FIELDS()			\
+#define BCH_INODE_FIELDS_v2()			\
 	x(bi_atime,			96)	\
 	x(bi_ctime,			96)	\
 	x(bi_mtime,			96)	\
@@ -755,6 +771,31 @@ struct bch_inode_generation {
 	x(bi_subvol,			32)	\
 	x(bi_parent_subvol,		32)
 
+#define BCH_INODE_FIELDS_v3()			\
+	x(bi_atime,			96)	\
+	x(bi_ctime,			96)	\
+	x(bi_mtime,			96)	\
+	x(bi_otime,			96)	\
+	x(bi_uid,			32)	\
+	x(bi_gid,			32)	\
+	x(bi_nlink,			32)	\
+	x(bi_generation,		32)	\
+	x(bi_dev,			32)	\
+	x(bi_data_checksum,		8)	\
+	x(bi_compression,		8)	\
+	x(bi_project,			32)	\
+	x(bi_background_compression,	8)	\
+	x(bi_data_replicas,		8)	\
+	x(bi_promote_target,		16)	\
+	x(bi_foreground_target,		16)	\
+	x(bi_background_target,		16)	\
+	x(bi_erasure_code,		16)	\
+	x(bi_fields_set,		16)	\
+	x(bi_dir,			64)	\
+	x(bi_dir_offset,		64)	\
+	x(bi_subvol,			32)	\
+	x(bi_parent_subvol,		32)
+
 /* subset of BCH_INODE_FIELDS */
 #define BCH_INODE_OPTS()			\
 	x(data_checksum,		8)	\
@@ -780,16 +821,16 @@ enum {
	 * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
	 * flags)
	 */
-	__BCH_INODE_SYNC	= 0,
-	__BCH_INODE_IMMUTABLE	= 1,
-	__BCH_INODE_APPEND	= 2,
-	__BCH_INODE_NODUMP	= 3,
-	__BCH_INODE_NOATIME	= 4,
+	__BCH_INODE_SYNC		= 0,
+	__BCH_INODE_IMMUTABLE		= 1,
+	__BCH_INODE_APPEND		= 2,
+	__BCH_INODE_NODUMP		= 3,
+	__BCH_INODE_NOATIME		= 4,
 
-	__BCH_INODE_I_SIZE_DIRTY= 5,
-	__BCH_INODE_I_SECTORS_DIRTY= 6,
-	__BCH_INODE_UNLINKED	= 7,
-	__BCH_INODE_BACKPTR_UNTRUSTED = 8,
+	__BCH_INODE_I_SIZE_DIRTY	= 5,
+	__BCH_INODE_I_SECTORS_DIRTY	= 6,
+	__BCH_INODE_UNLINKED		= 7,
+	__BCH_INODE_BACKPTR_UNTRUSTED	= 8,
 
	/* bits 20+ reserved for packed fields below: */
 };
@@ -811,6 +852,13 @@ LE32_BITMASK(INODE_NEW_VARINT,	struct bch_inode, bi_flags, 31, 32);
 LE64_BITMASK(INODEv2_STR_HASH,	struct bch_inode_v2, bi_flags, 20, 24);
 LE64_BITMASK(INODEv2_NR_FIELDS,	struct bch_inode_v2, bi_flags, 24, 31);
 
+LE64_BITMASK(INODEv3_STR_HASH,	struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS,	struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+				struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE,	struct bch_inode_v3, bi_flags, 36, 52);
+
 /* Dirents */
 
 /*
@@ -1494,7 +1542,8 @@ struct bch_sb_field_journal_seq_blacklist {
 	x(freespace,			19)	\
 	x(alloc_v4,			20)	\
 	x(new_data_types,		21)	\
-	x(backpointers,			22)
+	x(backpointers,			22)	\
+	x(inode_v3,			23)
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,

diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index d348175..f7e5d0c 100644
--- a/libbcachefs/bkey.c
+++ b/libbcachefs/bkey.c
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "bkey.h"
+#include "bkey_cmp.h"
 #include "bkey_methods.h"
 #include "bset.h"
 #include "util.h"
@@ -763,50 +764,6 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
 
 #ifdef CONFIG_X86_64
 
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-				  unsigned nr_key_bits)
-{
-	long d0, d1, d2, d3;
-	int cmp;
-
-	/* we shouldn't need asm for this, but gcc is being retarded: */
-
-	asm(".intel_syntax noprefix;"
-	    "xor eax, eax;"
-	    "xor edx, edx;"
-	    "1:;"
-	    "mov r8, [rdi];"
-	    "mov r9, [rsi];"
-	    "sub ecx, 64;"
-	    "jl 2f;"
-
-	    "cmp r8, r9;"
-	    "jnz 3f;"
-
-	    "lea rdi, [rdi - 8];"
-	    "lea rsi, [rsi - 8];"
-	    "jmp 1b;"
-
-	    "2:;"
-	    "not ecx;"
-	    "shr r8, 1;"
-	    "shr r9, 1;"
-	    "shr r8, cl;"
-	    "shr r9, cl;"
-	    "cmp r8, r9;"
-
-	    "3:\n"
-	    "seta al;"
-	    "setb dl;"
-	    "sub eax, edx;"
-	    ".att_syntax prefix;"
-	    : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
-	    : "0" (l), "1" (r), "3" (nr_key_bits)
-	    : "r8", "r9", "cc", "memory");
-
-	return cmp;
-}
-
 #define I(_x)			(*(out)++ = (_x))
 #define I1(i0)						I(i0)
 #define I2(i0, i1)		(I1(i0),	I(i1))
@@ -1037,40 +994,6 @@ int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
 }
 
 #else
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-				  unsigned nr_key_bits)
-{
-	u64 l_v, r_v;
-
-	if (!nr_key_bits)
-		return 0;
-
-	/* for big endian, skip past header */
-	nr_key_bits += high_bit_offset;
-	l_v = *l & (~0ULL >> high_bit_offset);
-	r_v = *r & (~0ULL >> high_bit_offset);
-
-	while (1) {
-		if (nr_key_bits < 64) {
-			l_v >>= 64 - nr_key_bits;
-			r_v >>= 64 - nr_key_bits;
-			nr_key_bits = 0;
-		} else {
-			nr_key_bits -= 64;
-		}
-
-		if (!nr_key_bits || l_v != r_v)
-			break;
-
-		l = next_word(l);
-		r = next_word(r);
-
-		l_v = *l;
-		r_v = *r;
-	}
-
-	return cmp_int(l_v, r_v);
-}
 #endif
 
 __pure
@@ -1078,19 +1001,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
					  const struct bkey_packed *r,
					  const struct btree *b)
 {
-	const struct bkey_format *f = &b->format;
-	int ret;
-
-	EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
-	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-
-	ret = __bkey_cmp_bits(high_word(f, l),
-			      high_word(f, r),
-			      b->nr_key_bits);
-
-	EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
-				bkey_unpack_pos(b, r)));
-	return ret;
+	return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
 }
 
 __pure __flatten
@@ -1106,20 +1017,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
			 const struct bkey_packed *l,
			 const struct bkey_packed *r)
 {
-	struct bkey unpacked;
-
-	if (likely(bkey_packed(l) && bkey_packed(r)))
-		return __bch2_bkey_cmp_packed_format_checked(l, r, b);
-
-	if (bkey_packed(l)) {
-		__bkey_unpack_key_format_checked(b, &unpacked, l);
-		l = (void*) &unpacked;
-	} else if (bkey_packed(r)) {
-		__bkey_unpack_key_format_checked(b, &unpacked, r);
-		r = (void*) &unpacked;
-	}
-
-	return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+	return bch2_bkey_cmp_packed_inlined(b, l, r);
 }
 
 __pure __flatten

diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index df9fb85..19b59ff 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -5,6 +5,7 @@
 #include
 
 #include "bcachefs_format.h"
+#include "btree_types.h"
 #include "util.h"
 #include "vstructs.h"
 
@@ -134,8 +135,9 @@ int bkey_cmp_left_packed(const struct btree *b,
 }
 
 /*
- * we prefer to pass bpos by ref, but it's often enough terribly convenient to
- * pass it by by val... as much as I hate c++, const ref would be nice here:
+ * The compiler generates better code when we pass bpos by ref, but it's often
+ * enough terribly convenient to pass it by val... as much as I hate c++, const
+ * ref would be nice here:
 */
 __pure __flatten
 static inline int bkey_cmp_left_packed_byval(const struct btree *b,
@@ -356,6 +358,99 @@ void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
 bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
		    const struct bkey_format *);
 
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
+static inline void
+__bkey_unpack_key_format_checked(const struct btree *b,
+				 struct bkey *dst,
+				 const struct bkey_packed *src)
+{
+	if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
+		compiled_unpack_fn unpack_fn = b->aux_data;
+		unpack_fn(dst, src);
+
+		if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+		    bch2_expensive_debug_checks) {
+			struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
+
+			BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
+		}
+	} else {
+		*dst = __bch2_bkey_unpack_key(&b->format, src);
+	}
+}
+
+static inline struct bkey
+bkey_unpack_key_format_checked(const struct btree *b,
+			       const struct bkey_packed *src)
+{
+	struct bkey dst;
+
+	__bkey_unpack_key_format_checked(b, &dst, src);
+	return dst;
+}
+
+static inline void __bkey_unpack_key(const struct btree *b,
+				     struct bkey *dst,
+				     const struct bkey_packed *src)
+{
+	if (likely(bkey_packed(src)))
+		__bkey_unpack_key_format_checked(b, dst, src);
+	else
+		*dst = *packed_to_bkey_c(src);
+}
+
+/**
+ * bkey_unpack_key -- unpack just the key, not the value
+ */
+static inline struct bkey bkey_unpack_key(const struct btree *b,
+					  const struct bkey_packed *src)
+{
+	return likely(bkey_packed(src))
+		? bkey_unpack_key_format_checked(b, src)
+		: *packed_to_bkey_c(src);
+}
+
+static inline struct bpos
+bkey_unpack_pos_format_checked(const struct btree *b,
+			       const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+	return bkey_unpack_key_format_checked(b, src).p;
+#else
+	return __bkey_unpack_pos(&b->format, src);
+#endif
+}
+
+static inline struct bpos bkey_unpack_pos(const struct btree *b,
+					  const struct bkey_packed *src)
+{
+	return likely(bkey_packed(src))
+		? bkey_unpack_pos_format_checked(b, src)
+		: packed_to_bkey_c(src)->p;
+}
+
+/* Disassembled bkeys */
+
+static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+					       const struct bkey_packed *k,
+					       struct bkey *u)
+{
+	__bkey_unpack_key(b, u, k);
+
+	return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+}
+
+/* non const version: */
+static inline struct bkey_s __bkey_disassemble(struct btree *b,
+					       struct bkey_packed *k,
+					       struct bkey *u)
+{
+	__bkey_unpack_key(b, u, k);
+
+	return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+}
+
 static inline u64 bkey_field_max(const struct bkey_format *f,
				 enum bch_bkey_fields nr)
 {

diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h
index 0d7c67a..a30c4ae 100644
--- a/libbcachefs/bkey_buf.h
+++ b/libbcachefs/bkey_buf.h
@@ -3,6 +3,7 @@
 #define _BCACHEFS_BKEY_BUF_H
 
 #include "bcachefs.h"
+#include "bkey.h"
 
 struct bkey_buf {
 	struct bkey_i	*k;

diff --git a/libbcachefs/bkey_cmp.h b/libbcachefs/bkey_cmp.h
new file mode 100644
index 0000000..5f42a6e
--- /dev/null
+++ b/libbcachefs/bkey_cmp.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_CMP_H
+#define _BCACHEFS_BKEY_CMP_H
+
+#include "bkey.h"
+
+#ifdef CONFIG_X86_64
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+				  unsigned nr_key_bits)
+{
+	long d0, d1, d2, d3;
+	int cmp;
+
+	/* we shouldn't need asm for this, but gcc is being retarded: */
+
+	asm(".intel_syntax noprefix;"
+	    "xor eax, eax;"
+	    "xor edx, edx;"
+	    "1:;"
+	    "mov r8, [rdi];"
+	    "mov r9, [rsi];"
+	    "sub ecx, 64;"
+	    "jl 2f;"
+
+	    "cmp r8, r9;"
+	    "jnz 3f;"
+
+	    "lea rdi, [rdi - 8];"
+	    "lea rsi, [rsi - 8];"
+	    "jmp 1b;"
+
+	    "2:;"
+	    "not ecx;"
+	    "shr r8, 1;"
+	    "shr r9, 1;"
+	    "shr r8, cl;"
+	    "shr r9, cl;"
+	    "cmp r8, r9;"
+
+	    "3:\n"
+	    "seta al;"
+	    "setb dl;"
+	    "sub eax, edx;"
+	    ".att_syntax prefix;"
+	    : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+	    : "0" (l), "1" (r), "3" (nr_key_bits)
+	    : "r8", "r9", "cc", "memory");
+
+	return cmp;
+}
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+				  unsigned nr_key_bits)
+{
+	u64 l_v, r_v;
+
+	if (!nr_key_bits)
+		return 0;
+
+	/* for big endian, skip past header */
+	nr_key_bits += high_bit_offset;
+	l_v = *l & (~0ULL >> high_bit_offset);
+	r_v = *r & (~0ULL >> high_bit_offset);
+
+	while (1) {
+		if (nr_key_bits < 64) {
+			l_v >>= 64 - nr_key_bits;
+			r_v >>= 64 - nr_key_bits;
+			nr_key_bits = 0;
+		} else {
+			nr_key_bits -= 64;
+		}
+
+		if (!nr_key_bits || l_v != r_v)
+			break;
+
+		l = next_word(l);
+		r = next_word(r);
+
+		l_v = *l;
+		r_v = *r;
+	}
+
+	return cmp_int(l_v, r_v);
+}
+#endif
+
+static inline __pure __flatten
+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
+						  const struct bkey_packed *r,
+						  const struct btree *b)
+{
+	const struct bkey_format *f = &b->format;
+	int ret;
+
+	EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+	ret = __bkey_cmp_bits(high_word(f, l),
+			      high_word(f, r),
+			      b->nr_key_bits);
+
+	EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
+				bkey_unpack_pos(b, r)));
+	return ret;
+}
+
+static inline __pure __flatten
+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
+				 const struct bkey_packed *l,
+				 const struct bkey_packed *r)
+{
+	struct bkey unpacked;
+
+	if (likely(bkey_packed(l) && bkey_packed(r)))
+		return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+
+	if (bkey_packed(l)) {
+		__bkey_unpack_key_format_checked(b, &unpacked, l);
+		l = (void *) &unpacked;
+	} else if (bkey_packed(r)) {
+		__bkey_unpack_key_format_checked(b, &unpacked, r);
+		r = (void *) &unpacked;
+	}
+
+	return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+#endif /* _BCACHEFS_BKEY_CMP_H */

diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index e0cbac8..14d910a 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
		(1U << KEY_TYPE_whiteout)|
		(1U << KEY_TYPE_inode)|
		(1U << KEY_TYPE_inode_v2)|
+		(1U << KEY_TYPE_inode_v3)|
		(1U << KEY_TYPE_inode_generation),
	[BKEY_TYPE_dirents] =
		(1U << KEY_TYPE_deleted)|

diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index b1385a7..8518054 100644
--- a/libbcachefs/bkey_sort.c
+++ b/libbcachefs/bkey_sort.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "bkey_buf.h"
+#include "bkey_cmp.h"
 #include "bkey_sort.h"
 #include "bset.h"
 #include "extents.h"
@@ -155,7 +156,7 @@ static inline int sort_keys_cmp(struct btree *b,
				struct bkey_packed *l,
				struct bkey_packed *r)
 {
-	return bch2_bkey_cmp_packed(b, l, r) ?:
+	return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
		(int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
		(int) l->needs_whiteout - (int) r->needs_whiteout;
 }

diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index fa60ef8..0942353 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -965,7 +965,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
 	t->size -= j - l;
 
 	for (j = l; j < t->size; j++)
-		rw_aux_tree(b, t)[j].offset += shift;
+		rw_aux_tree(b, t)[j].offset += shift;
 
 	EBUG_ON(l < t->size &&
		rw_aux_tree(b, t)[l].offset ==
@@ -1266,7 +1266,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
 	bch2_btree_node_iter_sort(iter, b);
 }
 
-noinline __flatten __attribute__((cold))
+noinline __flatten __cold
 static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
			     struct btree *b, struct bpos *search)
 {
@@ -1441,7 +1441,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 	EBUG_ON(iter->data->k > iter->data->end);
 
 	if (unlikely(__btree_node_iter_set_end(iter, 0))) {
-		bch2_btree_node_iter_set_drop(iter, iter->data);
+		/* avoid an expensive memmove call: */
+		iter->data[0] = iter->data[1];
+		iter->data[1] = iter->data[2];
+		iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
		return;
	}

diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 0d46534..72e6376 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -205,100 +205,6 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
 	return btree_aux_data_bytes(b) / sizeof(u64);
 }
 
-typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
-
-static inline void
-__bkey_unpack_key_format_checked(const struct btree *b,
-				 struct bkey *dst,
-				 const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-	{
-		compiled_unpack_fn unpack_fn = b->aux_data;
-		unpack_fn(dst, src);
-
-		if (bch2_expensive_debug_checks) {
-			struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
-
-			BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
-		}
-	}
-#else
-	*dst = __bch2_bkey_unpack_key(&b->format, src);
-#endif
-}
-
-static inline struct bkey
-bkey_unpack_key_format_checked(const struct btree *b,
-			       const struct bkey_packed *src)
-{
-	struct bkey dst;
-
-	__bkey_unpack_key_format_checked(b, &dst, src);
-	return dst;
-}
-
-static inline void __bkey_unpack_key(const struct btree *b,
-				     struct bkey *dst,
-				     const struct bkey_packed *src)
-{
-	if (likely(bkey_packed(src)))
-		__bkey_unpack_key_format_checked(b, dst, src);
-	else
-		*dst = *packed_to_bkey_c(src);
-}
-
-/**
- * bkey_unpack_key -- unpack just the key, not the value
- */
-static inline struct bkey bkey_unpack_key(const struct btree *b,
-					  const struct bkey_packed *src)
-{
-	return likely(bkey_packed(src))
-		? bkey_unpack_key_format_checked(b, src)
-		: *packed_to_bkey_c(src);
-}
-
-static inline struct bpos
-bkey_unpack_pos_format_checked(const struct btree *b,
-			       const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-	return bkey_unpack_key_format_checked(b, src).p;
-#else
-	return __bkey_unpack_pos(&b->format, src);
-#endif
-}
-
-static inline struct bpos bkey_unpack_pos(const struct btree *b,
-					  const struct bkey_packed *src)
-{
-	return likely(bkey_packed(src))
-		? bkey_unpack_pos_format_checked(b, src)
-		: packed_to_bkey_c(src)->p;
-}
-
-/* Disassembled bkeys */
-
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
-					       const struct bkey_packed *k,
-					       struct bkey *u)
-{
-	__bkey_unpack_key(b, u, k);
-
-	return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
-}
-
-/* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
-					       struct bkey_packed *k,
-					       struct bkey *u)
-{
-	__bkey_unpack_key(b, u, k);
-
-	return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
-}
-
 #define for_each_bset(_b, _t)					\
	for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)

diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index f84b508..8dd2db4 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -14,6 +14,12 @@
 #include
 #include
 
+#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
+do {						 \
+	if (shrinker_counter)			 \
+		bc->not_freed_##counter++;	 \
+} while (0)
+
 const char * const bch2_btree_node_flags[] = {
 #define x(f)	#f,
	BTREE_FLAGS()
@@ -175,7 +181,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
 	mutex_lock(&bc->lock);
 	ret = __bch2_btree_node_hash_insert(bc, b);
 	if (!ret)
-		list_add(&b->list, &bc->live);
+		list_add_tail(&b->list, &bc->live);
 	mutex_unlock(&bc->lock);
 
 	return ret;
@@ -194,7 +200,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	int ret = 0;
@@ -204,38 +210,64 @@ wait_on_io:
 	if (b->flags & ((1U << BTREE_NODE_dirty)|
			(1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
-		if (!flush)
+		if (!flush) {
+			if (btree_node_dirty(b))
+				BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+			else if (btree_node_read_in_flight(b))
+				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+			else if (btree_node_write_in_flight(b))
+				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
 			return -ENOMEM;
+		}
 
		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}
 
-	if (!six_trylock_intent(&b->c.lock))
+	if (!six_trylock_intent(&b->c.lock)) {
+		BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
 		return -ENOMEM;
+	}
 
-	if (!six_trylock_write(&b->c.lock))
+	if (!six_trylock_write(&b->c.lock)) {
+		BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
 		goto out_unlock_intent;
+	}
 
	/* recheck under lock */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
-		if (!flush)
+		if (!flush) {
+			if (btree_node_read_in_flight(b))
+				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+			else if (btree_node_write_in_flight(b))
+				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
 			goto out_unlock;
+		}
 
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}
 
-	if (btree_node_noevict(b) ||
-	    btree_node_write_blocked(b) ||
-	    btree_node_will_make_reachable(b))
+	if (btree_node_noevict(b)) {
+		BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
+		goto out_unlock;
+	}
+	if (btree_node_write_blocked(b)) {
+		BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
 		goto out_unlock;
+	}
+	if (btree_node_will_make_reachable(b)) {
+		BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+		goto out_unlock;
+	}
 
 	if (btree_node_dirty(b)) {
-		if (!flush)
+		if (!flush) {
+			BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
 			goto out_unlock;
+		}
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
@@ -263,14 +295,14 @@ out_unlock_intent:
 	goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
 {
-	return __btree_node_reclaim(c, b, false);
+	return __btree_node_reclaim(c, b, false, shrinker_counter);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-	return __btree_node_reclaim(c, b, true);
+	return __btree_node_reclaim(c, b, true, false);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -319,11 +351,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 		if (touched >= nr)
 			goto out;
 
-		if (!btree_node_reclaim(c, b)) {
+		if (!btree_node_reclaim(c, b, true)) {
 			btree_node_data_free(c, b);
 			six_unlock_write(&b->c.lock);
 			six_unlock_intent(&b->c.lock);
 			freed++;
+			bc->freed++;
 		}
 	}
restart:
@@ -332,9 +365,11 @@ restart:
 
 		if (btree_node_accessed(b)) {
 			clear_btree_node_accessed(b);
+			bc->not_freed_access_bit++;
-		} else if (!btree_node_reclaim(c, b)) {
+		} else if (!btree_node_reclaim(c, b, true)) {
 			freed++;
 			btree_node_data_free(c, b);
+			bc->freed++;
 
 			bch2_btree_node_hash_remove(bc, b);
 			six_unlock_write(&b->c.lock);
@@ -390,7 +425,7 @@ static void bch2_btree_cache_shrinker_to_text(struct printbuf *out, struct shrin
 	struct bch_fs *c = container_of(shrink, struct bch_fs,
					btree_cache.shrink);
 
-	bch2_btree_cache_to_text(out, c);
+	bch2_btree_cache_to_text(out, &c->btree_cache);
 }
 
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
@@ -548,7 +583,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
 	struct btree *b;
 
 	list_for_each_entry_reverse(b, &bc->live, list)
-		if (!btree_node_reclaim(c, b))
+		if (!btree_node_reclaim(c, b, false))
 			return b;
 
 	while (1) {
@@ -583,7 +618,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
	 * disk node. Check the freed list before allocating a new one:
	 */
 	list_for_each_entry(b, freed, list)
-		if (!btree_node_reclaim(c, b)) {
+		if (!btree_node_reclaim(c, b, false)) {
 			list_del_init(&b->list);
 			goto got_node;
 		}
@@ -609,7 +644,7 @@ got_node:
	 * the list. Check if there's any freed nodes there:
	 */
 	list_for_each_entry(b2, &bc->freeable, list)
-		if (!btree_node_reclaim(c, b2)) {
+		if (!btree_node_reclaim(c, b2, false)) {
 			swap(b->data, b2->data);
 			swap(b->aux_data, b2->aux_data);
 			btree_node_to_freedlist(bc, b2);
@@ -830,7 +865,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 	if (likely(c->opts.btree_node_mem_ptr_optimization &&
		   b &&
		   b->hash_val == btree_ptr_hash_val(k)))
-			goto lock_node;
+		goto lock_node;
retry:
 	b = btree_cache_find(bc, k);
 	if (unlikely(!b)) {
@@ -1070,7 +1105,7 @@ wait_on_io:
 
	/* XXX we're called from btree_gc which will be holding other btree
	 * nodes locked
-	 * */
+	 */
 
	__bch2_btree_node_wait_on_read(b);
	__bch2_btree_node_wait_on_write(b);
@@ -1141,9 +1176,21 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
	       stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
+void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
 {
-	prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
-	prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
-	prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+	prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
+	prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
+	prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
+
+	prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
+	prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
+	prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
+	prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
+	prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
+	prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
+	prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
+	prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
+	prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
+	prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
+
 }

diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index a4df3e8..b623c70 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -4,6 +4,7 @@
 
 #include "bcachefs.h"
 #include "btree_types.h"
+#include "bkey_methods.h"
 
 extern const char * const bch2_btree_node_flags[];
 
@@ -100,6 +101,6 @@ static inline unsigned btree_blocks(struct bch_fs *c)
 
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, struct btree *);
 
-void bch2_btree_cache_to_text(struct printbuf *, struct bch_fs *);
+void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */

diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index a4d6998..801a09f 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -318,7 +318,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
			"  node %s",
			bch2_btree_ids[b->c.btree_id], b->c.level,
			buf1.buf, buf2.buf))
-			ret = set_node_min(c, cur, expected_start);
+		ret = set_node_min(c, cur, expected_start);
 	}
out:
fsck_err:

diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 2aa5655..d18346a 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -22,6 +22,8 @@
 static void btree_trans_verify_sorted(struct btree_trans *);
 inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *,
+						struct btree_path *, int);
 static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
 static inline void btree_path_list_add(struct btree_trans *,
		struct btree_path *, struct btree_path *);
@@ -1004,14 +1006,9 @@ err:
 	return ret;
 }
 
-static inline bool btree_path_good_node(struct btree_trans *trans,
-					struct btree_path *path,
-					unsigned l, int check_pos)
+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
+						unsigned l, int check_pos)
 {
-	if (!is_btree_node(path, l) ||
-	    !bch2_btree_node_relock(trans, path, l))
-		return false;
-
 	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
 		return false;
 	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
@@ -1019,6 +1016,15 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
 	return true;
 }
 
+static inline bool btree_path_good_node(struct btree_trans *trans,
+					struct btree_path *path,
+					unsigned l, int check_pos)
+{
+	return is_btree_node(path, l) &&
+		bch2_btree_node_relock(trans, path, l) &&
+		btree_path_check_pos_in_node(path, l, check_pos);
+}
+
 static void btree_path_set_level_down(struct btree_trans *trans,
				      struct btree_path *path,
				      unsigned new_level)
@@ -1035,9 +1041,9 @@ static void btree_path_set_level_down(struct btree_trans *trans,
 	bch2_btree_path_verify(trans, path);
 }
 
-static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
-						     struct btree_path *path,
-						     int check_pos)
+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
+							 struct btree_path *path,
+							 int check_pos)
 {
 	unsigned i, l = path->level;
again:
@@ -1058,6 +1064,16 @@ again:
 	return l;
 }
 
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+						     struct btree_path *path,
+						     int check_pos)
+{
+	return likely(btree_node_locked(path, path->level) &&
+		      btree_path_check_pos_in_node(path, path->level, check_pos))
+		? path->level
+		: __btree_path_up_until_good_node(trans, path, check_pos);
+}
+
 /*
 * This is the main state machine for walking down the btree - walks down to a
 * specified depth
@@ -1158,17 +1174,21 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
			    struct btree_path *src)
 {
 	unsigned i, offset = offsetof(struct btree_path, pos);
+	int cmp = btree_path_cmp(dst, src);
 
 	memcpy((void *) dst + offset,
	       (void *) src + offset,
	       sizeof(struct btree_path) - offset);
 
-	for (i = 0; i < BTREE_MAX_DEPTH; i++)
-		if (btree_node_locked(dst, i))
-			six_lock_increment(&dst->l[i].b->c.lock,
-					   __btree_lock_want(dst, i));
+	for (i = 0; i < BTREE_MAX_DEPTH; i++) {
+		unsigned t = btree_node_locked_type(dst, i);
 
-	bch2_btree_path_check_sort(trans, dst, 0);
+		if (t != BTREE_NODE_UNLOCKED)
+			six_lock_increment(&dst->l[i].b->c.lock, t);
+	}
+
+	if (cmp)
+		bch2_btree_path_check_sort_fast(trans, dst, cmp);
 }
 
static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
@@ -1181,8 +1201,7 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr
 	return new;
 }
 
-inline struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *trans,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
			 struct btree_path *path, bool intent,
			 unsigned long ip)
 {
@@ -1218,7 +1237,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
 
 	path->pos = new_pos;
 
-	bch2_btree_path_check_sort(trans, path, cmp);
+	bch2_btree_path_check_sort_fast(trans, path, cmp);
 
 	if (unlikely(path->cached)) {
 		btree_node_unlock(trans, path, 0);
@@ -1242,7 +1261,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
 		__btree_path_level_init(path, l);
 	}
 
-	if (l != path->level) {
+	if (unlikely(l != path->level)) {
 		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
 		__bch2_btree_path_unlock(trans, path);
 	}
@@ -2518,6 +2537,25 @@ static inline void btree_path_swap(struct btree_trans *trans,
 	btree_path_verify_sorted_ref(trans, r);
 }
 
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
+							    struct btree_path *path,
+							    int cmp)
+{
+	struct btree_path *n;
+	int cmp2;
+
+	EBUG_ON(!cmp);
+
+	while ((n = cmp < 0
+		? prev_btree_path(trans, path)
+		: next_btree_path(trans, path)) &&
+	       (cmp2 = btree_path_cmp(n, path)) &&
+	       cmp2 != cmp)
+		btree_path_swap(trans, n, path);
+
+	btree_trans_verify_sorted(trans);
+}
+
 inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path,
				       int cmp)
 {
@@ -2612,7 +2650,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
					  unsigned flags,
					  unsigned long ip)
 {
-	if (trans->restarted)
+	if (unlikely(trans->restarted))
 		panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n",
		      bch2_err_str(trans->restarted),
		      (void *) trans->last_restarted_ip);
@@ -2632,7 +2670,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
	    btree_type_has_snapshots(btree_id))
		flags |= BTREE_ITER_FILTER_SNAPSHOTS;
 
-	if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
+	if (trans->journal_replay_not_finished)
		flags |= BTREE_ITER_WITH_JOURNAL;
 
	iter->trans	= trans;
@@ -2816,7 +2854,7 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
 	BUG_ON(trans->used_mempool);
 
#ifdef __KERNEL__
-	p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
+	p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
#endif
 	if (!p)
		p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
@@ -2825,15 +2863,16 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
 	trans->updates		= p; p += updates_bytes;
 }
 
-static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
-					     const char *fn)
+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+
+unsigned bch2_trans_get_fn_idx(const char *fn)
 {
 	unsigned i;
 
-	for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
-		if (!c->btree_transaction_fns[i] ||
-		    c->btree_transaction_fns[i] == fn) {
-			c->btree_transaction_fns[i] = fn;
+	for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
+		if (!bch2_btree_transaction_fns[i] ||
+		    bch2_btree_transaction_fns[i] == fn) {
+			bch2_btree_transaction_fns[i] = fn;
 			return i;
 		}
 
@@ -2841,7 +2880,7 @@ static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct b
 	return i;
 }
 
-void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn)
+void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
	__acquires(&c->btree_trans_barrier)
 {
 	struct btree_transaction_stats *s;
@@ -2851,10 +2890,13 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
 	memset(trans, 0, sizeof(*trans));
 	trans->c		= c;
-	trans->fn		= fn;
+	trans->fn		= fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
+		? bch2_btree_transaction_fns[fn_idx] : NULL;
 	trans->last_begin_time	= local_clock();
-	trans->fn_idx		= bch2_trans_get_fn_idx(trans, c, fn);
+	trans->fn_idx		= fn_idx;
 	trans->locking_wait.task = current;
+	trans->journal_replay_not_finished =
+		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
 	closure_init_stack(&trans->ref);
 
 	bch2_trans_alloc_paths(trans, c);
@@ -2979,7 +3021,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
 
 	rcu_read_lock();
 	owner = READ_ONCE(b->lock.owner);
-	pid = owner ? owner->pid : 0;;
+	pid = owner ? owner->pid : 0;
 	rcu_read_unlock();
 
 	prt_tab(out);

diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 910f6d7..0775cfa 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -131,9 +131,20 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
	_path = __trans_next_path_with_node((_trans), (_b),	\
					    (_path)->idx + 1))
 
-struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
			 bool, unsigned long);
+
+static inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+			 struct btree_path *path, bool intent,
+			 unsigned long ip)
+{
+	if (path->ref > 1 || path->preserve)
+		path = __bch2_btree_path_make_mut(trans, path, intent, ip);
+	path->should_be_locked = false;
+	return path;
+}
+
 struct btree_path * __must_check
 bch2_btree_path_set_pos(struct btree_trans *,
			struct btree_path *, struct bpos, bool, unsigned long);
@@ -551,10 +562,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
 void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
 void bch2_dump_trans_updates(struct btree_trans *);
 void bch2_dump_trans_paths_updates(struct btree_trans *);
-void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *);
+void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
 void bch2_trans_exit(struct btree_trans *);
 
-#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__)
+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+unsigned bch2_trans_get_fn_idx(const char *);
+
+#define bch2_trans_init(_trans, _c, _nr_iters, _mem)		\
+do {								\
+	static unsigned trans_fn_idx;				\
+								\
+	if (unlikely(!trans_fn_idx))				\
+		trans_fn_idx = bch2_trans_get_fn_idx(__func__);	\
+								\
+	__bch2_trans_init(_trans, _c, trans_fn_idx);		\
+} while (0)
 
 void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);

diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index b8ed25b..cd52dd5 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_cache.h"
@@ -103,6 +104,22 @@ static void bkey_cached_free(struct btree_key_cache *bc,
 	six_unlock_intent(&ck->c.lock);
 }
 
+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
+						   struct bkey_cached *ck)
+{
+	struct bkey_cached *pos;
+
+	list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
+		if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
+				 pos->btree_trans_barrier_seq)) {
+			list_move(&ck->list, &pos->list);
+			return;
+		}
+	}
+
+	list_move(&ck->list, &bc->freed_nonpcpu);
+}
+
 static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
					 struct bkey_cached *ck)
 {
@@ -130,11 +147,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 		while (f->nr > ARRAY_SIZE(f->objs) / 2) {
 			struct bkey_cached *ck2 = f->objs[--f->nr];
 
-			list_move_tail(&ck2->list, &bc->freed_nonpcpu);
+			__bkey_cached_move_to_freelist_ordered(bc, ck2);
 		}
 		preempt_enable();
 
-		list_move_tail(&ck->list, &bc->freed_nonpcpu);
+		__bkey_cached_move_to_freelist_ordered(bc, ck);
 		mutex_unlock(&bc->lock);
 	}
#else
@@ -295,7 +312,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 	bool was_new = true;
 
 	ck = bkey_cached_alloc(trans, path);
-	if (unlikely(IS_ERR(ck)))
+	if (IS_ERR(ck))
 		return ck;
 
 	if (unlikely(!ck)) {
@@ -416,7 +433,7 @@ err:
 	return ret;
 }
 
-noinline static int
+static noinline int
 bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
					 unsigned flags)
 {
@@ -597,7 +614,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
	 * Since journal reclaim depends on us making progress here, and the
	 * allocator/copygc depend on journal reclaim making progress, we need
	 * to be using alloc reserves:
-	 * */
+	 */
 	ret   = bch2_btree_iter_traverse(&b_iter) ?:
		bch2_trans_update(trans, &b_iter, ck->k,
				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
@@ -982,7 +999,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
 	bc->table_init_done = true;
 
-	bc->shrink.seeks		= 1;
+	bc->shrink.seeks		= 0;
 	bc->shrink.count_objects	= bch2_btree_key_cache_count;
 	bc->shrink.scan_objects		= bch2_btree_key_cache_scan;
 	bc->shrink.to_text		= bch2_btree_key_cache_shrinker_to_text;
@@ -991,15 +1008,17 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
 {
-	prt_printf(out, "nr_freed:\t%zu\n",	atomic_long_read(&c->nr_freed));
-	prt_printf(out, "nr_keys:\t%lu\n",	atomic_long_read(&c->nr_keys));
-	prt_printf(out, "nr_dirty:\t%lu\n",	atomic_long_read(&c->nr_dirty));
+	prt_printf(out, "nr_freed:\t%zu",	atomic_long_read(&c->nr_freed));
+	prt_newline(out);
+	prt_printf(out, "nr_keys:\t%lu",	atomic_long_read(&c->nr_keys));
+	prt_newline(out);
+	prt_printf(out, "nr_dirty:\t%lu",	atomic_long_read(&c->nr_dirty));
+	prt_newline(out);
 }
 
 void bch2_btree_key_cache_exit(void)
 {
-	if (bch2_key_cache)
-		kmem_cache_destroy(bch2_key_cache);
+	kmem_cache_destroy(bch2_key_cache);
 }
 
 int __init bch2_btree_key_cache_init(void)

diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index 93a6ebe..9d09043 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -274,7 +274,7 @@ next:
 
		b = &READ_ONCE(path->l[top->level].b)->c;
 
-		if (unlikely(IS_ERR_OR_NULL(b))) {
+		if (IS_ERR_OR_NULL(b)) {
			BUG_ON(!lock_graph_remove_non_waiters(&g));
			goto next;
		}
@@ -605,7 +605,7 @@ int bch2_trans_relock(struct btree_trans *trans)
 	struct btree_path *path;
 
 	if (unlikely(trans->restarted))
-		return - ((int) trans->restarted);
+		return -((int) trans->restarted);
 
 	trans_for_each_path(trans, path)
		if (path->should_be_locked &&

diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index af226ee..892d123 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -6,7 +6,7 @@
 #include
 #include
 
-#include "bkey_methods.h"
+//#include "bkey_methods.h"
 #include "buckets_types.h"
 #include "darray.h"
 #include "journal_types.h"
@@ -160,6 +160,16 @@ struct btree_cache {
	/* Number of elements in live + freeable lists */
 	unsigned		used;
 	unsigned		reserve;
+	unsigned		freed;
+	unsigned		not_freed_lock_intent;
+	unsigned		not_freed_lock_write;
+	unsigned		not_freed_dirty;
+	unsigned		not_freed_read_in_flight;
+	unsigned		not_freed_write_in_flight;
+	unsigned		not_freed_noevict;
+	unsigned		not_freed_write_blocked;
+	unsigned		not_freed_will_make_reachable;
+	unsigned		not_freed_access_bit;
 	atomic_t		dirty;
 	struct shrinker		shrink;
@@ -408,6 +418,7 @@ struct btree_trans {
 	bool			in_traverse_all:1;
 	bool			memory_allocation_failure:1;
 	bool			is_initial_gc:1;
+	bool			journal_replay_not_finished:1;
 	enum bch_errcode	restarted:16;
 	u32			restart_count;
 	unsigned long		last_restarted_ip;

diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 03c4fd0..40debf7 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -2046,7 +2046,7 @@ static int async_btree_node_rewrite_trans(struct btree_trans *trans,
		goto out;
 
 	ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-out :
+out:
 	bch2_trans_iter_exit(trans, &iter);
 
 	return ret;

diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index b166ab4..3a68382 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -339,7 +339,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_cached *ck = (void *) path->l[0].b;
-	unsigned old_u64s = ck->u64s, new_u64s;
+	unsigned new_u64s;
 	struct bkey_i *new_k;
 
 	EBUG_ON(path->level);
@@ -368,12 +368,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 	ck->u64s	= new_u64s;
 	ck->k		= new_k;
 
-	/*
-	 * Keys returned by peek() are no longer valid pointers, so we need a
-	 * transaction restart:
-	 */
-	trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
-	return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
+	return 0;
 }
 
 /* Triggers: */
@@ -1385,6 +1380,37 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 	return ret;
 }
 
+static int __must_check
+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
+				struct bkey_i *k, enum btree_update_flags flags,
+				unsigned long ip);
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+					    struct btree_path *path,
+					    struct btree_insert_entry *i,
+					    enum btree_update_flags flags,
+					    unsigned long ip)
+{
+	struct btree_path *btree_path;
+	int ret;
+
+	i->key_cache_already_flushed = true;
+	i->flags |= BTREE_TRIGGER_NORUN;
+
+	btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+				   BTREE_ITER_INTENT, _THIS_IP_);
+
+	ret = bch2_btree_path_traverse(trans, btree_path, 0);
+	if (ret)
+		goto err;
+
+	btree_path_set_should_be_locked(btree_path);
+	ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip);
+err:
+	bch2_path_put(trans, btree_path, true);
+	return ret;
+}
+
 static int __must_check
 bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
				struct bkey_i *k, enum btree_update_flags flags,
@@ -1392,7 +1418,6 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i, n;
-	int ret = 0;
 
 	BUG_ON(!path->should_be_locked);
@@ -1461,27 +1486,10 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
-	if (path->cached &&
-	    bkey_deleted(&i->old_k)) {
-		struct btree_path *btree_path;
-
-		i->key_cache_already_flushed = true;
-		i->flags |= BTREE_TRIGGER_NORUN;
+	if (unlikely(path->cached && bkey_deleted(&i->old_k)))
+		return flush_new_cached_update(trans, path, i, flags, ip);
 
-		btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
-					   BTREE_ITER_INTENT, _THIS_IP_);
-
-		ret = bch2_btree_path_traverse(trans, btree_path, 0);
-		if (ret)
-			goto err;
-
-		btree_path_set_should_be_locked(btree_path);
-		ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
-err:
-		bch2_path_put(trans, btree_path, true);
-	}
-
-	return ret;
+	return 0;
 }
 
 static int __must_check

diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c611931..116711f 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -89,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
			: ca->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
 {
 	struct bch_fs *c = ca->fs;
-	struct bch_dev_usage ret;
 	unsigned seq, i, u64s = dev_usage_u64s();
 
 	do {
 		seq = read_seqcount_begin(&c->usage_lock);
-		memcpy(&ret, ca->usage_base, u64s * sizeof(u64));
+		memcpy(usage, ca->usage_base, u64s * sizeof(u64));
 		for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-			acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s);
+			acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
 	} while (read_seqcount_retry(&c->usage_lock, seq));
-
-	return ret;
 }
 
 static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -923,7 +920,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 {
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
@@ -1115,10 +1112,10 @@ int bch2_mark_inode(struct btree_trans *trans,
 	u64 journal_seq = trans->journal_res.seq;
 
 	if (flags & BTREE_TRIGGER_INSERT) {
-		struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+		struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
 
 		BUG_ON(!journal_seq);
-		BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+		BUG_ON(new.k->type != KEY_TYPE_inode_v3);
 
 		v->bi_journal_seq = cpu_to_le64(journal_seq);
 	}
@@ -1142,7 +1139,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
			  unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bch_fs_usage __percpu *fs_usage;
 	unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 	s64 sectors = (s64) k.k->size;
@@ -1221,7 +1218,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
			unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
 	struct reflink_gc *ref;
 	size_t l, r, m;
@@ -2113,5 +2110,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
 		return -ENOMEM;
 	}
 
-	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
+	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
 }

diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 6881502..56c06cc 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -139,7 +139,15 @@ static inline u8 ptr_stale(struct bch_dev *ca,
 
 /* Device usage: */
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+{
+	struct bch_dev_usage ret;
+
+	bch2_dev_usage_read_fast(ca, &ret);
+	return ret;
+}
+
 void bch2_dev_usage_init(struct bch_dev *);
 
 static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve)
@@ -240,8 +248,6 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct
 int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 
-int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-
 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
 int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,

diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index b5850a7..3268e8d 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -131,7 +131,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
 		size_t orig_len = len;
 		int ret, i;
 
-		sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL);
+		sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
 		if (!sg)
 			return -ENOMEM;

diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index f692f35..2b7080b 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -377,7 +377,7 @@ static unsigned __bio_compress(struct bch_fs *c,
 
	/* If it's only one block, don't bother trying to compress: */
 	if (src->bi_iter.bi_size <= c->opts.block_size)
-		return 0;
+		return BCH_COMPRESSION_TYPE_incompressible;
 
 	dst_data = bio_map_or_bounce(c, dst, WRITE);
 	src_data = bio_map_or_bounce(c, src, READ);

diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 5ef35e3..b75ff07 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -312,7 +312,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 	bch2_write_op_init(&m->op, c, io_opts);
 	m->op.pos	= bkey_start_pos(k.k);
 	m->op.version	= k.k->version;
-	m->op.target	= data_opts.target,
+	m->op.target	= data_opts.target;
 	m->op.write_point = wp;
 	m->op.flags	|= BCH_WRITE_PAGES_STABLE|
		BCH_WRITE_PAGES_OWNED|

diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index d87131f..57602c8 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -477,7 +477,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
 		if (i->iter < tbl->size) {
 			rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
 				bch2_cached_btree_node_to_text(&i->buf, c, b);
-			i->iter++;;
+			i->iter++;
 		} else {
 			done = true;
 		}
@@ -637,11 +637,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
 		if (!i->size)
 			break;
 
-		if (i->iter ==
ARRAY_SIZE(c->btree_transaction_fns) || - !c->btree_transaction_fns[i->iter]) + if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) || + !bch2_btree_transaction_fns[i->iter]) break; - prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]); + prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 4d942d2..288f46b 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -103,7 +103,7 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) { prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k),dirent_val_u64s(len)); + bkey_val_u64s(k.k), dirent_val_u64s(len)); return -EINVAL; } diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 2ca1301..9e2a4ed 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -292,7 +292,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <= lp.crc.uncompressed_size) { /* can use left extent's crc entry */ - } else if (lp.crc.live_size <= rp.crc.offset ) { + } else if (lp.crc.live_size <= rp.crc.offset) { /* can use right extent's crc entry */ } else { /* check if checksums can be merged: */ @@ -351,7 +351,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) if (crc_l.offset + crc_l.live_size + crc_r.live_size <= crc_l.uncompressed_size) { /* can use left extent's crc entry */ - } else if (crc_l.live_size <= crc_r.offset ) { + } else if (crc_l.live_size <= crc_r.offset) { /* can use right extent's crc entry */ crc_r.offset -= crc_l.live_size; bch2_extent_crc_pack(entry_to_crc(en_l), crc_r, diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index e9dd1d1..1f2e1fc 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -487,11 +487,11 @@ int bch2_rename_trans(struct btree_trans *trans, ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: (src_dir.inum != dst_dir.inum ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) - : 0 ) ?: + : 0) ?: bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: (dst_inum.inum ? 
bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) - : 0 ); + : 0); err: bch2_trans_iter_exit(trans, &dst_inode_iter); bch2_trans_iter_exit(trans, &src_inode_iter); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 7429206..706180b 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1684,7 +1684,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, unsigned pg_len = min_t(unsigned, len - copied, PAGE_SIZE - pg_offset); unsigned pg_copied = copy_page_from_iter_atomic(page, - pg_offset, pg_len,iter); + pg_offset, pg_len, iter); if (!pg_copied) break; @@ -2137,8 +2137,8 @@ static long bch2_dio_write_loop(struct dio_write *dio) struct iovec *iov = dio->inline_vecs; if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { - iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), - GFP_KERNEL); + iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), + GFP_KERNEL); if (unlikely(!iov)) { dio->sync = sync = true; goto do_io; @@ -2713,7 +2713,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len truncate_pagecache_range(&inode->v, offset, end - 1); - if (block_start < block_end ) { + if (block_start < block_end) { s64 i_sectors_delta = 0; ret = bch2_fpunch(c, inode_inum(inode), diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index bf82737..186faa5 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -528,7 +528,7 @@ static int bch2_symlink(struct user_namespace *mnt_userns, inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0, (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); - if (unlikely(IS_ERR(inode))) + if (IS_ERR(inode)) return bch2_err_class(PTR_ERR(inode)); inode_lock(&inode->v); @@ -1846,7 +1846,7 @@ got_sb: sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); c->vfs_sb = sb; - strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); + strscpy(sb->s_id, c->name, sizeof(sb->s_id)); ret = super_setup_bdi(sb); if (ret) @@ -1917,8 +1917,7 @@ MODULE_ALIAS_FS("bcachefs"); void bch2_vfs_exit(void) { unregister_filesystem(&bcache_fs_type); - if (bch2_inode_cache) - kmem_cache_destroy(bch2_inode_cache); + kmem_cache_destroy(bch2_inode_cache); } int __init bch2_vfs_init(void) diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 12f2ef4..ca95d85 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -2044,7 +2044,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t, { if (t->nr == t->size) { size_t new_size = max_t(size_t, 128UL, t->size * 2); - void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL); + void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); + if (!d) { bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", new_size); diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 1f2782f..1a0d260 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, return bytes; } -void bch2_inode_pack(struct bch_fs *c, - struct bkey_inode_buf *packed, - const struct bch_inode_unpacked *inode) +static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed, + const struct bch_inode_unpacked *inode) { - struct bkey_i_inode_v2 *k = &packed->inode; + struct bkey_i_inode_v3 *k = &packed->inode; u8 *out = k->v.fields; u8 *end = (void *) &packed[1]; u8 *last_nonzero_field = out; @@ -72,13 +71,17 @@ void bch2_inode_pack(struct bch_fs *c, unsigned bytes; int ret; - bkey_inode_v2_init(&packed->inode.k_i); + 
bkey_inode_v3_init(&packed->inode.k_i); packed->inode.k.p.offset = inode->bi_inum; packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); packed->inode.v.bi_hash_seed = inode->bi_hash_seed; packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); - packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); - packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); + packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors); + packed->inode.v.bi_size = cpu_to_le64(inode->bi_size); + packed->inode.v.bi_version = cpu_to_le64(inode->bi_version); + SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode); + SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR); + #define x(_name, _bits) \ nr_fields++; \ @@ -99,7 +102,7 @@ void bch2_inode_pack(struct bch_fs *c, *out++ = 0; \ } - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x BUG_ON(out > end); @@ -110,7 +113,7 @@ void bch2_inode_pack(struct bch_fs *c, set_bkey_val_bytes(&packed->inode.k, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes); - SET_INODEv2_NR_FIELDS(&k->v, nr_fields); + SET_INODEv3_NR_FIELDS(&k->v, nr_fields); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { struct bch_inode_unpacked unpacked; @@ -120,16 +123,25 @@ void bch2_inode_pack(struct bch_fs *c, BUG_ON(ret); BUG_ON(unpacked.bi_inum != inode->bi_inum); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); + BUG_ON(unpacked.bi_sectors != inode->bi_sectors); + BUG_ON(unpacked.bi_size != inode->bi_size); + BUG_ON(unpacked.bi_version != inode->bi_version); BUG_ON(unpacked.bi_mode != inode->bi_mode); #define x(_name, _bits) if (unpacked._name != inode->_name) \ panic("unpacked %llu should be %llu", \ (u64) unpacked._name, (u64) inode->_name); - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x } } +void bch2_inode_pack(struct bkey_inode_buf *packed, + const struct bch_inode_unpacked *inode) +{ + bch2_inode_pack_inlined(packed, inode); +} + static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, struct bch_inode_unpacked *unpacked) { @@ -157,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, unpacked->_name = field[1]; \ in += ret; - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v2() #undef x /* XXX: signal if there were more fields than expected? */ @@ -196,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, return -1; \ fieldnr++; - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v2() #undef x /* XXX: signal if there were more fields than expected? 
*/ return 0; } -int bch2_inode_unpack(struct bkey_s_c k, - struct bch_inode_unpacked *unpacked) +static int bch2_inode_unpack_v3(struct bkey_s_c k, + struct bch_inode_unpacked *unpacked) +{ + struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + const u8 *in = inode.v->fields; + const u8 *end = bkey_val_end(inode); + unsigned nr_fields = INODEv3_NR_FIELDS(inode.v); + unsigned fieldnr = 0; + int ret; + u64 v[2]; + + unpacked->bi_inum = inode.k->p.offset; + unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); + unpacked->bi_hash_seed = inode.v->bi_hash_seed; + unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); + unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors); + unpacked->bi_size = le64_to_cpu(inode.v->bi_size); + unpacked->bi_version = le64_to_cpu(inode.v->bi_version); + unpacked->bi_mode = INODEv3_MODE(inode.v); + +#define x(_name, _bits) \ + if (fieldnr < nr_fields) { \ + ret = bch2_varint_decode_fast(in, end, &v[0]); \ + if (ret < 0) \ + return ret; \ + in += ret; \ + \ + if (_bits > 64) { \ + ret = bch2_varint_decode_fast(in, end, &v[1]); \ + if (ret < 0) \ + return ret; \ + in += ret; \ + } else { \ + v[1] = 0; \ + } \ + } else { \ + v[0] = v[1] = 0; \ + } \ + \ + unpacked->_name = v[0]; \ + if (v[1] || v[0] != unpacked->_name) \ + return -1; \ + fieldnr++; + + BCH_INODE_FIELDS_v3() +#undef x + + /* XXX: signal if there were more fields than expected? */ + return 0; +} + +static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, + struct bch_inode_unpacked *unpacked) { switch (k.k->type) { case KEY_TYPE_inode: { @@ -243,6 +306,14 @@ int bch2_inode_unpack(struct bkey_s_c k, } } +int bch2_inode_unpack(struct bkey_s_c k, + struct bch_inode_unpacked *unpacked) +{ + if (likely(k.k->type == KEY_TYPE_inode_v3)) + return bch2_inode_unpack_v3(k, unpacked); + return bch2_inode_unpack_slowpath(k, unpacked); +} + int bch2_inode_peek(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, @@ -288,11 +359,29 @@ int bch2_inode_write(struct btree_trans *trans, if (IS_ERR(inode_p)) return PTR_ERR(inode_p); - bch2_inode_pack(trans->c, inode_p, inode); + bch2_inode_pack_inlined(inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); } +struct bkey_s_c bch2_inode_to_v3(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_inode_unpacked u; + struct bkey_inode_buf *inode_p; + int ret; + + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return bkey_s_c_err(PTR_ERR(inode_p)); + + ret = bch2_inode_unpack(k, &u); + if (ret) + return bkey_s_c_err(ret); + + bch2_inode_pack(inode_p, &u); + return bkey_i_to_s_c(&inode_p->inode.k_i); +} + static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) { struct bch_inode_unpacked unpacked; @@ -307,7 +396,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) return -EINVAL; } - if (bch2_inode_unpack(k, &unpacked)){ + if (bch2_inode_unpack(k, &unpacked)) { prt_printf(err, "invalid variable length fields"); return -EINVAL; } @@ -378,15 +467,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, return __bch2_inode_invalid(k, err); } -static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) +int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, + int rw, struct printbuf *err) +{ + struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + + if (bkey_val_bytes(k.k) < sizeof(*inode.v)) { + 
prt_printf(err, "incorrect value size (%zu < %zu)", + bkey_val_bytes(k.k), sizeof(*inode.v)); + return -EINVAL; + } + + if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) { + prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)", + INODEv3_FIELDS_START(inode.v), + INODEv3_FIELDS_START_INITIAL, + bkey_val_u64s(inode.k)); + return -EINVAL; + } + + if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { + prt_printf(err, "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); + return -EINVAL; + } + + return __bch2_inode_invalid(k, err); +} + +static void __bch2_inode_unpacked_to_text(struct printbuf *out, + struct bch_inode_unpacked *inode) { - prt_printf(out, "mode %o flags %x journal_seq %llu", + prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu", inode->bi_mode, inode->bi_flags, - inode->bi_journal_seq); + inode->bi_journal_seq, + inode->bi_size, + inode->bi_sectors, + inode->bi_version); #define x(_name, _bits) \ prt_printf(out, " "#_name " %llu", (u64) inode->_name); - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x } @@ -396,8 +518,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked __bch2_inode_unpacked_to_text(out, inode); } -void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bch_inode_unpacked inode; diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 2ac2fc1..2915f4f 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -2,12 +2,14 @@ #ifndef _BCACHEFS_INODE_H #define _BCACHEFS_INODE_H +#include "bkey.h" #include "opts.h" extern const char * const bch2_inode_opts[]; int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); +int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode (struct bkey_ops) { \ @@ -24,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .atomic_trigger = bch2_mark_inode, \ } +#define bch2_bkey_ops_inode_v3 (struct bkey_ops) { \ + .key_invalid = bch2_inode_v3_invalid, \ + .val_to_text = bch2_inode_to_text, \ + .trans_trigger = bch2_trans_mark_inode, \ + .atomic_trigger = bch2_mark_inode, \ +} + static inline bool bkey_is_inode(const struct bkey *k) { return k->type == KEY_TYPE_inode || - k->type == KEY_TYPE_inode_v2; + k->type == KEY_TYPE_inode_v2 || + k->type == KEY_TYPE_inode_v3; } int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, @@ -51,25 +61,28 @@ struct bch_inode_unpacked { u64 bi_inum; u64 bi_journal_seq; __le64 bi_hash_seed; + u64 bi_size; + u64 bi_sectors; + u64 bi_version; u32 bi_flags; u16 bi_mode; #define x(_name, _bits) u##_bits _name; - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x }; struct bkey_inode_buf { - struct bkey_i_inode_v2 inode; + struct bkey_i_inode_v3 inode; #define x(_name, _bits) + 8 + _bits / 8 - u8 _pad[0 + BCH_INODE_FIELDS()]; + u8 _pad[0 + BCH_INODE_FIELDS_v3()]; #undef x } __attribute__((packed, aligned(8))); -void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *, - const struct bch_inode_unpacked *); +void bch2_inode_pack(struct bkey_inode_buf *, const struct 
bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *); +struct bkey_s_c bch2_inode_to_v3(struct btree_trans *, struct bkey_s_c); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 558d0c2..5971569 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -242,8 +242,7 @@ int bch2_extent_update(struct btree_trans *trans, s64 *i_sectors_delta_total, bool check_enospc) { - struct btree_iter inode_iter; - struct bch_inode_unpacked inode_u; + struct btree_iter inode_iter = { NULL }; struct bpos next_pos; bool usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; @@ -283,32 +282,67 @@ int bch2_extent_update(struct btree_trans *trans, return ret; } - ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, - BTREE_ITER_INTENT); - if (ret) - return ret; + if (new_i_size || i_sectors_delta) { + struct bkey_s_c k; + struct bkey_s_c_inode_v3 inode; + struct bkey_i_inode_v3 *new_inode; + bool i_size_update; + + bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes, + SPOS(0, inum.inum, iter->snapshot), + BTREE_ITER_INTENT|BTREE_ITER_CACHED); + k = bch2_btree_iter_peek_slot(&inode_iter); + ret = bkey_err(k); + if (unlikely(ret)) + goto err; + + ret = bkey_is_inode(k.k) ? 0 : -ENOENT; + if (unlikely(ret)) + goto err; + + if (unlikely(k.k->type != KEY_TYPE_inode_v3)) { + k = bch2_inode_to_v3(trans, k); + ret = bkey_err(k); + if (unlikely(ret)) + goto err; + } + + inode = bkey_s_c_to_inode_v3(k); + i_size_update = !(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > le64_to_cpu(inode.v->bi_size); + + if (!i_sectors_delta && !i_size_update) + goto no_inode_update; + + new_inode = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(new_inode); + if (unlikely(ret)) + goto err; - if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > inode_u.bi_size) - inode_u.bi_size = new_i_size; + bkey_reassemble(&new_inode->k_i, k); - inode_u.bi_sectors += i_sectors_delta; + if (i_size_update) + new_inode->v.bi_size = cpu_to_le64(new_i_size); + le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta); + ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0); + if (unlikely(ret)) + goto err; + } +no_inode_update: ret = bch2_trans_update(trans, iter, k, 0) ?: - bch2_inode_write(trans, &inode_iter, &inode_u) ?: bch2_trans_commit(trans, disk_res, journal_seq, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL); - bch2_trans_iter_exit(trans, &inode_iter); - - if (ret) - return ret; + if (unlikely(ret)) + goto err; if (i_sectors_delta_total) *i_sectors_delta_total += i_sectors_delta; bch2_btree_iter_set_pos(iter, next_pos); - - return 0; +err: + bch2_trans_iter_exit(trans, &inode_iter); + return ret; } /* @@ -926,8 +960,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, saved_iter = dst->bi_iter; do { - struct bch_extent_crc_unpacked crc = - (struct bch_extent_crc_unpacked) { 0 }; + struct bch_extent_crc_unpacked crc = { 0 }; struct bversion version = op->version; size_t dst_len, src_len; @@ -979,6 +1012,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, !crc_is_compressed(crc) && bch2_csum_type_is_encryption(op->crc.csum_type) == bch2_csum_type_is_encryption(op->csum_type)) { + u8 compression_type = crc.compression_type; + u16 nonce = crc.nonce; /* * Note: when we're using rechecksum(), we need to be * checksumming @src because it has all the data our @@ -997,6 +1032,13 @@ 
static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, bio_sectors(src) - (src_len >> 9), op->csum_type)) goto csum_err; + /* + * rchecksum_bio sets compression_type on crc from op->crc, + * this isn't always correct as sometimes we're changing + * an extent from uncompressed to incompressible. + */ + crc.compression_type = compression_type; + crc.nonce = nonce; } else { if ((op->flags & BCH_WRITE_DATA_ENCODED) && bch2_rechecksum_bio(c, src, version, op->crc, @@ -1115,8 +1157,8 @@ again: BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl); EBUG_ON(!wp); - if (unlikely(IS_ERR(wp))) { - if (unlikely(PTR_ERR(wp) != -EAGAIN)) { + if (IS_ERR(wp)) { + if (unlikely(wp != ERR_PTR(-EAGAIN))) { ret = PTR_ERR(wp); goto err; } diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ab59462..95c2922 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -739,7 +739,7 @@ int bch2_journal_log_msg(struct journal *j, const char *fmt, ...) return ret; entry = container_of(journal_res_entry(j, &res), - struct jset_entry_log, entry);; + struct jset_entry_log, entry); memset(entry, 0, u64s * sizeof(u64)); entry->entry.type = BCH_JSET_ENTRY_log; entry->entry.u64s = u64s - 1; @@ -796,10 +796,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, bch2_journal_block(&c->journal); } - bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); - ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL); - new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); - new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); + bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL); + ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL); + new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL); + new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL); if (!bu || !ob || !new_buckets || !new_bucket_seq) { ret = -ENOMEM; goto err_unblock; @@ -1265,7 +1265,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) rcu_read_lock(); s = READ_ONCE(j->reservations); - prt_printf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size); + prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size); prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index d3caa7e..9428f42 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -110,6 +110,7 @@ */ #include +#include #include "journal_types.h" @@ -304,15 +305,26 @@ static inline int journal_res_get_fast(struct journal *j, { union journal_res_state old, new; u64 v = atomic64_read(&j->reservations.counter); + unsigned u64s, offset; do { old.v = new.v = v; + /* + * Round up the end of the journal reservation to the next + * cacheline boundary: + */ + u64s = res->u64s; + offset = sizeof(struct jset) / sizeof(u64) + + new.cur_entry_offset + u64s; + u64s += ((offset - 1) & ((SMP_CACHE_BYTES / sizeof(u64)) - 1)) + 1; + + /* * Check if there is still room in the current journal * entry: */ - if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) + if (new.cur_entry_offset + u64s > j->cur_entry_u64s) return 0; EBUG_ON(!journal_state_count(new, new.idx)); @@ -320,7 +332,7 @@ static inline int journal_res_get_fast(struct journal *j, if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) return 0; - new.cur_entry_offset += res->u64s; + new.cur_entry_offset += u64s; journal_state_inc(&new); /* @@ -337,8 +349,15 @@ static 
inline int journal_res_get_fast(struct journal *j, res->ref = true; res->idx = old.idx; + res->u64s = u64s; res->offset = old.cur_entry_offset; res->seq = le64_to_cpu(j->buf[old.idx].data->seq); + + offset = res->offset; + while (offset < res->offset + res->u64s) { + prefetchw(vstruct_idx(j->buf[res->idx].data, offset)); + offset += SMP_CACHE_BYTES / sizeof(u64); + } return 1; } diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index e69595b..e873ce2 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -232,7 +232,7 @@ void bch2_journal_space_available(struct journal *j) if ((j->space[journal_space_clean_ondisk].next_entry < j->space[journal_space_clean_ondisk].total) && (clean - clean_ondisk <= total / 8) && - (clean_ondisk * 2 > clean )) + (clean_ondisk * 2 > clean)) set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); else clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); @@ -363,7 +363,7 @@ static inline void __journal_pin_drop(struct journal *j, list_del_init(&pin->list); /* - * Unpinning a journal entry make make journal_next_bucket() succeed, if + * Unpinning a journal entry may make journal_next_bucket() succeed if * writing a new last_seq will now make another bucket available: */ if (atomic_dec_and_test(&pin_list->count) && diff --git a/libbcachefs/journal_sb.c b/libbcachefs/journal_sb.c index cfdbd92..c19db04 100644 --- a/libbcachefs/journal_sb.c +++ b/libbcachefs/journal_sb.c @@ -31,7 +31,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, if (!nr) return 0; - b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); + b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL); if (!b) return -ENOMEM; @@ -114,7 +114,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, if (!nr) return 0; - b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL); + b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c index cda7783..5e85055 100644 --- a/libbcachefs/keylist.c +++ b/libbcachefs/keylist.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey.h" #include "keylist.h" int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 55fdaca..7486920 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -497,7 +497,7 @@ static int __bch2_move_data(struct moving_context *ctxt, /* * The iterator gets unlocked by __bch2_read_extent - need to * save a copy of @k elsewhere: - */ + */ bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); @@ -868,7 +868,7 @@ static bool migrate_pred(struct bch_fs *c, void *arg, i++; } - return data_opts->rewrite_ptrs != 0;; + return data_opts->rewrite_ptrs != 0; } static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 18f6ec5..ea7810a 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -225,7 +225,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL); + new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL); if (!new_keys.d) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); @@ -502,7 +502,7 @@ static int journal_keys_sort(struct bch_fs *c) keys->size = roundup_pow_of_two(nr_keys); - keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL); + keys->d = 
kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL); if (!keys->d) return -ENOMEM; @@ -1092,6 +1092,9 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; + } else if (c->sb.version < bcachefs_metadata_version_inode_v3) { + bch_info(c, "version prior to inode_v3, upgrade required"); + c->opts.version_upgrade = true; } } @@ -1458,7 +1461,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - if (c->sb.version < bcachefs_metadata_version_backpointers) + if (c->sb.version < bcachefs_metadata_version_inode_v3) c->opts.version_upgrade = true; if (c->opts.version_upgrade) { @@ -1537,7 +1540,7 @@ int bch2_fs_initialize(struct bch_fs *c) S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; - bch2_inode_pack(c, &packed_inode, &root_inode); + bch2_inode_pack(&packed_inode, &root_inode); packed_inode.inode.k.p.snapshot = U32_MAX; err = "error creating root directory"; diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 87820b2..cc34b38 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_REPLICAS_H #define _BCACHEFS_REPLICAS_H +#include "bkey.h" #include "eytzinger.h" #include "replicas_types.h" diff --git a/libbcachefs/siphash.c b/libbcachefs/siphash.c index c062edb..dc1a27c 100644 --- a/libbcachefs/siphash.c +++ b/libbcachefs/siphash.c @@ -160,7 +160,7 @@ u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); memset(ctx, 0, sizeof(*ctx)); - return (r); + return r; } u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index cbc5979..60c1f03 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -100,8 +100,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb, void bch2_free_super(struct bch_sb_handle *sb) { - if (sb->bio) - kfree(sb->bio); + kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->bdev)) blkdev_put(sb->bdev, sb->mode); @@ -149,8 +148,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); - if (sb->bio) - kfree(sb->bio); + kfree(sb->bio); sb->bio = bio; } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 3f674bf..5be4c40 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -327,26 +327,12 @@ static int bch2_fs_read_write_late(struct bch_fs *c) { int ret; - ret = bch2_gc_thread_start(c); - if (ret) { - bch_err(c, "error starting gc thread"); - return ret; - } - - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - ret = bch2_rebalance_start(c); if (ret) { bch_err(c, "error starting rebalance thread"); return ret; } - schedule_work(&c->ec_stripe_delete_work); - return 0; } @@ -385,6 +371,20 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + ret = bch2_gc_thread_start(c); + if (ret) { + bch_err(c, "error starting gc thread"); + return ret; + } + + ret = bch2_copygc_start(c); + if (ret) { + bch_err(c, "error starting copygc thread"); + return ret; + } + + schedule_work(&c->ec_stripe_delete_work); + bch2_do_discards(c); bch2_do_invalidates(c); @@ -463,8 +463,8 
@@ static void __bch2_fs_free(struct bch_fs *c) kfree(c->unused_inode_hints); free_heap(&c->copygc_heap); - if (c->io_complete_wq ) - destroy_workqueue(c->io_complete_wq ); + if (c->io_complete_wq) + destroy_workqueue(c->io_complete_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); if (c->btree_io_complete_wq) @@ -711,7 +711,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto err; pr_uuid(&name, c->sb.user_uuid.b); - strlcpy(c->name, name.buf, sizeof(c->name)); + strscpy(c->name, name.buf, sizeof(c->name)); printbuf_exit(&name); ret = name.allocation_failure ? -ENOMEM : 0; @@ -1784,9 +1784,8 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) } ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { + if (ret) goto err; - } mutex_lock(&c->sb_lock); mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 103fde9..0f45aef 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -175,7 +175,7 @@ read_attribute(minor); read_attribute(bucket_size); read_attribute(first_bucket); read_attribute(nbuckets); -read_attribute(durability); +rw_attribute(durability); read_attribute(iodone); read_attribute(io_latency_read); @@ -425,7 +425,7 @@ SHOW(bch2_fs) bch2_btree_updates_to_text(out, c); if (attr == &sysfs_btree_cache) - bch2_btree_cache_to_text(out, c); + bch2_btree_cache_to_text(out, &c->btree_cache); if (attr == &sysfs_btree_key_cache) bch2_btree_key_cache_to_text(out, &c->btree_key_cache); @@ -907,6 +907,19 @@ STORE(bch2_dev) mutex_unlock(&c->sb_lock); } + if (attr == &sysfs_durability) { + u64 v = strtoul_or_return(buf); + + mutex_lock(&c->sb_lock); + mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; + + if (v != BCH_MEMBER_DURABILITY(mi)) { + SET_BCH_MEMBER_DURABILITY(mi, v + 1); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); + } + if (attr == &sysfs_label) { char *tmp; int ret; diff --git a/libbcachefs/util.c b/libbcachefs/util.c index f08215a..62fa662 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -433,7 +433,7 @@ static void pr_time_units(struct printbuf *out, u64 ns) static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) { - prt_printf(out, name); + prt_str(out, name); prt_tab(out); pr_time_units(out, ns); prt_newline(out); @@ -786,8 +786,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -#include "eytzinger.h" - static int alignment_ok(const void *base, size_t align) { return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || diff --git a/linux/string.c b/linux/string.c index fd2797e..a32a899 100644 --- a/linux/string.c +++ b/linux/string.c @@ -21,8 +21,10 @@ #include #include +#include #include +#include #include #include @@ -62,6 +64,31 @@ size_t strlcpy(char *dest, const char *src, size_t size) return ret; } +ssize_t strscpy(char *dest, const char *src, size_t count) +{ + long res = 0; + + if (count == 0 || WARN_ON_ONCE(count > INT_MAX)) + return -E2BIG; + + while (count) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Hit buffer length without finding a NUL; force NUL-termination. 
*/ + if (res) + dest[res-1] = '\0'; + + return -E2BIG; +} + void memzero_explicit(void *s, size_t count) { memset(s, 0, count); diff --git a/linux/string_helpers.c b/linux/string_helpers.c index 3d720bc..29c498a 100644 --- a/linux/string_helpers.c +++ b/linux/string_helpers.c @@ -52,7 +52,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, static const unsigned int rounding[] = { 500, 50, 5 }; int i = 0, j; u32 remainder = 0, sf_cap; - char tmp[8]; + char tmp[12]; const char *unit; tmp[0] = '\0'; -- 2.39.5
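
As a quick check of the strscpy() semantics introduced above: a copy that fits returns the number of characters copied (NUL excluded), while a copy that does not fit returns -E2BIG and still NUL-terminates dest -- unlike strlcpy(), which returns the source length and so cannot report truncation directly. A minimal userspace sketch follows; my_strscpy is a local copy of the function added to linux/string.c, with only the kernel-only WARN_ON_ONCE() dropped, and the buffer sizes are arbitrary:

	#include <errno.h>
	#include <limits.h>
	#include <stdio.h>
	#include <sys/types.h>

	static ssize_t my_strscpy(char *dest, const char *src, size_t count)
	{
		long res = 0;

		if (count == 0 || count > INT_MAX)
			return -E2BIG;

		while (count) {
			char c = src[res];

			dest[res] = c;
			if (!c)
				return res;	/* fits: length copied, NUL excluded */
			res++;
			count--;
		}

		/* Hit buffer length without finding a NUL; force NUL-termination. */
		if (res)
			dest[res - 1] = '\0';

		return -E2BIG;
	}

	int main(void)
	{
		char buf[4];

		printf("%zd\n", my_strscpy(buf, "ab", sizeof(buf)));	/* 2 */
		printf("%zd\n", my_strscpy(buf, "abcdef", sizeof(buf)));	/* -E2BIG */
		printf("%s\n", buf);	/* "abc": truncated but NUL-terminated */
		return 0;
	}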
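
Likewise, the conversions above from open-coded kzalloc(nr * size) / kmalloc(nr * size) / kvmalloc(nr * size) to kcalloc(), kmalloc_array() and kvmalloc_array() are about overflow safety: the array variants check the multiplication, so a huge nr can no longer wrap around and silently yield an undersized buffer. A standalone sketch of the checked multiply; malloc_array here is an invented userspace stand-in for illustration, not the kernel helper:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Invented illustration of what kmalloc_array()-style wrappers do: */
	static void *malloc_array(size_t n, size_t size)
	{
		if (size && n > SIZE_MAX / size)
			return NULL;	/* n * size would overflow */
		return malloc(n * size);
	}

	int main(void)
	{
		size_t n = SIZE_MAX / 8 + 2;

		/* Naive multiply wraps: (SIZE_MAX/8 + 2) * 8 == 8 on 64-bit,
		 * so plain malloc(n * 8) would return a tiny buffer: */
		printf("naive size: %zu\n", n * 8);

		/* Checked variant refuses instead (prints "(nil)" on glibc): */
		printf("checked: %p\n", malloc_array(n, 8));
		return 0;
	}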