git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to cd779e0cc5 bcachefs: Skip inode unpack/pack in bch2_extent...
author    Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 22 Oct 2022 17:25:25 +0000 (13:25 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 22 Oct 2022 18:41:54 +0000 (14:41 -0400)
60 files changed:
.bcachefs_revision
cmd_migrate.c
include/linux/bug.h
include/linux/prefetch.h
include/linux/string.h
libbcachefs/acl.c
libbcachefs/alloc_background.c
libbcachefs/alloc_foreground.c
libbcachefs/backpointers.c
libbcachefs/bbpos.h
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey.c
libbcachefs/bkey.h
libbcachefs/bkey_buf.h
libbcachefs/bkey_cmp.h [new file with mode: 0644]
libbcachefs/bkey_methods.c
libbcachefs/bkey_sort.c
libbcachefs/bset.c
libbcachefs/bset.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_locking.c
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/checksum.c
libbcachefs/compress.c
libbcachefs/data_update.c
libbcachefs/debug.c
libbcachefs/dirent.c
libbcachefs/extents.c
libbcachefs/fs-common.c
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_reclaim.c
libbcachefs/journal_sb.c
libbcachefs/keylist.c
libbcachefs/move.c
libbcachefs/recovery.c
libbcachefs/replicas.h
libbcachefs/siphash.c
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/util.c
linux/string.c
linux/string_helpers.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index da204b90d6dbea199c1f8a81e1b403d4160b6b18..e8327f59bd4e74630451354eb27658a27853cda6 100644
@@ -1 +1 @@
-44be8c1da2e1d4edb23d5dcf3b522971c245c3f6
+cd779e0cc51cb232d17eec4537cb4769af202b5f
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 9e8ceeff0f7be89341c7f9ddc9f8c3e6278d70fb..3ba51c0c2ebd5d280adc7062c9d96e28473157e1 100644
@@ -122,7 +122,7 @@ static void update_inode(struct bch_fs *c,
        struct bkey_inode_buf packed;
        int ret;
 
-       bch2_inode_pack(c, &packed, inode);
+       bch2_inode_pack(&packed, inode);
        packed.inode.k.p.snapshot = U32_MAX;
        ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
                                NULL, NULL, 0);
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 957d408788537c1fc15341d3c69d39bb65f5666f..1a10f7e66144cb28db87a10f17df9dd7ec1ffa12 100644
@@ -2,6 +2,7 @@
 #define __TOOLS_LINUX_BUG_H
 
 #include <assert.h>
+#include <stdio.h>
 #include <linux/compiler.h>
 
 #ifdef CONFIG_VALGRIND
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 13cb826d1fa27ace5bd14e14c016742dd740ef49..b14fbe93664983f632ebe73c469ccda675ea4d42 100644
@@ -4,4 +4,7 @@
 #define prefetch(p)    \
        ({ __maybe_unused typeof(p) __var = (p); })
 
+#define prefetchw(p)   \
+       ({ __maybe_unused typeof(p) __var = (p); })
+
 #endif /* _LINUX_PREFETCH_H */
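
The prefetchw() shim added here mirrors the existing prefetch() stub: a GCC statement expression that type-checks and evaluates its argument exactly once but emits no prefetch instruction, so kernel code using prefetchw() compiles unchanged in userspace. A minimal standalone sketch of the idiom (not bcachefs code):

    #include <stdio.h>

    /* no-op prefetch: evaluates its argument once, generates no code */
    #define prefetch(p) \
            ({ __attribute__((unused)) __typeof__(p) __var = (p); })

    static int buf[16];

    int main(void)
    {
            int i = 0;

            prefetch(&buf[i++]);    /* i++ still happens exactly once */
            printf("i = %d\n", i);  /* prints 1 */
            return 0;
    }
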
diff --git a/include/linux/string.h b/include/linux/string.h
index b5e00a092f420f89f1237fa3b908ad8f8b82bbc1..3ceda3a3a6698884137a48bdeef4acafb3ac565a 100644
@@ -6,6 +6,7 @@
 #include <linux/types.h>       /* for size_t */
 
 extern size_t strlcpy(char *dest, const char *src, size_t size);
+extern ssize_t strscpy(char *dest, const char *src, size_t count);
 extern char *strim(char *);
 extern void memzero_explicit(void *, size_t);
 int match_string(const char * const *, size_t, const char *);
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 5c6ccf68509404713564277388737371b3c46705..9592541f7b5ce8159f77f70052a905e330125e78 100644
@@ -173,7 +173,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
        bkey_xattr_init(&xattr->k_i);
        xattr->k.u64s           = u64s;
        xattr->v.x_type         = acl_to_xattr_type(type);
-       xattr->v.x_name_len     = 0,
+       xattr->v.x_name_len     = 0;
        xattr->v.x_val_len      = cpu_to_le16(acl_len);
 
        acl_header = xattr_val(&xattr->v);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index d0d7690a49408e153c3244fde10179c95c30ba8a..796b9f5afe8c66c3067888f3f4426246b5a6e94d 100644
@@ -210,31 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
        return ret;
 }
 
-struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
-                             struct bpos pos)
-{
-       struct bkey_s_c k;
-       struct bkey_i_alloc_v4 *a;
-       int ret;
-
-       bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
-                            BTREE_ITER_WITH_UPDATES|
-                            BTREE_ITER_CACHED|
-                            BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
-       if (ret) {
-               bch2_trans_iter_exit(trans, iter);
-               return ERR_PTR(ret);
-       }
-
-       a = bch2_alloc_to_v4_mut(trans, k);
-       if (IS_ERR(a))
-               bch2_trans_iter_exit(trans, iter);
-       return a;
-}
-
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
 {
        unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -475,12 +450,13 @@ void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
        }
 }
 
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+static noinline struct bkey_i_alloc_v4 *
+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
 {
+       struct bkey_i_alloc_v4 *ret;
        unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
                ? bkey_bytes(k.k)
                : sizeof(struct bkey_i_alloc_v4);
-       struct bkey_i_alloc_v4 *ret;
 
        /*
         * Reserve space for one more backpointer here:
@@ -491,20 +467,18 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
                return ret;
 
        if (k.k->type == KEY_TYPE_alloc_v4) {
-               bkey_reassemble(&ret->k_i, k);
+               struct bch_backpointer *src, *dst;
 
-               if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
-                       struct bch_backpointer *src, *dst;
+               bkey_reassemble(&ret->k_i, k);
 
-                       src = alloc_v4_backpointers(&ret->v);
-                       SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
-                       dst = alloc_v4_backpointers(&ret->v);
+               src = alloc_v4_backpointers(&ret->v);
+               SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+               dst = alloc_v4_backpointers(&ret->v);
 
-                       memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
-                               sizeof(struct bch_backpointer));
-                       memset(src, 0, dst - src);
-                       set_alloc_v4_u64s(ret);
-               }
+               memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
+                       sizeof(struct bch_backpointer));
+               memset(src, 0, dst - src);
+               set_alloc_v4_u64s(ret);
        } else {
                bkey_alloc_v4_init(&ret->k_i);
                ret->k.p = k.k->p;
@@ -513,6 +487,54 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
        return ret;
 }
 
+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
+{
+       if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
+           BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
+               /*
+                * Reserve space for one more backpointer here:
+                * Not sketchy at doing it this way, nope...
+                */
+               struct bkey_i_alloc_v4 *ret =
+                       bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer));
+               if (!IS_ERR(ret))
+                       bkey_reassemble(&ret->k_i, k);
+               return ret;
+       }
+
+       return __bch2_alloc_to_v4_mut(trans, k);
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+       return bch2_alloc_to_v4_mut_inlined(trans, k);
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
+                             struct bpos pos)
+{
+       struct bkey_s_c k;
+       struct bkey_i_alloc_v4 *a;
+       int ret;
+
+       bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+                            BTREE_ITER_WITH_UPDATES|
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret) {
+               bch2_trans_iter_exit(trans, iter);
+               return ERR_PTR(ret);
+       }
+
+       a = bch2_alloc_to_v4_mut_inlined(trans, k);
+       if (IS_ERR(a))
+               bch2_trans_iter_exit(trans, iter);
+       return a;
+}
+
 int bch2_alloc_read(struct bch_fs *c)
 {
        struct btree_trans trans;
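
The reshuffle above is the common inline-fast-path / out-of-line-slow-path split: bch2_alloc_to_v4_mut_inlined() handles keys that are already alloc_v4 in the current layout with one allocation plus bkey_reassemble(), and everything else goes through the noinline __bch2_alloc_to_v4_mut(). A simplified standalone sketch of the idiom, with invented types and names:

    #include <stdlib.h>
    #include <string.h>

    struct val { int version; char data[32]; };

    /* slow path: layout conversion, kept out of line so the fast path
     * stays small enough to inline at every call site */
    static __attribute__((noinline)) struct val *upgrade_slow(const struct val *v)
    {
            struct val *ret = calloc(1, sizeof(*ret));

            if (ret)
                    ret->version = 2;       /* convert from the old layout */
            return ret;
    }

    static inline struct val *upgrade(const struct val *v)
    {
            if (v->version == 2) {          /* likely case: plain copy */
                    struct val *ret = malloc(sizeof(*ret));

                    if (ret)
                            memcpy(ret, v, sizeof(*ret));
                    return ret;
            }
            return upgrade_slow(v);
    }

    int main(void)
    {
            struct val old = { .version = 1 };

            free(upgrade(&old));
            return 0;
    }
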
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e89999cf9238b149df16807767497784740cc632..a9e0c7397292fcab2e6dbcac08b7dd335c6c0147 100644
@@ -489,16 +489,16 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
  * bch_bucket_alloc - allocate a single bucket from a specific device
  *
  * Returns index of bucket on success, 0 on failure
- * */
+ */
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                                      struct bch_dev *ca,
                                      enum alloc_reserve reserve,
                                      bool may_alloc_partial,
-                                     struct closure *cl)
+                                     struct closure *cl,
+                                     struct bch_dev_usage *usage)
 {
        struct bch_fs *c = trans->c;
        struct open_bucket *ob = NULL;
-       struct bch_dev_usage usage;
        bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
        u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
        u64 avail;
@@ -509,16 +509,16 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
        u64 skipped_nouse = 0;
        bool waiting = false;
 again:
-       usage = bch2_dev_usage_read(ca);
-       avail = dev_buckets_free(ca, usage, reserve);
+       bch2_dev_usage_read_fast(ca, usage);
+       avail = dev_buckets_free(ca, *usage, reserve);
 
-       if (usage.d[BCH_DATA_need_discard].buckets > avail)
+       if (usage->d[BCH_DATA_need_discard].buckets > avail)
                bch2_do_discards(c);
 
-       if (usage.d[BCH_DATA_need_gc_gens].buckets > avail)
+       if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
                bch2_do_gc_gens(c);
 
-       if (should_invalidate_buckets(ca, usage))
+       if (should_invalidate_buckets(ca, *usage))
                bch2_do_invalidates(c);
 
        if (!avail) {
@@ -577,10 +577,10 @@ err:
        if (!IS_ERR(ob))
                trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
                                may_alloc_partial, ob->bucket);
-       else
+       else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
                trace_and_count(c, bucket_alloc_fail,
                                ca, bch2_alloc_reserves[reserve],
-                               usage.d[BCH_DATA_free].buckets,
+                               usage->d[BCH_DATA_free].buckets,
                                avail,
                                bch2_copygc_wait_amount(c),
                                c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
@@ -599,11 +599,12 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
                                      bool may_alloc_partial,
                                      struct closure *cl)
 {
+       struct bch_dev_usage usage;
        struct open_bucket *ob;
 
        bch2_trans_do(c, NULL, NULL, 0,
                      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
-                                                                  may_alloc_partial, cl)));
+                                                       may_alloc_partial, cl, &usage)));
        return ob;
 }
 
@@ -630,8 +631,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
        return ret;
 }
 
-void bch2_dev_stripe_increment(struct bch_dev *ca,
-                              struct dev_stripe_state *stripe)
+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
+                              struct dev_stripe_state *stripe,
+                              struct bch_dev_usage *usage)
 {
        u64 *v = stripe->next_alloc + ca->dev_idx;
        u64 free_space = dev_buckets_available(ca, RESERVE_none);
@@ -650,6 +652,15 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
                *v = *v < scale ? 0 : *v - scale;
 }
 
+void bch2_dev_stripe_increment(struct bch_dev *ca,
+                              struct dev_stripe_state *stripe)
+{
+       struct bch_dev_usage usage;
+
+       bch2_dev_usage_read_fast(ca, &usage);
+       bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
+}
+
 #define BUCKET_MAY_ALLOC_PARTIAL       (1 << 0)
 #define BUCKET_ALLOC_USE_DURABILITY    (1 << 1)
 
@@ -694,6 +705,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
        BUG_ON(*nr_effective >= nr_replicas);
 
        for (i = 0; i < devs_sorted.nr; i++) {
+               struct bch_dev_usage usage;
                struct open_bucket *ob;
 
                dev = devs_sorted.devs[i];
@@ -713,9 +725,9 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                }
 
                ob = bch2_bucket_alloc_trans(trans, ca, reserve,
-                               flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
+                               flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
                if (!IS_ERR(ob))
-                       bch2_dev_stripe_increment(ca, stripe);
+                       bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
                percpu_ref_put(&ca->ref);
 
                if (IS_ERR(ob)) {
@@ -1110,7 +1122,7 @@ restart_find_oldest:
        hlist_add_head_rcu(&wp->node, head);
        mutex_unlock(&c->write_points_hash_lock);
 out:
-       wp->last_used = sched_clock();
+       wp->last_used = local_clock();
        return wp;
 }
 
@@ -1356,7 +1368,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
             wp < c->write_points + c->write_points_nr; wp++) {
                writepoint_init(wp, BCH_DATA_user);
 
-               wp->last_used   = sched_clock();
+               wp->last_used   = local_clock();
                wp->write_point = (unsigned long) wp;
                hlist_add_head_rcu(&wp->node,
                                   writepoint_hash(c, wp->write_point));
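
bch2_bucket_alloc_trans() now takes a bch_dev_usage out-parameter, so bch2_bucket_alloc_set_trans() can feed the same usage snapshot into bch2_dev_stripe_increment_inlined() instead of both functions doing their own percpu usage read. A simplified sketch of that shape, with stand-in types and made-up numbers:

    #include <stdio.h>

    struct usage { unsigned long free, dirty; };

    /* stand-in for bch2_dev_usage_read_fast(): imagine an expensive
     * percpu-counter aggregation here */
    static void usage_read(struct usage *u)
    {
            u->free = 100;
            u->dirty = 10;
    }

    /* allocation reads usage once and hands the snapshot back out */
    static int alloc_bucket(struct usage *u)
    {
            usage_read(u);
            return u->free ? 0 : -1;
    }

    /* accounting reuses the snapshot instead of re-reading */
    static void stripe_increment(const struct usage *u)
    {
            printf("free buckets at alloc time: %lu\n", u->free);
    }

    int main(void)
    {
            struct usage u;

            if (!alloc_bucket(&u))
                    stripe_increment(&u);
            return 0;
    }
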
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index a537768c4600b8c32bfa7b9a4280177f7c13b7a3..d74de1df7aa3433a9f2c284e96ab66bc9086f5db 100644
@@ -535,7 +535,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
        if (bp.level == c->btree_roots[bp.btree_id].level + 1)
                k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+       if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
                return k;
 
        bch2_trans_iter_exit(trans, iter);
@@ -585,12 +585,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
        if (IS_ERR(b))
                goto err;
 
-       if (extent_matches_bp(c, bp.btree_id, bp.level,
-                             bkey_i_to_s_c(&b->key),
-                             bucket, bp))
+       if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+                                  bkey_i_to_s_c(&b->key),
+                                  bucket, bp))
                return b;
 
-       if (btree_node_will_make_reachable(b)) {
+       if (b && btree_node_will_make_reachable(b)) {
                b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
        } else {
                backpointer_not_found(trans, bucket, bp_offset, bp,
diff --git a/libbcachefs/bbpos.h b/libbcachefs/bbpos.h
index eaf3dbf03e9e4025d755a10cedc12d41cfdac336..1fbed1f8378d1aeeca4d781f9fa3c64583e2a833 100644
@@ -2,6 +2,8 @@
 #ifndef _BCACHEFS_BBPOS_H
 #define _BCACHEFS_BBPOS_H
 
+#include "bkey_methods.h"
+
 struct bbpos {
        enum btree_id           btree;
        struct bpos             pos;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index ccac2a3fcdf733aa3882dd4bace4b9265abfc551..33186fa82682e037af78dcb3543d5bbf3049e26b 100644
  *
  * BTREE NODES:
  *
- * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
  * free smaller than a bucket - so, that's how big our btree nodes are.
  *
  * (If buckets are really big we'll only use part of the bucket for a btree node
@@ -930,7 +930,6 @@ struct bch_fs {
 
        struct time_stats       times[BCH_TIME_STAT_NR];
 
-       const char              *btree_transaction_fns[BCH_TRANSACTIONS_NR];
        struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
 };
 
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9e10fc8301f09ea7b308bd96b0b3161038d2d170..bfcb75a361cb4c1edf231ca0fce8ae235dad2984 100644
@@ -336,7 +336,7 @@ static inline void bkey_init(struct bkey *k)
  *   number.
  *
  * - WHITEOUT: for hash table btrees
-*/
+ */
 #define BCH_BKEY_TYPES()                               \
        x(deleted,              0)                      \
        x(whiteout,             1)                      \
@@ -366,7 +366,8 @@ static inline void bkey_init(struct bkey *k)
        x(set,                  25)                     \
        x(lru,                  26)                     \
        x(alloc_v4,             27)                     \
-       x(backpointer,          28)
+       x(backpointer,          28)                     \
+       x(inode_v3,             29)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -717,6 +718,21 @@ struct bch_inode_v2 {
        __u8                    fields[0];
 } __attribute__((packed, aligned(8)));
 
+struct bch_inode_v3 {
+       struct bch_val          v;
+
+       __le64                  bi_journal_seq;
+       __le64                  bi_hash_seed;
+       __le64                  bi_flags;
+       __le64                  bi_sectors;
+       __le64                  bi_size;
+       __le64                  bi_version;
+       __u8                    fields[0];
+} __attribute__((packed, aligned(8)));
+
+#define INODEv3_FIELDS_START_INITIAL   6
+#define INODEv3_FIELDS_START_CUR       (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+
 struct bch_inode_generation {
        struct bch_val          v;
 
@@ -728,7 +744,7 @@ struct bch_inode_generation {
  * bi_subvol and bi_parent_subvol are only set for subvolume roots:
  */
 
-#define BCH_INODE_FIELDS()                     \
+#define BCH_INODE_FIELDS_v2()                  \
        x(bi_atime,                     96)     \
        x(bi_ctime,                     96)     \
        x(bi_mtime,                     96)     \
@@ -755,6 +771,31 @@ struct bch_inode_generation {
        x(bi_subvol,                    32)     \
        x(bi_parent_subvol,             32)
 
+#define BCH_INODE_FIELDS_v3()                  \
+       x(bi_atime,                     96)     \
+       x(bi_ctime,                     96)     \
+       x(bi_mtime,                     96)     \
+       x(bi_otime,                     96)     \
+       x(bi_uid,                       32)     \
+       x(bi_gid,                       32)     \
+       x(bi_nlink,                     32)     \
+       x(bi_generation,                32)     \
+       x(bi_dev,                       32)     \
+       x(bi_data_checksum,             8)      \
+       x(bi_compression,               8)      \
+       x(bi_project,                   32)     \
+       x(bi_background_compression,    8)      \
+       x(bi_data_replicas,             8)      \
+       x(bi_promote_target,            16)     \
+       x(bi_foreground_target,         16)     \
+       x(bi_background_target,         16)     \
+       x(bi_erasure_code,              16)     \
+       x(bi_fields_set,                16)     \
+       x(bi_dir,                       64)     \
+       x(bi_dir_offset,                64)     \
+       x(bi_subvol,                    32)     \
+       x(bi_parent_subvol,             32)
+
 /* subset of BCH_INODE_FIELDS */
 #define BCH_INODE_OPTS()                       \
        x(data_checksum,                8)      \
@@ -780,16 +821,16 @@ enum {
         * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
         * flags)
         */
-       __BCH_INODE_SYNC        = 0,
-       __BCH_INODE_IMMUTABLE   = 1,
-       __BCH_INODE_APPEND      = 2,
-       __BCH_INODE_NODUMP      = 3,
-       __BCH_INODE_NOATIME     = 4,
+       __BCH_INODE_SYNC                = 0,
+       __BCH_INODE_IMMUTABLE           = 1,
+       __BCH_INODE_APPEND              = 2,
+       __BCH_INODE_NODUMP              = 3,
+       __BCH_INODE_NOATIME             = 4,
 
-       __BCH_INODE_I_SIZE_DIRTY= 5,
-       __BCH_INODE_I_SECTORS_DIRTY= 6,
-       __BCH_INODE_UNLINKED    = 7,
-       __BCH_INODE_BACKPTR_UNTRUSTED = 8,
+       __BCH_INODE_I_SIZE_DIRTY        = 5,
+       __BCH_INODE_I_SECTORS_DIRTY     = 6,
+       __BCH_INODE_UNLINKED            = 7,
+       __BCH_INODE_BACKPTR_UNTRUSTED   = 8,
 
        /* bits 20+ reserved for packed fields below: */
 };
@@ -811,6 +852,13 @@ LE32_BITMASK(INODE_NEW_VARINT,     struct bch_inode, bi_flags, 31, 32);
 LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
 LE64_BITMASK(INODEv2_NR_FIELDS,        struct bch_inode_v2, bi_flags, 24, 31);
 
+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS,        struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+                               struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE,     struct bch_inode_v3, bi_flags, 36, 52);
+
 /* Dirents */
 
 /*
@@ -1494,7 +1542,8 @@ struct bch_sb_field_journal_seq_blacklist {
        x(freespace,                    19)             \
        x(alloc_v4,                     20)             \
        x(new_data_types,               21)             \
-       x(backpointers,                 22)
+       x(backpointers,                 22)             \
+       x(inode_v3,                     23)
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index d348175edad402c15f3dff9b78b7e70e7a185af2..f7e5d0c377eb0cc6443244de8110c2d6423c752f 100644
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "bkey.h"
+#include "bkey_cmp.h"
 #include "bkey_methods.h"
 #include "bset.h"
 #include "util.h"
@@ -763,50 +764,6 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
 
 #ifdef CONFIG_X86_64
 
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-                                 unsigned nr_key_bits)
-{
-       long d0, d1, d2, d3;
-       int cmp;
-
-       /* we shouldn't need asm for this, but gcc is being retarded: */
-
-       asm(".intel_syntax noprefix;"
-           "xor eax, eax;"
-           "xor edx, edx;"
-           "1:;"
-           "mov r8, [rdi];"
-           "mov r9, [rsi];"
-           "sub ecx, 64;"
-           "jl 2f;"
-
-           "cmp r8, r9;"
-           "jnz 3f;"
-
-           "lea rdi, [rdi - 8];"
-           "lea rsi, [rsi - 8];"
-           "jmp 1b;"
-
-           "2:;"
-           "not ecx;"
-           "shr r8, 1;"
-           "shr r9, 1;"
-           "shr r8, cl;"
-           "shr r9, cl;"
-           "cmp r8, r9;"
-
-           "3:\n"
-           "seta al;"
-           "setb dl;"
-           "sub eax, edx;"
-           ".att_syntax prefix;"
-           : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
-           : "0" (l), "1" (r), "3" (nr_key_bits)
-           : "r8", "r9", "cc", "memory");
-
-       return cmp;
-}
-
 #define I(_x)                  (*(out)++ = (_x))
 #define I1(i0)                                         I(i0)
 #define I2(i0, i1)             (I1(i0),                I(i1))
@@ -1037,40 +994,6 @@ int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
 }
 
 #else
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-                                 unsigned nr_key_bits)
-{
-       u64 l_v, r_v;
-
-       if (!nr_key_bits)
-               return 0;
-
-       /* for big endian, skip past header */
-       nr_key_bits += high_bit_offset;
-       l_v = *l & (~0ULL >> high_bit_offset);
-       r_v = *r & (~0ULL >> high_bit_offset);
-
-       while (1) {
-               if (nr_key_bits < 64) {
-                       l_v >>= 64 - nr_key_bits;
-                       r_v >>= 64 - nr_key_bits;
-                       nr_key_bits = 0;
-               } else {
-                       nr_key_bits -= 64;
-               }
-
-               if (!nr_key_bits || l_v != r_v)
-                       break;
-
-               l = next_word(l);
-               r = next_word(r);
-
-               l_v = *l;
-               r_v = *r;
-       }
-
-       return cmp_int(l_v, r_v);
-}
 #endif
 
 __pure
@@ -1078,19 +1001,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
                                          const struct bkey_packed *r,
                                          const struct btree *b)
 {
-       const struct bkey_format *f = &b->format;
-       int ret;
-
-       EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
-       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-
-       ret = __bkey_cmp_bits(high_word(f, l),
-                             high_word(f, r),
-                             b->nr_key_bits);
-
-       EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
-                               bkey_unpack_pos(b, r)));
-       return ret;
+       return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
 }
 
 __pure __flatten
@@ -1106,20 +1017,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
                         const struct bkey_packed *l,
                         const struct bkey_packed *r)
 {
-       struct bkey unpacked;
-
-       if (likely(bkey_packed(l) && bkey_packed(r)))
-               return __bch2_bkey_cmp_packed_format_checked(l, r, b);
-
-       if (bkey_packed(l)) {
-               __bkey_unpack_key_format_checked(b, &unpacked, l);
-               l = (void*) &unpacked;
-       } else if (bkey_packed(r)) {
-               __bkey_unpack_key_format_checked(b, &unpacked, r);
-               r = (void*) &unpacked;
-       }
-
-       return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+       return bch2_bkey_cmp_packed_inlined(b, l, r);
 }
 
 __pure __flatten
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index df9fb859d1db6834057ef46e7573ddac031b1aee..19b59ffe0a98fbde8828feb9be1d76641a13e9ab 100644
@@ -5,6 +5,7 @@
 #include <linux/bug.h>
 #include "bcachefs_format.h"
 
+#include "btree_types.h"
 #include "util.h"
 #include "vstructs.h"
 
@@ -134,8 +135,9 @@ int bkey_cmp_left_packed(const struct btree *b,
 }
 
 /*
- * we prefer to pass bpos by ref, but it's often enough terribly convenient to
- * pass it by by val... as much as I hate c++, const ref would be nice here:
+ * The compiler generates better code when we pass bpos by ref, but it's often
+ * enough terribly convenient to pass it by val... as much as I hate c++, const
+ * ref would be nice here:
  */
 __pure __flatten
 static inline int bkey_cmp_left_packed_byval(const struct btree *b,
@@ -356,6 +358,99 @@ void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
 bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
               const struct bkey_format *);
 
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
+static inline void
+__bkey_unpack_key_format_checked(const struct btree *b,
+                              struct bkey *dst,
+                              const struct bkey_packed *src)
+{
+       if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
+               compiled_unpack_fn unpack_fn = b->aux_data;
+               unpack_fn(dst, src);
+
+               if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+                   bch2_expensive_debug_checks) {
+                       struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
+
+                       BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
+               }
+       } else {
+               *dst = __bch2_bkey_unpack_key(&b->format, src);
+       }
+}
+
+static inline struct bkey
+bkey_unpack_key_format_checked(const struct btree *b,
+                              const struct bkey_packed *src)
+{
+       struct bkey dst;
+
+       __bkey_unpack_key_format_checked(b, &dst, src);
+       return dst;
+}
+
+static inline void __bkey_unpack_key(const struct btree *b,
+                                    struct bkey *dst,
+                                    const struct bkey_packed *src)
+{
+       if (likely(bkey_packed(src)))
+               __bkey_unpack_key_format_checked(b, dst, src);
+       else
+               *dst = *packed_to_bkey_c(src);
+}
+
+/**
+ * bkey_unpack_key -- unpack just the key, not the value
+ */
+static inline struct bkey bkey_unpack_key(const struct btree *b,
+                                         const struct bkey_packed *src)
+{
+       return likely(bkey_packed(src))
+               ? bkey_unpack_key_format_checked(b, src)
+               : *packed_to_bkey_c(src);
+}
+
+static inline struct bpos
+bkey_unpack_pos_format_checked(const struct btree *b,
+                              const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+       return bkey_unpack_key_format_checked(b, src).p;
+#else
+       return __bkey_unpack_pos(&b->format, src);
+#endif
+}
+
+static inline struct bpos bkey_unpack_pos(const struct btree *b,
+                                         const struct bkey_packed *src)
+{
+       return likely(bkey_packed(src))
+               ? bkey_unpack_pos_format_checked(b, src)
+               : packed_to_bkey_c(src)->p;
+}
+
+/* Disassembled bkeys */
+
+static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+                                              const struct bkey_packed *k,
+                                              struct bkey *u)
+{
+       __bkey_unpack_key(b, u, k);
+
+       return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+}
+
+/* non const version: */
+static inline struct bkey_s __bkey_disassemble(struct btree *b,
+                                              struct bkey_packed *k,
+                                              struct bkey *u)
+{
+       __bkey_unpack_key(b, u, k);
+
+       return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+}
+
 static inline u64 bkey_field_max(const struct bkey_format *f,
                                 enum bch_bkey_fields nr)
 {
diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h
index 0d7c67a959af14baf8efa98be0ea04150f9137ed..a30c4ae8eb369db29c3a5d1333b418b5972efbac 100644
@@ -3,6 +3,7 @@
 #define _BCACHEFS_BKEY_BUF_H
 
 #include "bcachefs.h"
+#include "bkey.h"
 
 struct bkey_buf {
        struct bkey_i   *k;
diff --git a/libbcachefs/bkey_cmp.h b/libbcachefs/bkey_cmp.h
new file mode 100644
index 0000000..5f42a6e
--- /dev/null
+++ b/libbcachefs/bkey_cmp.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_CMP_H
+#define _BCACHEFS_BKEY_CMP_H
+
+#include "bkey.h"
+
+#ifdef CONFIG_X86_64
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       long d0, d1, d2, d3;
+       int cmp;
+
+       /* we shouldn't need asm for this, but gcc is being retarded: */
+
+       asm(".intel_syntax noprefix;"
+           "xor eax, eax;"
+           "xor edx, edx;"
+           "1:;"
+           "mov r8, [rdi];"
+           "mov r9, [rsi];"
+           "sub ecx, 64;"
+           "jl 2f;"
+
+           "cmp r8, r9;"
+           "jnz 3f;"
+
+           "lea rdi, [rdi - 8];"
+           "lea rsi, [rsi - 8];"
+           "jmp 1b;"
+
+           "2:;"
+           "not ecx;"
+           "shr r8, 1;"
+           "shr r9, 1;"
+           "shr r8, cl;"
+           "shr r9, cl;"
+           "cmp r8, r9;"
+
+           "3:\n"
+           "seta al;"
+           "setb dl;"
+           "sub eax, edx;"
+           ".att_syntax prefix;"
+           : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+           : "0" (l), "1" (r), "3" (nr_key_bits)
+           : "r8", "r9", "cc", "memory");
+
+       return cmp;
+}
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       u64 l_v, r_v;
+
+       if (!nr_key_bits)
+               return 0;
+
+       /* for big endian, skip past header */
+       nr_key_bits += high_bit_offset;
+       l_v = *l & (~0ULL >> high_bit_offset);
+       r_v = *r & (~0ULL >> high_bit_offset);
+
+       while (1) {
+               if (nr_key_bits < 64) {
+                       l_v >>= 64 - nr_key_bits;
+                       r_v >>= 64 - nr_key_bits;
+                       nr_key_bits = 0;
+               } else {
+                       nr_key_bits -= 64;
+               }
+
+               if (!nr_key_bits || l_v != r_v)
+                       break;
+
+               l = next_word(l);
+               r = next_word(r);
+
+               l_v = *l;
+               r_v = *r;
+       }
+
+       return cmp_int(l_v, r_v);
+}
+#endif
+
+static inline __pure __flatten
+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
+                                         const struct bkey_packed *r,
+                                         const struct btree *b)
+{
+       const struct bkey_format *f = &b->format;
+       int ret;
+
+       EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+       ret = __bkey_cmp_bits(high_word(f, l),
+                             high_word(f, r),
+                             b->nr_key_bits);
+
+       EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
+                               bkey_unpack_pos(b, r)));
+       return ret;
+}
+
+static inline __pure __flatten
+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
+                        const struct bkey_packed *l,
+                        const struct bkey_packed *r)
+{
+       struct bkey unpacked;
+
+       if (likely(bkey_packed(l) && bkey_packed(r)))
+               return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+
+       if (bkey_packed(l)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, l);
+               l = (void *) &unpacked;
+       } else if (bkey_packed(r)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, r);
+               r = (void *) &unpacked;
+       }
+
+       return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+#endif /* _BCACHEFS_BKEY_CMP_H */
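
The comparators (and __bkey_cmp_bits with them) were moved into this header so hot paths like sort_keys_cmp() in bkey_sort.c can inline them. The portable fallback's core trick is comparing only the top nr_key_bits of each 64-bit word by shifting the irrelevant low bits off; that step, isolated as a standalone sketch:

    #include <assert.h>
    #include <stdint.h>

    /* compare only the top nr_bits of two 64-bit words as unsigned ints:
     * shift both right so the bits below the key fall off, then compare */
    static int cmp_high_bits(uint64_t l, uint64_t r, unsigned nr_bits)
    {
            assert(nr_bits >= 1 && nr_bits <= 64);
            l >>= 64 - nr_bits;
            r >>= 64 - nr_bits;
            return (l > r) - (l < r);
    }

    int main(void)
    {
            /* equal in the top 8 bits, differing only below them: */
            assert(cmp_high_bits(0xab00000000000001ULL,
                                 0xab00000000000002ULL, 8) == 0);
            assert(cmp_high_bits(0xab00000000000001ULL,
                                 0xac00000000000000ULL, 8) < 0);
            return 0;
    }
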
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index e0cbac8811afa6d69b5d7e99e6a411f769fee25c..14d910a3077ffa49a6522423563bc1de0df632fc 100644
@@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
                (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_inode)|
                (1U << KEY_TYPE_inode_v2)|
+               (1U << KEY_TYPE_inode_v3)|
                (1U << KEY_TYPE_inode_generation),
        [BKEY_TYPE_dirents] =
                (1U << KEY_TYPE_deleted)|
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index b1385a77da1146f6efd643d389a73aa999745244..8518054a23817cbf06bd6b8c371162096995fabb 100644
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "bkey_buf.h"
+#include "bkey_cmp.h"
 #include "bkey_sort.h"
 #include "bset.h"
 #include "extents.h"
@@ -155,7 +156,7 @@ static inline int sort_keys_cmp(struct btree *b,
                                struct bkey_packed *l,
                                struct bkey_packed *r)
 {
-       return bch2_bkey_cmp_packed(b, l, r) ?:
+       return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
                (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
                (int) l->needs_whiteout - (int) r->needs_whiteout;
 }
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index fa60ef84e4ef00c40a909b371268836106f99ac0..09423536447049066def097c1b6c32b3ab4657d1 100644
@@ -965,7 +965,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
        t->size -= j - l;
 
        for (j = l; j < t->size; j++)
-              rw_aux_tree(b, t)[j].offset += shift;
+               rw_aux_tree(b, t)[j].offset += shift;
 
        EBUG_ON(l < t->size &&
                rw_aux_tree(b, t)[l].offset ==
@@ -1266,7 +1266,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
        bch2_btree_node_iter_sort(iter, b);
 }
 
-noinline __flatten __attribute__((cold))
+noinline __flatten __cold
 static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
                              struct btree *b, struct bpos *search)
 {
@@ -1441,7 +1441,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
        EBUG_ON(iter->data->k > iter->data->end);
 
        if (unlikely(__btree_node_iter_set_end(iter, 0))) {
-               bch2_btree_node_iter_set_drop(iter, iter->data);
+               /* avoid an expensive memmove call: */
+               iter->data[0] = iter->data[1];
+               iter->data[1] = iter->data[2];
+               iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
                return;
        }
 
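The __bch2_btree_node_iter_advance() change above replaces bch2_btree_node_iter_set_drop(), a memmove(), with three fixed struct assignments, which works because the iterator's data[] array has a constant three elements. The same trick in isolation, with a stand-in struct:

    #include <assert.h>

    struct set { unsigned short k, end; };

    struct node_iter {
            struct set data[3];     /* fixed size, as in btree_node_iter */
    };

    /* drop data[0] by shifting the known remainder up: a few register
     * moves instead of a memmove() call */
    static inline void iter_drop_first(struct node_iter *iter)
    {
            iter->data[0] = iter->data[1];
            iter->data[1] = iter->data[2];
            iter->data[2] = (struct set) { 0, 0 };
    }

    int main(void)
    {
            struct node_iter it = { { { 1, 2 }, { 3, 4 }, { 5, 6 } } };

            iter_drop_first(&it);
            assert(it.data[0].k == 3 && it.data[2].k == 0);
            return 0;
    }
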
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 0d46534c3dcd148e872f222c125c144fac115b7a..72e6376bce2af705ee8519abbd3af3202b2cbd53 100644
@@ -205,100 +205,6 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
        return btree_aux_data_bytes(b) / sizeof(u64);
 }
 
-typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
-
-static inline void
-__bkey_unpack_key_format_checked(const struct btree *b,
-                              struct bkey *dst,
-                              const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-       {
-               compiled_unpack_fn unpack_fn = b->aux_data;
-               unpack_fn(dst, src);
-
-               if (bch2_expensive_debug_checks) {
-                       struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
-
-                       BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
-               }
-       }
-#else
-       *dst = __bch2_bkey_unpack_key(&b->format, src);
-#endif
-}
-
-static inline struct bkey
-bkey_unpack_key_format_checked(const struct btree *b,
-                              const struct bkey_packed *src)
-{
-       struct bkey dst;
-
-       __bkey_unpack_key_format_checked(b, &dst, src);
-       return dst;
-}
-
-static inline void __bkey_unpack_key(const struct btree *b,
-                                    struct bkey *dst,
-                                    const struct bkey_packed *src)
-{
-       if (likely(bkey_packed(src)))
-               __bkey_unpack_key_format_checked(b, dst, src);
-       else
-               *dst = *packed_to_bkey_c(src);
-}
-
-/**
- * bkey_unpack_key -- unpack just the key, not the value
- */
-static inline struct bkey bkey_unpack_key(const struct btree *b,
-                                         const struct bkey_packed *src)
-{
-       return likely(bkey_packed(src))
-               ? bkey_unpack_key_format_checked(b, src)
-               : *packed_to_bkey_c(src);
-}
-
-static inline struct bpos
-bkey_unpack_pos_format_checked(const struct btree *b,
-                              const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-       return bkey_unpack_key_format_checked(b, src).p;
-#else
-       return __bkey_unpack_pos(&b->format, src);
-#endif
-}
-
-static inline struct bpos bkey_unpack_pos(const struct btree *b,
-                                         const struct bkey_packed *src)
-{
-       return likely(bkey_packed(src))
-               ? bkey_unpack_pos_format_checked(b, src)
-               : packed_to_bkey_c(src)->p;
-}
-
-/* Disassembled bkeys */
-
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
-                                              const struct bkey_packed *k,
-                                              struct bkey *u)
-{
-       __bkey_unpack_key(b, u, k);
-
-       return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
-}
-
-/* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
-                                              struct bkey_packed *k,
-                                              struct bkey *u)
-{
-       __bkey_unpack_key(b, u, k);
-
-       return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
-}
-
 #define for_each_bset(_b, _t)                                          \
        for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
 
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index f84b50869de22626e2cfeb3ed98deb9d0124a462..8dd2db4121a6b57db2a2c20fa4c9f3e0ad781428 100644
 #include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
+#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
+do {                                            \
+       if (shrinker_counter)                    \
+               bc->not_freed_##counter++;       \
+} while (0)
+
 const char * const bch2_btree_node_flags[] = {
 #define x(f)   #f,
        BTREE_FLAGS()
@@ -175,7 +181,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
        mutex_lock(&bc->lock);
        ret = __bch2_btree_node_hash_insert(bc, b);
        if (!ret)
-               list_add(&b->list, &bc->live);
+               list_add_tail(&b->list, &bc->live);
        mutex_unlock(&bc->lock);
 
        return ret;
@@ -194,7 +200,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
 {
        struct btree_cache *bc = &c->btree_cache;
        int ret = 0;
@@ -204,38 +210,64 @@ wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush)
+               if (!flush) {
+                       if (btree_node_dirty(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+                       else if (btree_node_read_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                       else if (btree_node_write_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                        return -ENOMEM;
+               }
 
                /* XXX: waiting on IO with btree cache lock held */
                bch2_btree_node_wait_on_read(b);
                bch2_btree_node_wait_on_write(b);
        }
 
-       if (!six_trylock_intent(&b->c.lock))
+       if (!six_trylock_intent(&b->c.lock)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
                return -ENOMEM;
+       }
 
-       if (!six_trylock_write(&b->c.lock))
+       if (!six_trylock_write(&b->c.lock)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
                goto out_unlock_intent;
+       }
 
        /* recheck under lock */
        if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush)
+               if (!flush) {
+                       if (btree_node_read_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                       else if (btree_node_write_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                        goto out_unlock;
+               }
                six_unlock_write(&b->c.lock);
                six_unlock_intent(&b->c.lock);
                goto wait_on_io;
        }
 
-       if (btree_node_noevict(b) ||
-           btree_node_write_blocked(b) ||
-           btree_node_will_make_reachable(b))
+       if (btree_node_noevict(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
+               goto out_unlock;
+       }
+       if (btree_node_write_blocked(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
                goto out_unlock;
+       }
+       if (btree_node_will_make_reachable(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+               goto out_unlock;
+       }
 
        if (btree_node_dirty(b)) {
-               if (!flush)
+               if (!flush) {
+                       BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
                        goto out_unlock;
+               }
                /*
                 * Using the underscore version because we don't want to compact
                 * bsets after the write, since this node is about to be evicted
@@ -263,14 +295,14 @@ out_unlock_intent:
        goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
 {
-       return __btree_node_reclaim(c, b, false);
+       return __btree_node_reclaim(c, b, false, shrinker_counter);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, true);
+       return __btree_node_reclaim(c, b, true, false);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -319,11 +351,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                if (touched >= nr)
                        goto out;
 
-               if (!btree_node_reclaim(c, b)) {
+               if (!btree_node_reclaim(c, b, true)) {
                        btree_node_data_free(c, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
                        freed++;
+                       bc->freed++;
                }
        }
 restart:
@@ -332,9 +365,11 @@ restart:
 
                if (btree_node_accessed(b)) {
                        clear_btree_node_accessed(b);
-               } else if (!btree_node_reclaim(c, b)) {
+                       bc->not_freed_access_bit++;
+               } else if (!btree_node_reclaim(c, b, true)) {
                        freed++;
                        btree_node_data_free(c, b);
+                       bc->freed++;
 
                        bch2_btree_node_hash_remove(bc, b);
                        six_unlock_write(&b->c.lock);
@@ -390,7 +425,7 @@ static void bch2_btree_cache_shrinker_to_text(struct printbuf *out, struct shrin
        struct bch_fs *c = container_of(shrink, struct bch_fs,
                                        btree_cache.shrink);
 
-       bch2_btree_cache_to_text(out, c);
+       bch2_btree_cache_to_text(out, &c->btree_cache);
 }
 
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
@@ -548,7 +583,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
        struct btree *b;
 
        list_for_each_entry_reverse(b, &bc->live, list)
-               if (!btree_node_reclaim(c, b))
+               if (!btree_node_reclaim(c, b, false))
                        return b;
 
        while (1) {
@@ -583,7 +618,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
         * disk node. Check the freed list before allocating a new one:
         */
        list_for_each_entry(b, freed, list)
-               if (!btree_node_reclaim(c, b)) {
+               if (!btree_node_reclaim(c, b, false)) {
                        list_del_init(&b->list);
                        goto got_node;
                }
@@ -609,7 +644,7 @@ got_node:
         * the list. Check if there's any freed nodes there:
         */
        list_for_each_entry(b2, &bc->freeable, list)
-               if (!btree_node_reclaim(c, b2)) {
+               if (!btree_node_reclaim(c, b2, false)) {
                        swap(b->data, b2->data);
                        swap(b->aux_data, b2->aux_data);
                        btree_node_to_freedlist(bc, b2);
@@ -830,7 +865,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
        if (likely(c->opts.btree_node_mem_ptr_optimization &&
                   b &&
                   b->hash_val == btree_ptr_hash_val(k)))
-                       goto lock_node;
+               goto lock_node;
 retry:
        b = btree_cache_find(bc, k);
        if (unlikely(!b)) {
@@ -1070,7 +1105,7 @@ wait_on_io:
 
        /* XXX we're called from btree_gc which will be holding other btree
         * nodes locked
-        * */
+        */
        __bch2_btree_node_wait_on_read(b);
        __bch2_btree_node_wait_on_write(b);
 
@@ -1141,9 +1176,21 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
               stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
+void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
 {
-       prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
-       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
-       prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+       prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
+       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
+       prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
+
+       prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
+       prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
+       prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
+       prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
+       prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
+       prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
+       prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
+       prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
+       prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
+       prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
+
 }
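
BTREE_CACHE_NOT_FREED_INCREMENT() pastes the reason into a bc->not_freed_* field name, so each early-out in __btree_node_reclaim() bumps its own counter, and only when called from the shrinker. A self-contained sketch of the token-pasting counter pattern (struct and names invented for illustration):

    #include <stdio.h>

    struct cache_stats {
            unsigned freed;
            unsigned not_freed_dirty;
            unsigned not_freed_lock_intent;
    };

    /* ## pastes the reason into the field name at compile time */
    #define NOT_FREED_INCREMENT(stats, counter, from_shrinker)      \
    do {                                                            \
            if (from_shrinker)                                      \
                    (stats)->not_freed_##counter++;                 \
    } while (0)

    int main(void)
    {
            struct cache_stats s = { 0 };

            NOT_FREED_INCREMENT(&s, dirty, 1);      /* counted */
            NOT_FREED_INCREMENT(&s, dirty, 0);      /* not from shrinker */
            printf("not freed, dirty: %u\n", s.not_freed_dirty);
            return 0;
    }
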
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index a4df3e866bb81a730690c206a344f0a217383cb9..b623c70282730336790e5553f1670e93c777b736 100644
@@ -4,6 +4,7 @@
 
 #include "bcachefs.h"
 #include "btree_types.h"
+#include "bkey_methods.h"
 
 extern const char * const bch2_btree_node_flags[];
 
@@ -100,6 +101,6 @@ static inline unsigned btree_blocks(struct bch_fs *c)
 
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
                             struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, struct bch_fs *);
+void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index a4d6998fcd00fb97735d3e6b75e8b51e5a1c0f0e..801a09f6fc1141f595cfc89b2cc4b151fbb85978 100644
@@ -318,7 +318,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
                                "  node %s",
                                bch2_btree_ids[b->c.btree_id], b->c.level,
                                buf1.buf, buf2.buf))
-                   ret = set_node_min(c, cur, expected_start);
+                       ret = set_node_min(c, cur, expected_start);
        }
 out:
 fsck_err:
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 2aa565550c911a18bc7e892b9b35a5de4a65b790..d18346a5d58d066bafd86509c2aeeb61d6ab41ed 100644
@@ -22,6 +22,8 @@
 
 static void btree_trans_verify_sorted(struct btree_trans *);
 inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *,
+                                                  struct btree_path *, int);
 
 static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
 static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
@@ -1004,14 +1006,9 @@ err:
        return ret;
 }
 
-static inline bool btree_path_good_node(struct btree_trans *trans,
-                                       struct btree_path *path,
-                                       unsigned l, int check_pos)
+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
+                                               unsigned l, int check_pos)
 {
-       if (!is_btree_node(path, l) ||
-           !bch2_btree_node_relock(trans, path, l))
-               return false;
-
        if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
                return false;
        if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
@@ -1019,6 +1016,15 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
        return true;
 }
 
+static inline bool btree_path_good_node(struct btree_trans *trans,
+                                       struct btree_path *path,
+                                       unsigned l, int check_pos)
+{
+       return is_btree_node(path, l) &&
+               bch2_btree_node_relock(trans, path, l) &&
+               btree_path_check_pos_in_node(path, l, check_pos);
+}
+
 static void btree_path_set_level_down(struct btree_trans *trans,
                                      struct btree_path *path,
                                      unsigned new_level)
@@ -1035,9 +1041,9 @@ static void btree_path_set_level_down(struct btree_trans *trans,
        bch2_btree_path_verify(trans, path);
 }
 
-static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
-                                                    struct btree_path *path,
-                                                    int check_pos)
+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
+                                                        struct btree_path *path,
+                                                        int check_pos)
 {
        unsigned i, l = path->level;
 again:
@@ -1058,6 +1064,16 @@ again:
        return l;
 }
 
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+                                                    struct btree_path *path,
+                                                    int check_pos)
+{
+       return likely(btree_node_locked(path, path->level) &&
+                     btree_path_check_pos_in_node(path, path->level, check_pos))
+               ? path->level
+               : __btree_path_up_until_good_node(trans, path, check_pos);
+}
+
 /*
  * This is the main state machine for walking down the btree - walks down to a
  * specified depth
@@ -1158,17 +1174,21 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
                            struct btree_path *src)
 {
        unsigned i, offset = offsetof(struct btree_path, pos);
+       int cmp = btree_path_cmp(dst, src);
 
        memcpy((void *) dst + offset,
               (void *) src + offset,
               sizeof(struct btree_path) - offset);
 
-       for (i = 0; i < BTREE_MAX_DEPTH; i++)
-               if (btree_node_locked(dst, i))
-                       six_lock_increment(&dst->l[i].b->c.lock,
-                                          __btree_lock_want(dst, i));
+       for (i = 0; i < BTREE_MAX_DEPTH; i++) {
+               unsigned t = btree_node_locked_type(dst, i);
 
-       bch2_btree_path_check_sort(trans, dst, 0);
+               if (t != BTREE_NODE_UNLOCKED)
+                       six_lock_increment(&dst->l[i].b->c.lock, t);
+       }
+
+       if (cmp)
+               bch2_btree_path_check_sort_fast(trans, dst, cmp);
 }
 
 static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
@@ -1181,8 +1201,7 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr
        return new;
 }
 
-inline struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *trans,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
                         struct btree_path *path, bool intent,
                         unsigned long ip)
 {
@@ -1218,7 +1237,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
 
        path->pos = new_pos;
 
-       bch2_btree_path_check_sort(trans, path, cmp);
+       bch2_btree_path_check_sort_fast(trans, path, cmp);
 
        if (unlikely(path->cached)) {
                btree_node_unlock(trans, path, 0);
@@ -1242,7 +1261,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
                        __btree_path_level_init(path, l);
        }
 
-       if (l != path->level) {
+       if (unlikely(l != path->level)) {
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                __bch2_btree_path_unlock(trans, path);
        }
@@ -2518,6 +2537,25 @@ static inline void btree_path_swap(struct btree_trans *trans,
        btree_path_verify_sorted_ref(trans, r);
 }
 
+static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
+                                                  struct btree_path *path,
+                                                  int cmp)
+{
+       struct btree_path *n;
+       int cmp2;
+
+       EBUG_ON(!cmp);
+
+       while ((n = cmp < 0
+               ? prev_btree_path(trans, path)
+               : next_btree_path(trans, path)) &&
+              (cmp2 = btree_path_cmp(n, path)) &&
+              cmp2 != cmp)
+               btree_path_swap(trans, n, path);
+
+       btree_trans_verify_sorted(trans);
+}
+
 inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path,
                                       int cmp)
 {
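
bch2_btree_path_check_sort_fast() leans on the caller knowing which way the path's position moved (the sign of cmp), so restoring sort order is a one-directional bubble of a single element rather than a full resort. The same idea on a plain int array, as an illustrative sketch:

#include <stddef.h>

/*
 * Move a[i] toward its sorted position, in the direction given by the
 * sign of cmp (< 0: its key decreased, bubble left; > 0: bubble right).
 */
static void nudge_sorted(int *a, size_t n, size_t i, int cmp)
{
	while (cmp < 0 ? (i > 0 && a[i - 1] > a[i])
		       : (i + 1 < n && a[i + 1] < a[i])) {
		size_t j = cmp < 0 ? i - 1 : i + 1;
		int tmp = a[i];

		a[i] = a[j];
		a[j] = tmp;
		i = j;
	}
}
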
@@ -2612,7 +2650,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
                                          unsigned flags,
                                          unsigned long ip)
 {
-       if (trans->restarted)
+       if (unlikely(trans->restarted))
                panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n",
                      bch2_err_str(trans->restarted),
                      (void *) trans->last_restarted_ip);
@@ -2632,7 +2670,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
            btree_type_has_snapshots(btree_id))
                flags |= BTREE_ITER_FILTER_SNAPSHOTS;
 
-       if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
+       if (trans->journal_replay_not_finished)
                flags |= BTREE_ITER_WITH_JOURNAL;
 
        iter->trans     = trans;
@@ -2816,7 +2854,7 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
        BUG_ON(trans->used_mempool);
 
 #ifdef __KERNEL__
-       p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
+       p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
 #endif
        if (!p)
                p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
@@ -2825,15 +2863,16 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
        trans->updates          = p; p += updates_bytes;
 }
 
-static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
-                                       const char *fn)
+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+
+unsigned bch2_trans_get_fn_idx(const char *fn)
 {
        unsigned i;
 
-       for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
-               if (!c->btree_transaction_fns[i] ||
-                   c->btree_transaction_fns[i] == fn) {
-                       c->btree_transaction_fns[i] = fn;
+       for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
+               if (!bch2_btree_transaction_fns[i] ||
+                   bch2_btree_transaction_fns[i] == fn) {
+                       bch2_btree_transaction_fns[i] = fn;
                        return i;
                }
 
@@ -2841,7 +2880,7 @@ static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct b
        return i;
 }
 
-void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn)
+void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
        __acquires(&c->btree_trans_barrier)
 {
        struct btree_transaction_stats *s;
@@ -2851,10 +2890,13 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
 
        memset(trans, 0, sizeof(*trans));
        trans->c                = c;
-       trans->fn               = fn;
+       trans->fn               = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
+               ? bch2_btree_transaction_fns[fn_idx] : NULL;
        trans->last_begin_time  = local_clock();
-       trans->fn_idx           = bch2_trans_get_fn_idx(trans, c, fn);
+       trans->fn_idx           = fn_idx;
        trans->locking_wait.task = current;
+       trans->journal_replay_not_finished =
+               !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
        closure_init_stack(&trans->ref);
 
        bch2_trans_alloc_paths(trans, c);
@@ -2979,7 +3021,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
 
        rcu_read_lock();
        owner = READ_ONCE(b->lock.owner);
-       pid = owner ? owner->pid : 0;;
+       pid = owner ? owner->pid : 0;
        rcu_read_unlock();
 
        prt_tab(out);
index 910f6d7bc961818cf4463eabcdf99a68e33c6622..0775cfa2be9a37a0a042bd56f52217fc3e5d0ffc 100644 (file)
@@ -131,9 +131,20 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
             _path = __trans_next_path_with_node((_trans), (_b),        \
                                                 (_path)->idx + 1))
 
-struct btree_path * __must_check
-bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
                         bool, unsigned long);
+
+static inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+                        struct btree_path *path, bool intent,
+                        unsigned long ip)
+{
+       if (path->ref > 1 || path->preserve)
+               path = __bch2_btree_path_make_mut(trans, path, intent, ip);
+       path->should_be_locked = false;
+       return path;
+}
+
 struct btree_path * __must_check
 bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *,
                        struct bpos, bool, unsigned long);
@@ -551,10 +562,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
 void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
 void bch2_dump_trans_updates(struct btree_trans *);
 void bch2_dump_trans_paths_updates(struct btree_trans *);
-void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *);
+void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
 void bch2_trans_exit(struct btree_trans *);
 
-#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__)
+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+unsigned bch2_trans_get_fn_idx(const char *);
+
+#define bch2_trans_init(_trans, _c, _nr_iters, _mem)                   \
+do {                                                                   \
+       static unsigned trans_fn_idx;                                   \
+                                                                       \
+       if (unlikely(!trans_fn_idx))                                    \
+               trans_fn_idx = bch2_trans_get_fn_idx(__func__);         \
+                                                                       \
+       __bch2_trans_init(_trans, _c, trans_fn_idx);                    \
+} while (0)
 
 void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
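
The reworked bch2_trans_init() caches the registry index in a function-local static, so the linear scan in bch2_trans_get_fn_idx() runs at most once per call site rather than on every transaction init (in bcachefs, a function that lands in slot 0 is simply re-resolved each time, which the scan makes cheap; the sketch below reserves slot 0 instead). A sketch of the same per-callsite interning pattern, with hypothetical names; deduplication is by pointer identity, which works because __func__ yields one literal per function:

#define FN_TABLE_SIZE 128

static const char *fn_table[FN_TABLE_SIZE];	/* slot 0 reserved: "unset" */

static unsigned intern_fn(const char *fn)
{
	unsigned i;

	for (i = 1; i < FN_TABLE_SIZE; i++)
		if (!fn_table[i] || fn_table[i] == fn) {
			fn_table[i] = fn;	/* racy, but only stats depend on it */
			return i;
		}
	return 0;	/* table full; 0 means "unknown" */
}

#define trace_enter()						\
do {								\
	static unsigned __fn_idx;	/* one per call site */	\
	if (!__fn_idx)						\
		__fn_idx = intern_fn(__func__);			\
	/* ... account the event to __fn_idx ... */		\
} while (0)
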
 
index b8ed25b9998547d9d5abcc319cca92980e6bf91c..cd52dd5a2890e44263f7f2925bddb8951cbcba8b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
 #include "btree_cache.h"
@@ -103,6 +104,22 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        six_unlock_intent(&ck->c.lock);
 }
 
+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
+                                                  struct bkey_cached *ck)
+{
+       struct bkey_cached *pos;
+
+       list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
+               if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
+                                pos->btree_trans_barrier_seq)) {
+                       list_move(&ck->list, &pos->list);
+                       return;
+               }
+       }
+
+       list_move(&ck->list, &bc->freed_nonpcpu);
+}
+
 static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
                                         struct bkey_cached *ck)
 {
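
The new helper keeps freed_nonpcpu sorted by btree_trans_barrier_seq, inserting from the tail; ULONG_CMP_GE() is a wraparound-safe comparison, valid as long as the two sequence numbers are within half the type's range of each other. A minimal sketch of the comparison (same idea as bcachefs's ULONG_CMP_GE(); exact definition assumed):

#include <limits.h>

/* True if a is at or after b in sequence order, modulo wraparound. */
#define SEQ_CMP_GE(a, b)	((long) ((a) - (b)) >= 0)

/*
 * With 64-bit longs:
 *   SEQ_CMP_GE(5UL, 3UL)            -> true  (5 came after 3)
 *   SEQ_CMP_GE(2UL, ULONG_MAX - 1)  -> true  (2 is 3 steps past ULONG_MAX - 1)
 *   SEQ_CMP_GE(ULONG_MAX - 1, 2UL)  -> false
 */
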
@@ -130,11 +147,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
                        while (f->nr > ARRAY_SIZE(f->objs) / 2) {
                                struct bkey_cached *ck2 = f->objs[--f->nr];
 
-                               list_move_tail(&ck2->list, &bc->freed_nonpcpu);
+                               __bkey_cached_move_to_freelist_ordered(bc, ck2);
                        }
                        preempt_enable();
 
-                       list_move_tail(&ck->list, &bc->freed_nonpcpu);
+                       __bkey_cached_move_to_freelist_ordered(bc, ck);
                        mutex_unlock(&bc->lock);
                }
 #else
@@ -295,7 +312,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
        bool was_new = true;
 
        ck = bkey_cached_alloc(trans, path);
-       if (unlikely(IS_ERR(ck)))
+       if (IS_ERR(ck))
                return ck;
 
        if (unlikely(!ck)) {
@@ -416,7 +433,7 @@ err:
        return ret;
 }
 
-noinline static int
+static noinline int
 bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
                                         unsigned flags)
 {
@@ -597,7 +614,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
         * Since journal reclaim depends on us making progress here, and the
         * allocator/copygc depend on journal reclaim making progress, we need
         * to be using alloc reserves:
-        * */
+        */
        ret   = bch2_btree_iter_traverse(&b_iter) ?:
                bch2_trans_update(trans, &b_iter, ck->k,
                                  BTREE_UPDATE_KEY_CACHE_RECLAIM|
@@ -982,7 +999,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
        bc->table_init_done = true;
 
-       bc->shrink.seeks                = 1;
+       bc->shrink.seeks                = 0;
        bc->shrink.count_objects        = bch2_btree_key_cache_count;
        bc->shrink.scan_objects         = bch2_btree_key_cache_scan;
        bc->shrink.to_text              = bch2_btree_key_cache_shrinker_to_text;
@@ -991,15 +1008,17 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
 {
-       prt_printf(out, "nr_freed:\t%zu\n",     atomic_long_read(&c->nr_freed));
-       prt_printf(out, "nr_keys:\t%lu\n",      atomic_long_read(&c->nr_keys));
-       prt_printf(out, "nr_dirty:\t%lu\n",     atomic_long_read(&c->nr_dirty));
+       prt_printf(out, "nr_freed:\t%zu",       atomic_long_read(&c->nr_freed));
+       prt_newline(out);
+       prt_printf(out, "nr_keys:\t%lu",        atomic_long_read(&c->nr_keys));
+       prt_newline(out);
+       prt_printf(out, "nr_dirty:\t%lu",       atomic_long_read(&c->nr_dirty));
+       prt_newline(out);
 }
 
 void bch2_btree_key_cache_exit(void)
 {
-       if (bch2_key_cache)
-               kmem_cache_destroy(bch2_key_cache);
+       kmem_cache_destroy(bch2_key_cache);
 }
 
 int __init bch2_btree_key_cache_init(void)
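
Two small notes on this file: kmem_cache_destroy() accepts NULL, so the dropped guard was redundant; and the to_text switch from embedded "\n" to prt_newline() matters because a printbuf applies per-line state (indent level, tabstops) when it starts a line. A toy sketch of the latter, with hypothetical names:

#include <stdio.h>

struct toy_printbuf { FILE *f; unsigned indent; };

/* per-line state (here just indentation) is applied as the line starts */
static void toy_prt_newline(struct toy_printbuf *p)
{
	fprintf(p->f, "\n%*s", (int) p->indent, "");
}
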
index 93a6ebed3aba25ebf2873845d02d9b5da0510a3d..9d090437d8f6a5d6cb373076bdeceeb0a41499bd 100644 (file)
@@ -274,7 +274,7 @@ next:
 
                        b = &READ_ONCE(path->l[top->level].b)->c;
 
-                       if (unlikely(IS_ERR_OR_NULL(b))) {
+                       if (IS_ERR_OR_NULL(b)) {
                                BUG_ON(!lock_graph_remove_non_waiters(&g));
                                goto next;
                        }
@@ -605,7 +605,7 @@ int bch2_trans_relock(struct btree_trans *trans)
        struct btree_path *path;
 
        if (unlikely(trans->restarted))
-               return - ((int) trans->restarted);
+               return -((int) trans->restarted);
 
        trans_for_each_path(trans, path)
                if (path->should_be_locked &&
index af226eed818bc5d0b2072ae95fa70f824ce0d81c..892d1231755164dede6deafd9b61ad65148d9919 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/rhashtable.h>
 #include <linux/six.h>
 
-#include "bkey_methods.h"
+//#include "bkey_methods.h"
 #include "buckets_types.h"
 #include "darray.h"
 #include "journal_types.h"
@@ -160,6 +160,16 @@ struct btree_cache {
        /* Number of elements in live + freeable lists */
        unsigned                used;
        unsigned                reserve;
+       unsigned                freed;
+       unsigned                not_freed_lock_intent;
+       unsigned                not_freed_lock_write;
+       unsigned                not_freed_dirty;
+       unsigned                not_freed_read_in_flight;
+       unsigned                not_freed_write_in_flight;
+       unsigned                not_freed_noevict;
+       unsigned                not_freed_write_blocked;
+       unsigned                not_freed_will_make_reachable;
+       unsigned                not_freed_access_bit;
        atomic_t                dirty;
        struct shrinker         shrink;
 
@@ -408,6 +418,7 @@ struct btree_trans {
        bool                    in_traverse_all:1;
        bool                    memory_allocation_failure:1;
        bool                    is_initial_gc:1;
+       bool                    journal_replay_not_finished:1;
        enum bch_errcode        restarted:16;
        u32                     restart_count;
        unsigned long           last_restarted_ip;
index 03c4fd0998945447f4075ddc1ec8ed05c5e1f074..40debf7563f8d5fdecd6185c3823d7dfda72b1cd 100644 (file)
@@ -2046,7 +2046,7 @@ static int async_btree_node_rewrite_trans(struct btree_trans *trans,
                goto out;
 
        ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-out :
+out:
        bch2_trans_iter_exit(trans, &iter);
 
        return ret;
index b166ab4b4a3b53fdfbc99b3a5a34ad940e73a5e2..3a68382013e79620351e0ba17bcd0e45708f4779 100644 (file)
@@ -339,7 +339,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck = (void *) path->l[0].b;
-       unsigned old_u64s = ck->u64s, new_u64s;
+       unsigned new_u64s;
        struct bkey_i *new_k;
 
        EBUG_ON(path->level);
@@ -368,12 +368,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 
        ck->u64s        = new_u64s;
        ck->k           = new_k;
-       /*
-        * Keys returned by peek() are no longer valid pointers, so we need a
-        * transaction restart:
-        */
-       trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
-       return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
+       return 0;
 }
 
 /* Triggers: */
@@ -1385,6 +1380,37 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
        return ret;
 }
 
+static int __must_check
+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
+                               struct bkey_i *k, enum btree_update_flags flags,
+                               unsigned long ip);
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+                                           struct btree_path *path,
+                                           struct btree_insert_entry *i,
+                                           enum btree_update_flags flags,
+                                           unsigned long ip)
+{
+       struct btree_path *btree_path;
+       int ret;
+
+       i->key_cache_already_flushed = true;
+       i->flags |= BTREE_TRIGGER_NORUN;
+
+       btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+                                  BTREE_ITER_INTENT, _THIS_IP_);
+
+       ret = bch2_btree_path_traverse(trans, btree_path, 0);
+       if (ret)
+               goto err;
+
+       btree_path_set_should_be_locked(btree_path);
+       ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip);
+err:
+       bch2_path_put(trans, btree_path, true);
+       return ret;
+}
+
 static int __must_check
 bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
                                struct bkey_i *k, enum btree_update_flags flags,
@@ -1392,7 +1418,6 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i, n;
-       int ret = 0;
 
        BUG_ON(!path->should_be_locked);
 
@@ -1461,27 +1486,10 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
         * the key cache - but the key has to exist in the btree for that to
         * work:
         */
-       if (path->cached &&
-           bkey_deleted(&i->old_k)) {
-               struct btree_path *btree_path;
-
-               i->key_cache_already_flushed = true;
-               i->flags |= BTREE_TRIGGER_NORUN;
+       if (unlikely(path->cached && bkey_deleted(&i->old_k)))
+               return flush_new_cached_update(trans, path, i, flags, ip);
 
-               btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
-                                          BTREE_ITER_INTENT, _THIS_IP_);
-
-               ret = bch2_btree_path_traverse(trans, btree_path, 0);
-               if (ret)
-                       goto err;
-
-               btree_path_set_should_be_locked(btree_path);
-               ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
-err:
-               bch2_path_put(trans, btree_path, true);
-       }
-
-       return ret;
+       return 0;
 }
 
 static int __must_check
index c611931f88be39cb96e4f76e9024b14f9d9530bb..116711fc01fb30f501ad206fb9c6a7f70362ca3f 100644 (file)
@@ -89,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
                            : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
 {
        struct bch_fs *c = ca->fs;
-       struct bch_dev_usage ret;
        unsigned seq, i, u64s = dev_usage_u64s();
 
        do {
                seq = read_seqcount_begin(&c->usage_lock);
-               memcpy(&ret, ca->usage_base, u64s * sizeof(u64));
+               memcpy(usage, ca->usage_base, u64s * sizeof(u64));
                for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-                       acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s);
+                       acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
        } while (read_seqcount_retry(&c->usage_lock, seq));
-
-       return ret;
 }
 
 static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -923,7 +920,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 {
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
@@ -1115,10 +1112,10 @@ int bch2_mark_inode(struct btree_trans *trans,
        u64 journal_seq = trans->journal_res.seq;
 
        if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+               struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
 
                BUG_ON(!journal_seq);
-               BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+               BUG_ON(new.k->type != KEY_TYPE_inode_v3);
 
                v->bi_journal_seq = cpu_to_le64(journal_seq);
        }
@@ -1142,7 +1139,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
                          unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bch_fs_usage __percpu *fs_usage;
        unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
        s64 sectors = (s64) k.k->size;
@@ -1221,7 +1218,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
                        unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
        struct reflink_gc *ref;
        size_t l, r, m;
@@ -2113,5 +2110,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
                        return -ENOMEM;
        }
 
-       return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
+       return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
 }
index 6881502d95f1abf9b62b44997c9c83bd42abeb79..56c06ccde14f8a39a19ded9475d6861624f35d63 100644 (file)
@@ -139,7 +139,15 @@ static inline u8 ptr_stale(struct bch_dev *ca,
 
 /* Device usage: */
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+{
+       struct bch_dev_usage ret;
+
+       bch2_dev_usage_read_fast(ca, &ret);
+       return ret;
+}
+
 void bch2_dev_usage_init(struct bch_dev *);
 
 static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve)
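
bch2_dev_usage_read() becomes a thin inline wrapper over an out-parameter core: callers that already have a struct bch_dev_usage to fill avoid the extra copy a by-value return implies, while existing callers keep the convenient form. A self-contained sketch of the pattern, with hypothetical names:

struct big_stats { unsigned long v[16]; };
struct stats_source { unsigned long raw[16]; };

/* core: fills caller-provided storage, no struct copy */
static void stats_read_fast(const struct stats_source *src, struct big_stats *out)
{
	unsigned i;

	for (i = 0; i < 16; i++)
		out->v[i] = src->raw[i];
}

/* compatibility wrapper: old by-value interface, same core */
static inline struct big_stats stats_read(const struct stats_source *src)
{
	struct big_stats ret;

	stats_read_fast(src, &ret);
	return ret;
}
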
@@ -240,8 +248,6 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct
 int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 
-int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-
 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
 int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
index b5850a761b91037dbbffb56eb5a7e1ae37f7cde6..3268e8d48603372f6f6178e346393edb068ede7c 100644 (file)
@@ -131,7 +131,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
                size_t orig_len = len;
                int ret, i;
 
-               sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL);
+               sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
                if (!sg)
                        return -ENOMEM;
 
index f692f35a6a98e735784c9a856b4833c67d834aff..2b7080b67ecac518d297b77c953c2c6374f9bd23 100644 (file)
@@ -377,7 +377,7 @@ static unsigned __bio_compress(struct bch_fs *c,
 
        /* If it's only one block, don't bother trying to compress: */
        if (src->bi_iter.bi_size <= c->opts.block_size)
-               return 0;
+               return BCH_COMPRESSION_TYPE_incompressible;
 
        dst_data = bio_map_or_bounce(c, dst, WRITE);
        src_data = bio_map_or_bounce(c, src, READ);
index 5ef35e3be7d64f0d91aac88c4629d94e6d6166a1..b75ff07e59210175fd1a18bad2eef5f2bd08018d 100644 (file)
@@ -312,7 +312,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
        bch2_write_op_init(&m->op, c, io_opts);
        m->op.pos       = bkey_start_pos(k.k);
        m->op.version   = k.k->version;
-       m->op.target    = data_opts.target,
+       m->op.target    = data_opts.target;
        m->op.write_point = wp;
        m->op.flags     |= BCH_WRITE_PAGES_STABLE|
                BCH_WRITE_PAGES_OWNED|
index d87131f58c80fa14990c5d14e2034e20b3ee5b0d..57602c8e6c34a6ec64da4f2934ce420f2e25a4a0 100644 (file)
@@ -477,7 +477,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
                if (i->iter < tbl->size) {
                        rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
                                bch2_cached_btree_node_to_text(&i->buf, c, b);
-                       i->iter++;;
+                       i->iter++;
                } else {
                        done = true;
                }
@@ -637,11 +637,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
                if (!i->size)
                        break;
 
-               if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) ||
-                   !c->btree_transaction_fns[i->iter])
+               if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
+                   !bch2_btree_transaction_fns[i->iter])
                        break;
 
-               prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]);
+               prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
                prt_newline(&i->buf);
                printbuf_indent_add(&i->buf, 2);
 
index 4d942d224a088303fd9a79684fed652ac48daf6d..288f46b55876b72bf0ca004904a6d67452956a90 100644 (file)
@@ -103,7 +103,7 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
 
        if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) {
                prt_printf(err, "value too big (%zu > %u)",
-                      bkey_val_u64s(k.k),dirent_val_u64s(len));
+                      bkey_val_u64s(k.k), dirent_val_u64s(len));
                return -EINVAL;
        }
 
index 2ca13014b9c44537057cddceb1da03065c54cca6..9e2a4ed48b42bedf70f373c564452d3a66c3b4f8 100644 (file)
@@ -292,7 +292,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
                if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
                    lp.crc.uncompressed_size) {
                        /* can use left extent's crc entry */
-               } else if (lp.crc.live_size <= rp.crc.offset ) {
+               } else if (lp.crc.live_size <= rp.crc.offset) {
                        /* can use right extent's crc entry */
                } else {
                        /* check if checksums can be merged: */
@@ -351,7 +351,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
                        if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
                            crc_l.uncompressed_size) {
                                /* can use left extent's crc entry */
-                       } else if (crc_l.live_size <= crc_r.offset ) {
+                       } else if (crc_l.live_size <= crc_r.offset) {
                                /* can use right extent's crc entry */
                                crc_r.offset -= crc_l.live_size;
                                bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
index e9dd1d13ec7e97d39acf2b2472092934b06508b5..1f2e1fc4f6b22292e4932b9f82a45767460647a4 100644 (file)
@@ -487,11 +487,11 @@ int bch2_rename_trans(struct btree_trans *trans,
        ret =   bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
                (src_dir.inum != dst_dir.inum
                 ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
-                : 0 ) ?:
+                : 0) ?:
                bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
                (dst_inum.inum
                 ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
-                : 0 );
+                : 0);
 err:
        bch2_trans_iter_exit(trans, &dst_inode_iter);
        bch2_trans_iter_exit(trans, &src_inode_iter);
index 7429206391e2df13d9a202111e14e8720426d2eb..706180b97a7711138bfb9af59952c748fad6c7a3 100644 (file)
@@ -1684,7 +1684,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
                unsigned pg_len = min_t(unsigned, len - copied,
                                        PAGE_SIZE - pg_offset);
                unsigned pg_copied = copy_page_from_iter_atomic(page,
-                                               pg_offset, pg_len,iter);
+                                               pg_offset, pg_len, iter);
 
                if (!pg_copied)
                        break;
@@ -2137,8 +2137,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
                        struct iovec *iov = dio->inline_vecs;
 
                        if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
-                               iov = kmalloc(dio->iter.nr_segs * sizeof(*iov),
-                                             GFP_KERNEL);
+                               iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
+                                                   GFP_KERNEL);
                                if (unlikely(!iov)) {
                                        dio->sync = sync = true;
                                        goto do_io;
@@ -2713,7 +2713,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
 
        truncate_pagecache_range(&inode->v, offset, end - 1);
 
-       if (block_start < block_end ) {
+       if (block_start < block_end) {
                s64 i_sectors_delta = 0;
 
                ret = bch2_fpunch(c, inode_inum(inode),
index bf82737d4ec2deb7927a650bb904c2698942cd5f..186faa54b590f1e736c1b660d5c83d1bc56cdbda 100644 (file)
@@ -528,7 +528,7 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
 
        inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
                              (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
-       if (unlikely(IS_ERR(inode)))
+       if (IS_ERR(inode))
                return bch2_err_class(PTR_ERR(inode));
 
        inode_lock(&inode->v);
@@ -1846,7 +1846,7 @@ got_sb:
        sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
        sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
        c->vfs_sb               = sb;
-       strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+       strscpy(sb->s_id, c->name, sizeof(sb->s_id));
 
        ret = super_setup_bdi(sb);
        if (ret)
@@ -1917,8 +1917,7 @@ MODULE_ALIAS_FS("bcachefs");
 void bch2_vfs_exit(void)
 {
        unregister_filesystem(&bcache_fs_type);
-       if (bch2_inode_cache)
-               kmem_cache_destroy(bch2_inode_cache);
+       kmem_cache_destroy(bch2_inode_cache);
 }
 
 int __init bch2_vfs_init(void)
index 12f2ef4417cb31255a405d5517ee727922ff5623..ca95d85b73488ef849c54ab26e044d72a7e76e32 100644 (file)
@@ -2044,7 +2044,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
 {
        if (t->nr == t->size) {
                size_t new_size = max_t(size_t, 128UL, t->size * 2);
-               void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
+               void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
+
                if (!d) {
                        bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
                                new_size);
index 1f2782fc5a2dfd2f4bfc38a88a189313726ba507..1a0d2608c058662d1f8d1238f5093aa22fd7ce11 100644 (file)
@@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
        return bytes;
 }
 
-void bch2_inode_pack(struct bch_fs *c,
-                    struct bkey_inode_buf *packed,
-                    const struct bch_inode_unpacked *inode)
+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
+                                          const struct bch_inode_unpacked *inode)
 {
-       struct bkey_i_inode_v2 *k = &packed->inode;
+       struct bkey_i_inode_v3 *k = &packed->inode;
        u8 *out = k->v.fields;
        u8 *end = (void *) &packed[1];
        u8 *last_nonzero_field = out;
@@ -72,13 +71,17 @@ void bch2_inode_pack(struct bch_fs *c,
        unsigned bytes;
        int ret;
 
-       bkey_inode_v2_init(&packed->inode.k_i);
+       bkey_inode_v3_init(&packed->inode.k_i);
        packed->inode.k.p.offset        = inode->bi_inum;
        packed->inode.v.bi_journal_seq  = cpu_to_le64(inode->bi_journal_seq);
        packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
        packed->inode.v.bi_flags        = cpu_to_le64(inode->bi_flags);
-       packed->inode.v.bi_flags        = cpu_to_le64(inode->bi_flags);
-       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
+       packed->inode.v.bi_sectors      = cpu_to_le64(inode->bi_sectors);
+       packed->inode.v.bi_size         = cpu_to_le64(inode->bi_size);
+       packed->inode.v.bi_version      = cpu_to_le64(inode->bi_version);
+       SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
+       SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
+
 
 #define x(_name, _bits)                                                        \
        nr_fields++;                                                    \
@@ -99,7 +102,7 @@ void bch2_inode_pack(struct bch_fs *c,
                        *out++ = 0;                                     \
        }
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
        BUG_ON(out > end);
 
@@ -110,7 +113,7 @@ void bch2_inode_pack(struct bch_fs *c,
        set_bkey_val_bytes(&packed->inode.k, bytes);
        memset_u64s_tail(&packed->inode.v, 0, bytes);
 
-       SET_INODEv2_NR_FIELDS(&k->v, nr_fields);
+       SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
                struct bch_inode_unpacked unpacked;
@@ -120,16 +123,25 @@ void bch2_inode_pack(struct bch_fs *c,
                BUG_ON(ret);
                BUG_ON(unpacked.bi_inum         != inode->bi_inum);
                BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
+               BUG_ON(unpacked.bi_sectors      != inode->bi_sectors);
+               BUG_ON(unpacked.bi_size         != inode->bi_size);
+               BUG_ON(unpacked.bi_version      != inode->bi_version);
                BUG_ON(unpacked.bi_mode         != inode->bi_mode);
 
 #define x(_name, _bits)        if (unpacked._name != inode->_name)             \
                        panic("unpacked %llu should be %llu",           \
                              (u64) unpacked._name, (u64) inode->_name);
-               BCH_INODE_FIELDS()
+               BCH_INODE_FIELDS_v3()
 #undef  x
        }
 }
 
+void bch2_inode_pack(struct bkey_inode_buf *packed,
+                    const struct bch_inode_unpacked *inode)
+{
+       bch2_inode_pack_inlined(packed, inode);
+}
+
 static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
                                struct bch_inode_unpacked *unpacked)
 {
@@ -157,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
        unpacked->_name = field[1];                                     \
        in += ret;
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v2()
 #undef  x
 
        /* XXX: signal if there were more fields than expected? */
@@ -196,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
                return -1;                                              \
        fieldnr++;
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v2()
 #undef  x
 
        /* XXX: signal if there were more fields than expected? */
        return 0;
 }
 
-int bch2_inode_unpack(struct bkey_s_c k,
-                     struct bch_inode_unpacked *unpacked)
+static int bch2_inode_unpack_v3(struct bkey_s_c k,
+                               struct bch_inode_unpacked *unpacked)
+{
+       struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+       const u8 *in = inode.v->fields;
+       const u8 *end = bkey_val_end(inode);
+       unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
+       unsigned fieldnr = 0;
+       int ret;
+       u64 v[2];
+
+       unpacked->bi_inum       = inode.k->p.offset;
+       unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
+       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
+       unpacked->bi_flags      = le64_to_cpu(inode.v->bi_flags);
+       unpacked->bi_sectors    = le64_to_cpu(inode.v->bi_sectors);
+       unpacked->bi_size       = le64_to_cpu(inode.v->bi_size);
+       unpacked->bi_version    = le64_to_cpu(inode.v->bi_version);
+       unpacked->bi_mode       = INODEv3_MODE(inode.v);
+
+#define x(_name, _bits)                                                        \
+       if (fieldnr < nr_fields) {                                      \
+               ret = bch2_varint_decode_fast(in, end, &v[0]);          \
+               if (ret < 0)                                            \
+                       return ret;                                     \
+               in += ret;                                              \
+                                                                       \
+               if (_bits > 64) {                                       \
+                       ret = bch2_varint_decode_fast(in, end, &v[1]);  \
+                       if (ret < 0)                                    \
+                               return ret;                             \
+                       in += ret;                                      \
+               } else {                                                \
+                       v[1] = 0;                                       \
+               }                                                       \
+       } else {                                                        \
+               v[0] = v[1] = 0;                                        \
+       }                                                               \
+                                                                       \
+       unpacked->_name = v[0];                                         \
+       if (v[1] || v[0] != unpacked->_name)                            \
+               return -1;                                              \
+       fieldnr++;
+
+       BCH_INODE_FIELDS_v3()
+#undef  x
+
+       /* XXX: signal if there were more fields than expected? */
+       return 0;
+}
+
+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
+                                              struct bch_inode_unpacked *unpacked)
 {
        switch (k.k->type) {
        case KEY_TYPE_inode: {
@@ -243,6 +306,14 @@ int bch2_inode_unpack(struct bkey_s_c k,
        }
 }
 
+int bch2_inode_unpack(struct bkey_s_c k,
+                     struct bch_inode_unpacked *unpacked)
+{
+       if (likely(k.k->type == KEY_TYPE_inode_v3))
+               return bch2_inode_unpack_v3(k, unpacked);
+       return bch2_inode_unpack_slowpath(k, unpacked);
+}
+
 int bch2_inode_peek(struct btree_trans *trans,
                    struct btree_iter *iter,
                    struct bch_inode_unpacked *inode,
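
The v3 unpack above reads the fixed-width header fields directly and then decodes one varint per x() field, defaulting fields past nr_fields to zero, which is what lets new fields be appended without a format bump. For illustration only, a generic LEB128-style varint decoder (bch2_varint_decode_fast() uses a different, bcachefs-specific encoding):

#include <stdint.h>
#include <stddef.h>

/* Returns bytes consumed, or -1 on truncated/oversized input. */
static int varint_decode(const uint8_t *in, const uint8_t *end, uint64_t *out)
{
	const uint8_t *p = in;
	uint64_t v = 0;
	unsigned shift = 0;

	while (p < end && shift < 64) {
		v |= (uint64_t) (*p & 0x7f) << shift;
		if (!(*p++ & 0x80)) {
			*out = v;
			return p - in;
		}
		shift += 7;
	}
	return -1;
}
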
@@ -288,11 +359,29 @@ int bch2_inode_write(struct btree_trans *trans,
        if (IS_ERR(inode_p))
                return PTR_ERR(inode_p);
 
-       bch2_inode_pack(trans->c, inode_p, inode);
+       bch2_inode_pack_inlined(inode_p, inode);
        inode_p->inode.k.p.snapshot = iter->snapshot;
        return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
 }
 
+struct bkey_s_c bch2_inode_to_v3(struct btree_trans *trans, struct bkey_s_c k)
+{
+       struct bch_inode_unpacked u;
+       struct bkey_inode_buf *inode_p;
+       int ret;
+
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+       if (IS_ERR(inode_p))
+               return bkey_s_c_err(PTR_ERR(inode_p));
+
+       ret = bch2_inode_unpack(k, &u);
+       if (ret)
+               return bkey_s_c_err(ret);
+
+       bch2_inode_pack(inode_p, &u);
+       return bkey_i_to_s_c(&inode_p->inode.k_i);
+}
+
 static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
 {
        struct bch_inode_unpacked unpacked;
@@ -307,7 +396,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
                return -EINVAL;
        }
 
-       if (bch2_inode_unpack(k, &unpacked)){
+       if (bch2_inode_unpack(k, &unpacked)) {
                prt_printf(err, "invalid variable length fields");
                return -EINVAL;
        }
@@ -378,15 +467,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
        return __bch2_inode_invalid(k, err);
 }
 
-static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                         int rw, struct printbuf *err)
+{
+       struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+
+       if (bkey_val_bytes(k.k) < sizeof(*inode.v)) {
+               prt_printf(err, "incorrect value size (%zu < %zu)",
+                      bkey_val_bytes(k.k), sizeof(*inode.v));
+               return -EINVAL;
+       }
+
+       if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
+           INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
+               prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
+                      INODEv3_FIELDS_START(inode.v),
+                      INODEv3_FIELDS_START_INITIAL,
+                      bkey_val_u64s(inode.k));
+               return -EINVAL;
+       }
+
+       if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
+               prt_printf(err, "invalid str hash type (%llu >= %u)",
+                      INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
+               return -EINVAL;
+       }
+
+       return __bch2_inode_invalid(k, err);
+}
+
+static void __bch2_inode_unpacked_to_text(struct printbuf *out,
+                                         struct bch_inode_unpacked *inode)
 {
-       prt_printf(out, "mode %o flags %x journal_seq %llu",
+       prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
               inode->bi_mode, inode->bi_flags,
-              inode->bi_journal_seq);
+              inode->bi_journal_seq,
+              inode->bi_size,
+              inode->bi_sectors,
+              inode->bi_version);
 
 #define x(_name, _bits)                                                \
        prt_printf(out, " "#_name " %llu", (u64) inode->_name);
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
 }
 
@@ -396,8 +518,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
        __bch2_inode_unpacked_to_text(out, inode);
 }
 
-void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
-                      struct bkey_s_c k)
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
        struct bch_inode_unpacked inode;
 
index 2ac2fc10513bb3162521e7eea1af4575f0b85fa0..2915f4f96f4bb3c2fc8f2190ffad7fe86a8d2408 100644 (file)
@@ -2,12 +2,14 @@
 #ifndef _BCACHEFS_INODE_H
 #define _BCACHEFS_INODE_H
 
+#include "bkey.h"
 #include "opts.h"
 
 extern const char * const bch2_inode_opts[];
 
 int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
+int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
 #define bch2_bkey_ops_inode (struct bkey_ops) {                \
@@ -24,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
        .atomic_trigger = bch2_mark_inode,              \
 }
 
+#define bch2_bkey_ops_inode_v3 (struct bkey_ops) {     \
+       .key_invalid    = bch2_inode_v3_invalid,        \
+       .val_to_text    = bch2_inode_to_text,           \
+       .trans_trigger  = bch2_trans_mark_inode,        \
+       .atomic_trigger = bch2_mark_inode,              \
+}
+
 static inline bool bkey_is_inode(const struct bkey *k)
 {
        return  k->type == KEY_TYPE_inode ||
-               k->type == KEY_TYPE_inode_v2;
+               k->type == KEY_TYPE_inode_v2 ||
+               k->type == KEY_TYPE_inode_v3;
 }
 
 int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
@@ -51,25 +61,28 @@ struct bch_inode_unpacked {
        u64                     bi_inum;
        u64                     bi_journal_seq;
        __le64                  bi_hash_seed;
+       u64                     bi_size;
+       u64                     bi_sectors;
+       u64                     bi_version;
        u32                     bi_flags;
        u16                     bi_mode;
 
 #define x(_name, _bits)        u##_bits _name;
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
 };
 
 struct bkey_inode_buf {
-       struct bkey_i_inode_v2  inode;
+       struct bkey_i_inode_v3  inode;
 
 #define x(_name, _bits)                + 8 + _bits / 8
-       u8              _pad[0 + BCH_INODE_FIELDS()];
+       u8              _pad[0 + BCH_INODE_FIELDS_v3()];
 #undef  x
 } __attribute__((packed, aligned(8)));
 
-void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
-                    const struct bch_inode_unpacked *);
+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
 int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
+struct bkey_s_c bch2_inode_to_v3(struct btree_trans *, struct bkey_s_c);
 
 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
 
index 558d0c2328160b0e29943287601413d1de07c54f..5971569e31336043ce4bd69dda9b8028b703b6df 100644 (file)
@@ -242,8 +242,7 @@ int bch2_extent_update(struct btree_trans *trans,
                       s64 *i_sectors_delta_total,
                       bool check_enospc)
 {
-       struct btree_iter inode_iter;
-       struct bch_inode_unpacked inode_u;
+       struct btree_iter inode_iter = { NULL };
        struct bpos next_pos;
        bool usage_increasing;
        s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@@ -283,32 +282,67 @@ int bch2_extent_update(struct btree_trans *trans,
                        return ret;
        }
 
-       ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
-                             BTREE_ITER_INTENT);
-       if (ret)
-               return ret;
+       if (new_i_size || i_sectors_delta) {
+               struct bkey_s_c k;
+               struct bkey_s_c_inode_v3 inode;
+               struct bkey_i_inode_v3 *new_inode;
+               bool i_size_update;
+
+               bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes,
+                                    SPOS(0, inum.inum, iter->snapshot),
+                                    BTREE_ITER_INTENT|BTREE_ITER_CACHED);
+               k = bch2_btree_iter_peek_slot(&inode_iter);
+               ret = bkey_err(k);
+               if (unlikely(ret))
+                       goto err;
+
+               ret = bkey_is_inode(k.k) ? 0 : -ENOENT;
+               if (unlikely(ret))
+                       goto err;
+
+               if (unlikely(k.k->type != KEY_TYPE_inode_v3)) {
+                       k = bch2_inode_to_v3(trans, k);
+                       ret = bkey_err(k);
+                       if (unlikely(ret))
+                               goto err;
+               }
+
+               inode = bkey_s_c_to_inode_v3(k);
+               i_size_update = !(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
+                       new_i_size > le64_to_cpu(inode.v->bi_size);
+
+               if (!i_sectors_delta && !i_size_update)
+                       goto no_inode_update;
+
+               new_inode = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+               ret = PTR_ERR_OR_ZERO(new_inode);
+               if (unlikely(ret))
+                       goto err;
 
-       if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-           new_i_size > inode_u.bi_size)
-               inode_u.bi_size = new_i_size;
+               bkey_reassemble(&new_inode->k_i, k);
 
-       inode_u.bi_sectors += i_sectors_delta;
+               if (i_size_update)
+                       new_inode->v.bi_size = cpu_to_le64(new_i_size);
 
+               le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta);
+               ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0);
+               if (unlikely(ret))
+                       goto err;
+       }
+no_inode_update:
        ret =   bch2_trans_update(trans, iter, k, 0) ?:
-               bch2_inode_write(trans, &inode_iter, &inode_u) ?:
                bch2_trans_commit(trans, disk_res, journal_seq,
                                BTREE_INSERT_NOCHECK_RW|
                                BTREE_INSERT_NOFAIL);
-       bch2_trans_iter_exit(trans, &inode_iter);
-
-       if (ret)
-               return ret;
+       if (unlikely(ret))
+               goto err;
 
        if (i_sectors_delta_total)
                *i_sectors_delta_total += i_sectors_delta;
        bch2_btree_iter_set_pos(iter, next_pos);
-
-       return 0;
+err:
+       bch2_trans_iter_exit(trans, &inode_iter);
+       return ret;
 }
 
 /*
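
This hunk is the change the commit subject refers to: instead of bch2_inode_peek() + bch2_inode_write(), a full varint unpack and repack of every inode field on every extent commit, the write path peeks the inode key directly (upgrading old keys via bch2_inode_to_v3()), reassembles it, and edits bi_size/bi_sectors, which inode_v3 stores as fixed-width fields. A generic sketch of the underlying idea, updating fixed-offset fields of an encoded record without decoding the rest, with hypothetical names; the real fields are little-endian __le64, this assumes host byte order:

#include <stdint.h>
#include <string.h>

struct rec_hdr {	/* fixed-width prefix of the encoded record */
	uint64_t size;
	uint64_t sectors;
	/* variable-width packed fields follow, left untouched */
};

static void rec_apply_extent(void *encoded, uint64_t new_size, int64_t sectors_delta)
{
	struct rec_hdr h;

	memcpy(&h, encoded, sizeof(h));	/* encoded data may be unaligned */
	if (new_size > h.size)		/* mirrors the i_size_update check */
		h.size = new_size;
	h.sectors += (uint64_t) sectors_delta;
	memcpy(encoded, &h, sizeof(h));
}
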
@@ -926,8 +960,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
        saved_iter = dst->bi_iter;
 
        do {
-               struct bch_extent_crc_unpacked crc =
-                       (struct bch_extent_crc_unpacked) { 0 };
+               struct bch_extent_crc_unpacked crc = { 0 };
                struct bversion version = op->version;
                size_t dst_len, src_len;
 
@@ -979,6 +1012,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                    !crc_is_compressed(crc) &&
                    bch2_csum_type_is_encryption(op->crc.csum_type) ==
                    bch2_csum_type_is_encryption(op->csum_type)) {
+                       u8 compression_type = crc.compression_type;
+                       u16 nonce = crc.nonce;
                        /*
                         * Note: when we're using rechecksum(), we need to be
                         * checksumming @src because it has all the data our
@@ -997,6 +1032,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                                        bio_sectors(src) - (src_len >> 9),
                                        op->csum_type))
                                goto csum_err;
+                       /*
+                        * bch2_rechecksum_bio() sets compression_type on crc from
+                        * op->crc; this isn't always correct, as we may be changing
+                        * an extent from uncompressed to incompressible.
+                        */
+                       crc.compression_type = compression_type;
+                       crc.nonce = nonce;
                } else {
                        if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
                            bch2_rechecksum_bio(c, src, version, op->crc,
@@ -1115,8 +1157,8 @@ again:
                                      BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
                EBUG_ON(!wp);
 
-               if (unlikely(IS_ERR(wp))) {
-                       if (unlikely(PTR_ERR(wp) != -EAGAIN)) {
+               if (IS_ERR(wp)) {
+                       if (unlikely(wp != ERR_PTR(-EAGAIN))) {
                                ret = PTR_ERR(wp);
                                goto err;
                        }
index ab594623341f8c76c7c25fcfc94220037041104e..95c29229d3fe658c6ff9e58361bdf0b2c125e50a 100644 (file)
@@ -739,7 +739,7 @@ int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
                return ret;
 
        entry = container_of(journal_res_entry(j, &res),
-                            struct jset_entry_log, entry);;
+                            struct jset_entry_log, entry);
        memset(entry, 0, u64s * sizeof(u64));
        entry->entry.type = BCH_JSET_ENTRY_log;
        entry->entry.u64s = u64s - 1;
@@ -796,10 +796,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                bch2_journal_block(&c->journal);
        }
 
-       bu              = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);
-       ob              = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL);
-       new_buckets     = kzalloc(nr * sizeof(u64), GFP_KERNEL);
-       new_bucket_seq  = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+       bu              = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
+       ob              = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
+       new_buckets     = kcalloc(nr, sizeof(u64), GFP_KERNEL);
+       new_bucket_seq  = kcalloc(nr, sizeof(u64), GFP_KERNEL);
        if (!bu || !ob || !new_buckets || !new_bucket_seq) {
                ret = -ENOMEM;
                goto err_unblock;
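
kzalloc(n * size) becomes kcalloc(n, size): the two-argument allocators (like kmalloc_array() in the other hunks of this commit) check the multiplication for overflow, so a corruption-controlled n fails cleanly instead of yielding an undersized buffer. A userspace sketch of the check:

#include <stdlib.h>
#include <stdint.h>

static void *alloc_array(size_t n, size_t size)
{
	if (size && n > SIZE_MAX / size)
		return NULL;	/* n * size would overflow */
	return calloc(n, size);	/* calloc zeroes, like kcalloc */
}
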
@@ -1265,7 +1265,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        rcu_read_lock();
        s = READ_ONCE(j->reservations);
 
-       prt_printf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size);
+       prt_printf(out, "dirty journal entries:\t%llu/%llu\n",  fifo_used(&j->pin), j->pin.size);
        prt_printf(out, "seq:\t\t\t%llu\n",                     journal_cur_seq(j));
        prt_printf(out, "seq_ondisk:\t\t%llu\n",                j->seq_ondisk);
        prt_printf(out, "last_seq:\t\t%llu\n",          journal_last_seq(j));
index d3caa7ea7ce9446b2b9994f41af0782abaa79b08..9428f4233997b0072f7fd473cf45b8e6575b80d0 100644 (file)
  */
 
 #include <linux/hash.h>
+#include <linux/prefetch.h>
 
 #include "journal_types.h"
 
@@ -304,15 +305,26 @@ static inline int journal_res_get_fast(struct journal *j,
 {
        union journal_res_state old, new;
        u64 v = atomic64_read(&j->reservations.counter);
+       unsigned u64s, offset;
 
        do {
                old.v = new.v = v;
 
+               /*
+                * Round up the end of the journal reservation to the next
+                * cacheline boundary:
+                */
+               u64s = res->u64s;
+               offset = sizeof(struct jset) / sizeof(u64) +
+                         new.cur_entry_offset + u64s;
+               u64s += ((offset - 1) & ((SMP_CACHE_BYTES / sizeof(u64)) - 1)) + 1;
+
+
                /*
                 * Check if there is still room in the current journal
                 * entry:
                 */
-               if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
+               if (new.cur_entry_offset + u64s > j->cur_entry_u64s)
                        return 0;
 
                EBUG_ON(!journal_state_count(new, new.idx));
@@ -320,7 +332,7 @@ static inline int journal_res_get_fast(struct journal *j,
                if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
                        return 0;
 
-               new.cur_entry_offset += res->u64s;
+               new.cur_entry_offset += u64s;
                journal_state_inc(&new);
 
                /*
@@ -337,8 +349,15 @@ static inline int journal_res_get_fast(struct journal *j,
 
        res->ref        = true;
        res->idx        = old.idx;
+       res->u64s       = u64s;
        res->offset     = old.cur_entry_offset;
        res->seq        = le64_to_cpu(j->buf[old.idx].data->seq);
+
+       offset = res->offset;
+       while (offset < res->offset + res->u64s) {
+               prefetchw(vstruct_idx(j->buf[res->idx].data, offset));
+               offset += SMP_CACHE_BYTES / sizeof(u64);
+       }
        return 1;
 }
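
Two ideas in the hunk above: pad each journal reservation out to a cacheline boundary so concurrent writers filling the journal buffer don't false-share a line, and prefetch-for-write the reserved range before it is filled. A standalone sketch of both, assuming 64-byte cachelines (the kernel uses SMP_CACHE_BYTES and slightly different rounding arithmetic):

#include <stdint.h>
#include <stddef.h>

#define CACHELINE_U64S	(64 / sizeof(uint64_t))	/* assume 64-byte lines */

/* round an offset (in u64s) up to the next cacheline boundary */
static size_t round_up_u64s(size_t end_offset)
{
	return (end_offset + CACHELINE_U64S - 1) & ~(CACHELINE_U64S - 1);
}

/* warm the reserved range for writing, one line at a time */
static void prefetch_range_for_write(uint64_t *buf, size_t off, size_t u64s)
{
	size_t i;

	for (i = off; i < off + u64s; i += CACHELINE_U64S)
		__builtin_prefetch(&buf[i], 1);	/* 1 = prefetch for write */
}
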
 
index e69595bd1359d588a1c97cbaad177e41c59e871c..e873ce2a3f03a5e9c2ba4d4cfc2ff87c30065ad2 100644 (file)
@@ -232,7 +232,7 @@ void bch2_journal_space_available(struct journal *j)
        if ((j->space[journal_space_clean_ondisk].next_entry <
             j->space[journal_space_clean_ondisk].total) &&
            (clean - clean_ondisk <= total / 8) &&
-           (clean_ondisk * 2 > clean ))
+           (clean_ondisk * 2 > clean))
                set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
@@ -363,7 +363,7 @@ static inline void __journal_pin_drop(struct journal *j,
        list_del_init(&pin->list);
 
        /*
-        * Unpinning a journal entry make make journal_next_bucket() succeed, if
+        * Unpinning a journal entry may make journal_next_bucket() succeed if
         * writing a new last_seq will now make another bucket available:
         */
        if (atomic_dec_and_test(&pin_list->count) &&
index cfdbd92d2164f4360983c2ed47773e0e8272bfb5..c19db0425dd7effc7982f52753827b74a6cdcc98 100644 (file)
@@ -31,7 +31,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb,
        if (!nr)
                return 0;
 
-       b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
+       b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
        if (!b)
                return -ENOMEM;
 
@@ -114,7 +114,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
        if (!nr)
                return 0;
 
-       b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL);
+       b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
        if (!b)
                return -ENOMEM;
 
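[Editor's note: both hunks in this file are argument-order fixes: kmalloc_array(n, size, flags) takes the element count first and the element size second. The byte count is the same either way, since the overflow-checked multiply commutes, so nothing changes at runtime; the point is matching the declared API so readers and static checkers are not misled.]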
index cda77835b9ea62381f3962a1d0029d463fe3b2b1..5e85055b0f9382df6ef9ababd31975c16f50cd98 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey.h"
 #include "keylist.h"
 
 int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
index 55fdacad9b9818e8cadf2173f8b57de0fdab69d9..7486920475f0c6b2b909e014dcb10d79d8265570 100644 (file)
@@ -497,7 +497,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
                /*
                 * The iterator gets unlocked by __bch2_read_extent - need to
                 * save a copy of @k elsewhere:
-                 */
+                */
                bch2_bkey_buf_reassemble(&sk, c, k);
                k = bkey_i_to_s_c(sk.k);
 
@@ -868,7 +868,7 @@ static bool migrate_pred(struct bch_fs *c, void *arg,
                i++;
        }
 
-       return data_opts->rewrite_ptrs != 0;;
+       return data_opts->rewrite_ptrs != 0;
 }
 
 static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
index 18f6ec5cc7d07a7d128477f99455027b2e85c8c5..ea7810a1797500c826ce1b40b162d7725e15d2b9 100644 (file)
@@ -225,7 +225,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
                        .size                   = max_t(size_t, keys->size, 8) * 2,
                };
 
-               new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
+               new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
                if (!new_keys.d) {
                        bch_err(c, "%s: error allocating new key array (size %zu)",
                                __func__, new_keys.size);
@@ -502,7 +502,7 @@ static int journal_keys_sort(struct bch_fs *c)
 
        keys->size = roundup_pow_of_two(nr_keys);
 
-       keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL);
+       keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
        if (!keys->d)
                return -ENOMEM;
 
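[Editor's note: these two hunks, by contrast, change behavior: an open-coded sizeof(x) * n passed to kvmalloc() wraps silently if the product overflows size_t, returning a buffer smaller than the caller believes it has, whereas kvmalloc_array() fails the allocation instead. Schematically:

	/* before: a huge keys->size can wrap the multiply and under-allocate */
	keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL);

	/* after: returns NULL if the count * size product would overflow */
	keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
]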
@@ -1092,6 +1092,9 @@ int bch2_fs_recovery(struct bch_fs *c)
                        c->opts.version_upgrade = true;
                        c->opts.fsck            = true;
                        c->opts.fix_errors      = FSCK_OPT_YES;
+               } else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
+                       bch_info(c, "version prior to inode_v3, upgrade required");
+                       c->opts.version_upgrade = true;
                }
        }
 
@@ -1458,7 +1461,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
-       if (c->sb.version < bcachefs_metadata_version_backpointers)
+       if (c->sb.version < bcachefs_metadata_version_inode_v3)
                c->opts.version_upgrade = true;
 
        if (c->opts.version_upgrade) {
@@ -1537,7 +1540,7 @@ int bch2_fs_initialize(struct bch_fs *c)
                        S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
        root_inode.bi_inum      = BCACHEFS_ROOT_INO;
        root_inode.bi_subvol    = BCACHEFS_ROOT_SUBVOL;
-       bch2_inode_pack(c, &packed_inode, &root_inode);
+       bch2_inode_pack(&packed_inode, &root_inode);
        packed_inode.inode.k.p.snapshot = U32_MAX;
 
        err = "error creating root directory";
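[Editor's note: bch2_inode_pack() has lost its bch_fs argument both here and in cmd_migrate.c; presumably packing now always targets the current inode format rather than consulting per-filesystem state, which lines up with the bcachefs_metadata_version_inode_v3 upgrade forced above and with the commit subject about skipping the unpack/pack cycle in bch2_extent_update().]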
index 87820b2e1ad3e1322216aba57da1e083d6d49d37..cc34b3809206fb1f5666ba41ad42b7958a39e307 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_REPLICAS_H
 #define _BCACHEFS_REPLICAS_H
 
+#include "bkey.h"
 #include "eytzinger.h"
 #include "replicas_types.h"
 
index c062edb3fbc24e6fd5889d1ac138b3e2c10ab9db..dc1a27cc31cd4de56cdc2e44026e4be7b789c34f 100644 (file)
@@ -160,7 +160,7 @@ u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
 
        r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
        memset(ctx, 0, sizeof(*ctx));
-       return (r);
+       return r;
 }
 
 u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
index cbc5979a5181641fc1001740bfefebbdca07d1cc..60c1f03c05af48bd7badfa39e157a53b0bf1cc41 100644 (file)
@@ -100,8 +100,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
 
 void bch2_free_super(struct bch_sb_handle *sb)
 {
-       if (sb->bio)
-               kfree(sb->bio);
+       kfree(sb->bio);
        if (!IS_ERR_OR_NULL(sb->bdev))
                blkdev_put(sb->bdev, sb->mode);
 
@@ -149,8 +148,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
 
                bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
 
-               if (sb->bio)
-                       kfree(sb->bio);
+               kfree(sb->bio);
                sb->bio = bio;
        }
 
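[Editor's note: kfree(NULL) is defined to be a no-op, so the NULL checks removed in both hunks were dead weight; this is the pattern checkpatch.pl flags as ifnullfree.]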
index 3f674bf061ff40554e38c5eb4666a4764be750fa..5be4c40afa47500725e6ea95bb8ab0b0d3acebed 100644 (file)
@@ -327,26 +327,12 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
 {
        int ret;
 
-       ret = bch2_gc_thread_start(c);
-       if (ret) {
-               bch_err(c, "error starting gc thread");
-               return ret;
-       }
-
-       ret = bch2_copygc_start(c);
-       if (ret) {
-               bch_err(c, "error starting copygc thread");
-               return ret;
-       }
-
        ret = bch2_rebalance_start(c);
        if (ret) {
                bch_err(c, "error starting rebalance thread");
                return ret;
        }
 
-       schedule_work(&c->ec_stripe_delete_work);
-
        return 0;
 }
 
@@ -385,6 +371,20 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
+       ret = bch2_gc_thread_start(c);
+       if (ret) {
+               bch_err(c, "error starting gc thread");
+               return ret;
+       }
+
+       ret = bch2_copygc_start(c);
+       if (ret) {
+               bch_err(c, "error starting copygc thread");
+               return ret;
+       }
+
+       schedule_work(&c->ec_stripe_delete_work);
+
        bch2_do_discards(c);
        bch2_do_invalidates(c);
 
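[Editor's note: starting the gc and copygc threads, and kicking off stripe deletion, moves from bch2_fs_read_write_late() into __bch2_fs_read_write() itself, after the allocator is brought up and capacity recalculated; presumably this guarantees those threads are running before the discard and invalidate work queued just below, and on every read-write transition rather than only the late one.]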
@@ -463,8 +463,8 @@ static void __bch2_fs_free(struct bch_fs *c)
        kfree(c->unused_inode_hints);
        free_heap(&c->copygc_heap);
 
-       if (c->io_complete_wq )
-               destroy_workqueue(c->io_complete_wq );
+       if (c->io_complete_wq)
+               destroy_workqueue(c->io_complete_wq);
        if (c->copygc_wq)
                destroy_workqueue(c->copygc_wq);
        if (c->btree_io_complete_wq)
@@ -711,7 +711,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                goto err;
 
        pr_uuid(&name, c->sb.user_uuid.b);
-       strlcpy(c->name, name.buf, sizeof(c->name));
+       strscpy(c->name, name.buf, sizeof(c->name));
        printbuf_exit(&name);
 
        ret = name.allocation_failure ? -ENOMEM : 0;
@@ -1784,9 +1784,8 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        }
 
        ret = bch2_trans_mark_dev_sb(c, ca);
-       if (ret) {
+       if (ret)
                goto err;
-       }
 
        mutex_lock(&c->sb_lock);
        mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
index 103fde97231f994219cd9ee1db4c694ccab92ef0..0f45aef78477326b1f10dd1aeae4fabe385097de 100644 (file)
@@ -175,7 +175,7 @@ read_attribute(minor);
 read_attribute(bucket_size);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
-read_attribute(durability);
+rw_attribute(durability);
 read_attribute(iodone);
 
 read_attribute(io_latency_read);
@@ -425,7 +425,7 @@ SHOW(bch2_fs)
                bch2_btree_updates_to_text(out, c);
 
        if (attr == &sysfs_btree_cache)
-               bch2_btree_cache_to_text(out, c);
+               bch2_btree_cache_to_text(out, &c->btree_cache);
 
        if (attr == &sysfs_btree_key_cache)
                bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
@@ -907,6 +907,19 @@ STORE(bch2_dev)
                mutex_unlock(&c->sb_lock);
        }
 
+       if (attr == &sysfs_durability) {
+               u64 v = strtoul_or_return(buf);
+
+               mutex_lock(&c->sb_lock);
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+
+               if (v != BCH_MEMBER_DURABILITY(mi)) {
+                       SET_BCH_MEMBER_DURABILITY(mi, v + 1);
+                       bch2_write_super(c);
+               }
+               mutex_unlock(&c->sb_lock);
+       }
+
        if (attr == &sysfs_label) {
                char *tmp;
                int ret;
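[Editor's note: two things stand out in the new durability store. First, the v + 1: the superblock field appears to use a biased encoding where raw 0 means "unset" (treated as durability 1), so the stored value is the durability plus one. Second, the guard compares the user's v against the raw field rather than the decoded value, which looks off by one: writing the current durability needlessly re-dirties the superblock, while writing current + 1 is silently skipped. A sketch of the decode side under that assumed encoding:

	/* assumed encoding: raw 0 = unset (defaults to 1), else raw = durability + 1 */
	unsigned durability = BCH_MEMBER_DURABILITY(mi)
		? BCH_MEMBER_DURABILITY(mi) - 1
		: 1;
]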
index f08215af359f83766132965f7178f64875400742..62fa662019ad9dc73eb4985b06f6f47e37d0fae3 100644 (file)
@@ -433,7 +433,7 @@ static void pr_time_units(struct printbuf *out, u64 ns)
 
 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
 {
-       prt_printf(out, name);
+       prt_str(out, name);
        prt_tab(out);
        pr_time_units(out, ns);
        prt_newline(out);
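[Editor's note: this one-liner is a correctness fix, not style: prt_printf() treats its second argument as a format string, so a name containing '%' would be parsed rather than printed; this is the class of bug -Wformat-security exists to catch. prt_str() copies the bytes through verbatim:

	prt_printf(out, "%s", name);	/* safe, but pays for format parsing */
	prt_str(out, name);		/* emits the string as-is */
]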
@@ -786,8 +786,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
        }
 }
 
-#include "eytzinger.h"
-
 static int alignment_ok(const void *base, size_t align)
 {
        return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
index fd2797eaf7739319543d1c2ac3c4be7a533d1513..a32a8995ddc46cb611490122bb43b60a007b0419 100644 (file)
 
 #include <ctype.h>
 #include <errno.h>
+#include <limits.h>
 #include <string.h>
 
+#include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/string.h>
 
@@ -62,6 +64,31 @@ size_t strlcpy(char *dest, const char *src, size_t size)
        return ret;
 }
 
+ssize_t strscpy(char *dest, const char *src, size_t count)
+{
+       long res = 0;
+
+       if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
+               return -E2BIG;
+
+       while (count) {
+               char c;
+
+               c = src[res];
+               dest[res] = c;
+               if (!c)
+                       return res;
+               res++;
+               count--;
+       }
+
+       /* Hit buffer length without finding a NUL; force NUL-termination. */
+       if (res)
+               dest[res-1] = '\0';
+
+       return -E2BIG;
+}
+
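[Editor's note: the shim follows the kernel strscpy() contract: on success it returns the number of characters copied, excluding the NUL; on truncation it still NUL-terminates the destination but returns -E2BIG. That fixes both warts of strlcpy(), which returns strlen(src), so callers rarely notice truncation, and must therefore walk the whole source string even when the destination is already full. For example:

	char name[8];

	ssize_t n = strscpy(name, "bch0", sizeof(name));
	/* n == 4, name == "bch0" */

	n = strscpy(name, "bcachefs-device", sizeof(name));
	/* n == -E2BIG, name == "bcachef": truncated but NUL-terminated */
]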
 void memzero_explicit(void *s, size_t count)
 {
        memset(s, 0, count);
index 3d720bc0f3577490efbbf9608b2dff8531bb6e59..29c498ad9c9c6ca6dcf258dcea0f3f8e14eec0a2 100644 (file)
@@ -52,7 +52,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
        static const unsigned int rounding[] = { 500, 50, 5 };
        int i = 0, j;
        u32 remainder = 0, sf_cap;
-       char tmp[8];
+       char tmp[12];
        const char *unit;
 
        tmp[0] = '\0';
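[Editor's note: widening tmp from 8 to 12 bytes presumably sizes it for the worst-case rendering of a 32-bit remainder: up to 10 decimal digits plus a separator and the trailing NUL is 1 + 10 + 1 = 12, where the old 8-byte buffer risked truncating the formatted fraction for large values.]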