]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to dfaf9a6ee2 lib/printbuf: Clean up headers
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 18 Aug 2022 16:32:10 +0000 (12:32 -0400)
committerKent Overstreet <kent.overstreet@gmail.com>
Thu, 18 Aug 2022 16:49:48 +0000 (12:49 -0400)
32 files changed:
.bcachefs_revision
Makefile
cmd_fs.c
include/linux/printbuf.h
include/linux/string_helpers.h [new file with mode: 0644]
include/trace/events/bcachefs.h
libbcachefs/alloc_foreground.c
libbcachefs/alloc_foreground.h
libbcachefs/backpointers.c
libbcachefs/bcachefs.h
libbcachefs/bkey.c
libbcachefs/bkey.h
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/debug.c
libbcachefs/errcode.h
libbcachefs/fsck.c
libbcachefs/journal.c
libbcachefs/move.c
libbcachefs/rebalance.c
libbcachefs/subvolume.c
libbcachefs/super-io.c
libbcachefs/sysfs.c
libbcachefs/util.c
libbcachefs/util.h
linux/printbuf.c
linux/string_helpers.c [new file with mode: 0644]

index 720981cad6708d59e8a369e1ab7fb12b919ad846..9f7af72c512d87e411df3b7977d19e48dee8a031 100644 (file)
@@ -1 +1 @@
-49c34dadcad9c33b1e8510b5543d60c40fa0bebd
+dfaf9a6ee24f5c415635f9a75f5281f385535ebd
index bed43bdab229ce15a94b53c0dbc4b0f1235bad16..a5a74fedef110475421f9883b82ec0f6bb46566e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ PREFIX?=/usr/local
 PKG_CONFIG?=pkg-config
 INSTALL=install
 
-CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC                             \
+CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC                     \
        -Wno-pointer-sign                                       \
        -fno-strict-aliasing                                    \
        -fno-delete-null-pointer-checks                         \
@@ -195,6 +195,10 @@ update-bcachefs-sources:
        git add linux/generic-radix-tree.c
        cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/
        git add include/linux/kmemleak.h
+       cp $(LINUX_DIR)/include/linux/printbuf.h include/linux/
+       git add include/linux/printbuf.h
+       cp $(LINUX_DIR)/lib/printbuf.c linux/
+       git add linux/printbuf.c
        cp $(LINUX_DIR)/scripts/Makefile.compiler ./
        git add Makefile.compiler
        $(RM) libbcachefs/*.mod.c
index 195ad302daa2f55d28f603a8a1bcecb0621fb67b..007c8d87a64fca68422f2de6f904d2221cfa97f3 100644 (file)
--- a/cmd_fs.c
+++ b/cmd_fs.c
@@ -179,8 +179,9 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
        pr_uuid(out, fs.uuid.b);
        prt_newline(out);
 
-       out->tabstops[0] = 20;
-       out->tabstops[1] = 36;
+       printbuf_tabstops_reset(out);
+       printbuf_tabstop_push(out, 20);
+       printbuf_tabstop_push(out, 16);
 
        prt_str(out, "Size:");
        prt_tab(out);
@@ -202,10 +203,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
 
        prt_newline(out);
 
-       out->tabstops[0] = 16;
-       out->tabstops[1] = 32;
-       out->tabstops[2] = 50;
-       out->tabstops[3] = 68;
+       printbuf_tabstops_reset(out);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 18);
+       printbuf_tabstop_push(out, 18);
 
        prt_str(out, "Data type");
        prt_tab(out);
@@ -255,10 +257,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
        sort(dev_names.data, dev_names.nr,
             sizeof(dev_names.data[0]), dev_by_label_cmp, NULL);
 
-       out->tabstops[0] = 16;
-       out->tabstops[1] = 36;
-       out->tabstops[2] = 52;
-       out->tabstops[3] = 68;
+       printbuf_tabstops_reset(out);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 20);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 14);
 
        darray_for_each(dev_names, dev)
                dev_usage_to_text(out, fs, dev);
index fa8e73d5766aa23bb1acf1f428ecf3c7a847b949..24e62e56d18c1c19d7e40f3cb7a871bc208bcf8e 100644 (file)
  * Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
  * will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
  *
+ * It's allowed to grab the output buffer and free it later with kfree() instead
+ * of using printbuf_exit(), if the user just needs a heap allocated string at
+ * the end.
+ *
  * Memory allocation failures: We don't return errors directly, because on
  * memory allocation failure we usually don't want to bail out and unwind - we
  * want to print what we've got, on a best-effort basis. But code that does want
@@ -67,6 +71,8 @@ enum printbuf_si {
        PRINTBUF_UNITS_10,      /* use powers of 10^3 (standard SI) */
 };
 
+#define PRINTBUF_INLINE_TABSTOPS       4
+
 struct printbuf {
        char                    *buf;
        unsigned                size;
@@ -82,19 +88,34 @@ struct printbuf {
        bool                    heap_allocated:1;
        enum printbuf_si        si_units:1;
        bool                    human_readable_units:1;
-       u8                      tabstop;
-       u8                      tabstops[4];
+       bool                    has_indent_or_tabstops:1;
+       bool                    suppress_indent_tabstop_handling:1;
+       u8                      nr_tabstops;
+
+       /*
+        * Do not modify directly: use printbuf_tabstop_add(),
+        * printbuf_tabstop_get()
+        */
+       u8                      cur_tabstop;
+       u8                      _tabstops[PRINTBUF_INLINE_TABSTOPS];
 };
 
 int printbuf_make_room(struct printbuf *, unsigned);
 const char *printbuf_str(const struct printbuf *);
 void printbuf_exit(struct printbuf *);
 
-void prt_newline(struct printbuf *);
+void printbuf_tabstops_reset(struct printbuf *);
+void printbuf_tabstop_pop(struct printbuf *);
+int printbuf_tabstop_push(struct printbuf *, unsigned);
+
 void printbuf_indent_add(struct printbuf *, unsigned);
 void printbuf_indent_sub(struct printbuf *, unsigned);
+
+void prt_newline(struct printbuf *);
 void prt_tab(struct printbuf *);
 void prt_tab_rjust(struct printbuf *);
+
+void prt_bytes_indented(struct printbuf *, const char *, unsigned);
 void prt_human_readable_u64(struct printbuf *, u64);
 void prt_human_readable_s64(struct printbuf *, s64);
 void prt_units_u64(struct printbuf *, u64);
@@ -129,7 +150,7 @@ static inline unsigned printbuf_remaining(struct printbuf *out)
 
 static inline unsigned printbuf_written(struct printbuf *out)
 {
-       return min(out->pos, out->size);
+       return out->size ? min(out->pos, out->size - 1) : 0;
 }
 
 /*
@@ -150,21 +171,6 @@ static inline void printbuf_nul_terminate(struct printbuf *out)
                out->buf[out->size - 1] = 0;
 }
 
-static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
-{
-       memset(out->buf + out->pos,
-              c,
-              min(n, printbuf_remaining(out)));
-       out->pos += n;
-}
-
-static inline void prt_chars(struct printbuf *out, char c, unsigned n)
-{
-       printbuf_make_room(out, n);
-       __prt_chars_reserved(out, c, n);
-       printbuf_nul_terminate(out);
-}
-
 /* Doesn't call printbuf_make_room(), doesn't nul terminate: */
 static inline void __prt_char_reserved(struct printbuf *out, char c)
 {
@@ -186,14 +192,34 @@ static inline void prt_char(struct printbuf *out, char c)
        printbuf_nul_terminate(out);
 }
 
+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
+{
+       unsigned i, can_print = min(n, printbuf_remaining(out));
+
+       for (i = 0; i < can_print; i++)
+               out->buf[out->pos++] = c;
+       out->pos += n - can_print;
+}
+
+static inline void prt_chars(struct printbuf *out, char c, unsigned n)
+{
+       printbuf_make_room(out, n);
+       __prt_chars_reserved(out, c, n);
+       printbuf_nul_terminate(out);
+}
+
 static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
 {
+       unsigned i, can_print;
+
        printbuf_make_room(out, n);
 
-       memcpy(out->buf + out->pos,
-              b,
-              min(n, printbuf_remaining(out)));
-       out->pos += n;
+       can_print = min(n, printbuf_remaining(out));
+
+       for (i = 0; i < can_print; i++)
+               out->buf[out->pos++] = ((char *) b)[i];
+       out->pos += n - can_print;
+
        printbuf_nul_terminate(out);
 }
 
@@ -202,6 +228,11 @@ static inline void prt_str(struct printbuf *out, const char *str)
        prt_bytes(out, str, strlen(str));
 }
 
+static inline void prt_str_indented(struct printbuf *out, const char *str)
+{
+       prt_bytes_indented(out, str, strlen(str));
+}
+
 static inline void prt_hex_byte(struct printbuf *out, u8 byte)
 {
        printbuf_make_room(out, 2);
@@ -226,7 +257,8 @@ static inline void printbuf_reset(struct printbuf *buf)
        buf->pos                = 0;
        buf->allocation_failure = 0;
        buf->indent             = 0;
-       buf->tabstop            = 0;
+       buf->nr_tabstops        = 0;
+       buf->cur_tabstop        = 0;
 }
 
 /**
@@ -245,4 +277,30 @@ static inline void printbuf_atomic_dec(struct printbuf *buf)
        buf->atomic--;
 }
 
+/*
+ * This is used for the %pf(%p) sprintf format extension, where we pass a pretty
+ * printer and arguments to the pretty-printer to sprintf
+ *
+ * Instead of passing a pretty-printer function to sprintf directly, we pass it
+ * a pointer to a struct call_pp, so that sprintf can check that the magic
+ * number is present, which in turn ensures that the CALL_PP() macro has been
+ * used in order to typecheck the arguments to the pretty printer function
+ *
+ * Example usage:
+ *   sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev));
+ */
+struct call_pp {
+       unsigned long   magic;
+       void            *fn;
+};
+
+#define PP_TYPECHECK(fn, ...)                                  \
+       ({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); })
+
+#define CALL_PP_MAGIC          (unsigned long) 0xce0b92d22f6b6be4
+
+#define CALL_PP(fn, ...)                                       \
+       (PP_TYPECHECK(fn, ##__VA_ARGS__),                       \
+        &((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__
+
 #endif /* _LINUX_PRINTBUF_H */
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
new file mode 100644 (file)
index 0000000..af58770
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STRING_HELPERS_H_
+#define _LINUX_STRING_HELPERS_H_
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+
+/* Descriptions of the types of units to
+ * print in */
+enum string_size_units {
+       STRING_UNITS_10,        /* use powers of 10^3 (standard SI) */
+       STRING_UNITS_2,         /* use binary powers of 2^10 */
+};
+
+int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
+                   char *buf, int len);
+
+#endif
index 2c9807426930e238540cf553b2ce4cb0043e2900..a18c59a3e2e785c0cd407a092e608364946b36e7 100644 (file)
@@ -317,24 +317,27 @@ DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
 );
 
 TRACE_EVENT(btree_reserve_get_fail,
-       TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
-       TP_ARGS(c, required, cl),
+       TP_PROTO(const char *trans_fn,
+                unsigned long caller_ip,
+                size_t required),
+       TP_ARGS(trans_fn, caller_ip, required),
 
        TP_STRUCT__entry(
-               __field(dev_t,          dev                     )
+               __array(char,                   trans_fn, 24    )
+               __field(unsigned long,          caller_ip       )
                __field(size_t,                 required        )
-               __field(struct closure *,       cl              )
        ),
 
        TP_fast_assign(
-               __entry->dev            = c->dev;
-               __entry->required = required;
-               __entry->cl = cl;
+               strlcpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               __entry->caller_ip      = caller_ip;
+               __entry->required       = required;
        ),
 
-       TP_printk("%d,%d required %zu by %p",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->required, __entry->cl)
+       TP_printk("%s %pS required %zu",
+                 __entry->trans_fn,
+                 (void *) __entry->caller_ip,
+                 __entry->required)
 );
 
 DEFINE_EVENT(btree_node, btree_split,
index 0a9f1313414b7eb6a681fd745ce6d22e11e48532..c57baa1ff5bc3f0eaff31c0aca2e5f9babe75900 100644 (file)
@@ -339,6 +339,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
                                skipped_need_journal_commit,
                                skipped_nouse,
                                cl);
+       if (!ob)
+               iter.path->preserve = false;
 err:
        set_btree_iter_dontneed(&iter);
        bch2_trans_iter_exit(trans, &iter);
@@ -379,15 +381,15 @@ static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch
  * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
  */
 static noinline struct open_bucket *
-bch2_bucket_alloc_trans_early(struct btree_trans *trans,
-                             struct bch_dev *ca,
-                             enum alloc_reserve reserve,
-                             u64 *cur_bucket,
-                             u64 *buckets_seen,
-                             u64 *skipped_open,
-                             u64 *skipped_need_journal_commit,
-                             u64 *skipped_nouse,
-                             struct closure *cl)
+bch2_bucket_alloc_early(struct btree_trans *trans,
+                       struct bch_dev *ca,
+                       enum alloc_reserve reserve,
+                       u64 *cur_bucket,
+                       u64 *buckets_seen,
+                       u64 *skipped_open,
+                       u64 *skipped_need_journal_commit,
+                       u64 *skipped_nouse,
+                       struct closure *cl)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -430,7 +432,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
        return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
 }
 
-static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
                                                   struct bch_dev *ca,
                                                   enum alloc_reserve reserve,
                                                   u64 *cur_bucket,
@@ -445,15 +447,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
        struct open_bucket *ob = NULL;
        int ret;
 
-       if (unlikely(!ca->mi.freespace_initialized))
-               return bch2_bucket_alloc_trans_early(trans, ca, reserve,
-                                                    cur_bucket,
-                                                    buckets_seen,
-                                                    skipped_open,
-                                                    skipped_need_journal_commit,
-                                                    skipped_nouse,
-                                                    cl);
-
        BUG_ON(ca->new_fs_bucket_idx);
 
        /*
@@ -467,7 +460,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                        break;
 
                for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
-                    *cur_bucket < k.k->p.offset && !ob;
+                    *cur_bucket < k.k->p.offset;
                     (*cur_bucket)++) {
                        ret = btree_trans_too_many_iters(trans);
                        if (ret)
@@ -481,6 +474,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                                              skipped_need_journal_commit,
                                              skipped_nouse,
                                              k, cl);
+                       if (ob)
+                               break;
                }
 
                if (ob || ret)
@@ -496,11 +491,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
  *
  * Returns index of bucket on success, 0 on failure
  * */
-struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+                                     struct bch_dev *ca,
                                      enum alloc_reserve reserve,
                                      bool may_alloc_partial,
                                      struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct open_bucket *ob = NULL;
        struct bch_dev_usage usage;
        bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
@@ -512,7 +509,6 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
        u64 skipped_need_journal_commit = 0;
        u64 skipped_nouse = 0;
        bool waiting = false;
-       int ret;
 again:
        usage = bch2_dev_usage_read(ca);
        avail = dev_buckets_free(ca, usage, reserve);
@@ -549,19 +545,26 @@ again:
                        return ob;
        }
 
-       ret = bch2_trans_do(c, NULL, NULL, 0,
-                       PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
-                                                       &cur_bucket,
-                                                       &buckets_seen,
-                                                       &skipped_open,
-                                                       &skipped_need_journal_commit,
-                                                       &skipped_nouse,
-                                                       cl)));
+       ob = likely(ca->mi.freespace_initialized)
+               ? bch2_bucket_alloc_freelist(trans, ca, reserve,
+                                       &cur_bucket,
+                                       &buckets_seen,
+                                       &skipped_open,
+                                       &skipped_need_journal_commit,
+                                       &skipped_nouse,
+                                       cl)
+               : bch2_bucket_alloc_early(trans, ca, reserve,
+                                       &cur_bucket,
+                                       &buckets_seen,
+                                       &skipped_open,
+                                       &skipped_need_journal_commit,
+                                       &skipped_nouse,
+                                       cl);
 
        if (skipped_need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
 
-       if (!ob && !ret && !freespace_initialized && start) {
+       if (!ob && !freespace_initialized && start) {
                start = cur_bucket = 0;
                goto again;
        }
@@ -570,7 +573,7 @@ again:
                ca->bucket_alloc_trans_early_cursor = cur_bucket;
 err:
        if (!ob)
-               ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
+               ob = ERR_PTR(-BCH_ERR_no_buckets_found);
 
        if (IS_ERR(ob)) {
                trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
@@ -590,6 +593,19 @@ err:
        return ob;
 }
 
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+                                     enum alloc_reserve reserve,
+                                     bool may_alloc_partial,
+                                     struct closure *cl)
+{
+       struct open_bucket *ob;
+
+       bch2_trans_do(c, NULL, NULL, 0,
+                     PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
+                                                                  may_alloc_partial, cl)));
+       return ob;
+}
+
 static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
                            unsigned l, unsigned r)
 {
@@ -655,7 +671,7 @@ static void add_new_bucket(struct bch_fs *c,
        ob_push(c, ptrs, ob);
 }
 
-int bch2_bucket_alloc_set(struct bch_fs *c,
+static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                      struct open_buckets *ptrs,
                      struct dev_stripe_state *stripe,
                      struct bch_devs_mask *devs_may_alloc,
@@ -666,11 +682,12 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
                      unsigned flags,
                      struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct dev_alloc_list devs_sorted =
                bch2_dev_alloc_list(c, stripe, devs_may_alloc);
        unsigned dev;
        struct bch_dev *ca;
-       int ret = -BCH_ERR_insufficient_devices;
+       int ret = 0;
        unsigned i;
 
        BUG_ON(*nr_effective >= nr_replicas);
@@ -694,16 +711,15 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
                        continue;
                }
 
-               ob = bch2_bucket_alloc(c, ca, reserve,
+               ob = bch2_bucket_alloc_trans(trans, ca, reserve,
                                flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
                if (!IS_ERR(ob))
                        bch2_dev_stripe_increment(ca, stripe);
                percpu_ref_put(&ca->ref);
 
-               if (IS_ERR(ob)) {
-                       ret = PTR_ERR(ob);
-
-                       if (cl)
+               ret = PTR_ERR_OR_ZERO(ob);
+               if (ret) {
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
                                break;
                        continue;
                }
@@ -711,15 +727,36 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
                add_new_bucket(c, ptrs, devs_may_alloc,
                               nr_effective, have_cache, flags, ob);
 
-               if (*nr_effective >= nr_replicas) {
-                       ret = 0;
+               if (*nr_effective >= nr_replicas)
                        break;
-               }
        }
 
+       if (*nr_effective >= nr_replicas)
+               ret = 0;
+       else if (!ret)
+               ret = -BCH_ERR_insufficient_devices;
+
        return ret;
 }
 
+int bch2_bucket_alloc_set(struct bch_fs *c,
+                     struct open_buckets *ptrs,
+                     struct dev_stripe_state *stripe,
+                     struct bch_devs_mask *devs_may_alloc,
+                     unsigned nr_replicas,
+                     unsigned *nr_effective,
+                     bool *have_cache,
+                     enum alloc_reserve reserve,
+                     unsigned flags,
+                     struct closure *cl)
+{
+       return bch2_trans_do(c, NULL, NULL, 0,
+                     bch2_bucket_alloc_set_trans(&trans, ptrs, stripe,
+                                             devs_may_alloc, nr_replicas,
+                                             nr_effective, have_cache, reserve,
+                                             flags, cl));
+}
+
 /* Allocate from stripes: */
 
 /*
@@ -824,7 +861,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
        wp->ptrs = ptrs_skip;
 }
 
-static int open_bucket_add_buckets(struct bch_fs *c,
+static int open_bucket_add_buckets(struct btree_trans *trans,
                        struct open_buckets *ptrs,
                        struct write_point *wp,
                        struct bch_devs_list *devs_have,
@@ -837,6 +874,7 @@ static int open_bucket_add_buckets(struct bch_fs *c,
                        unsigned flags,
                        struct closure *_cl)
 {
+       struct bch_fs *c = trans->c;
        struct bch_devs_mask devs;
        struct open_bucket *ob;
        struct closure *cl = NULL;
@@ -868,7 +906,8 @@ static int open_bucket_add_buckets(struct bch_fs *c,
                                                 target, erasure_code,
                                                 nr_replicas, nr_effective,
                                                 have_cache, flags, _cl);
-                       if (bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+                           bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
                            bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
                                return ret;
                        if (*nr_effective >= nr_replicas)
@@ -887,10 +926,11 @@ retry_blocking:
         * Try nonblocking first, so that if one device is full we'll try from
         * other devices:
         */
-       ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
+       ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
                                nr_replicas, nr_effective, have_cache,
                                reserve, flags, cl);
        if (ret &&
+           !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
            !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
            !cl && _cl) {
                cl = _cl;
@@ -1010,15 +1050,25 @@ static bool try_decrease_writepoints(struct bch_fs *c,
        return true;
 }
 
-static struct write_point *writepoint_find(struct bch_fs *c,
+static void bch2_trans_mutex_lock(struct btree_trans *trans,
+                                 struct mutex *lock)
+{
+       if (!mutex_trylock(lock)) {
+               bch2_trans_unlock(trans);
+               mutex_lock(lock);
+       }
+}
+
+static struct write_point *writepoint_find(struct btree_trans *trans,
                                           unsigned long write_point)
 {
+       struct bch_fs *c = trans->c;
        struct write_point *wp, *oldest;
        struct hlist_head *head;
 
        if (!(write_point & 1UL)) {
                wp = (struct write_point *) write_point;
-               mutex_lock(&wp->lock);
+               bch2_trans_mutex_lock(trans, &wp->lock);
                return wp;
        }
 
@@ -1027,7 +1077,7 @@ restart_find:
        wp = __writepoint_find(head, write_point);
        if (wp) {
 lock_wp:
-               mutex_lock(&wp->lock);
+               bch2_trans_mutex_lock(trans, &wp->lock);
                if (wp->write_point == write_point)
                        goto out;
                mutex_unlock(&wp->lock);
@@ -1040,8 +1090,8 @@ restart_find_oldest:
                if (!oldest || time_before64(wp->last_used, oldest->last_used))
                        oldest = wp;
 
-       mutex_lock(&oldest->lock);
-       mutex_lock(&c->write_points_hash_lock);
+       bch2_trans_mutex_lock(trans, &oldest->lock);
+       bch2_trans_mutex_lock(trans, &c->write_points_hash_lock);
        if (oldest >= c->write_points + c->write_points_nr ||
            try_increase_writepoints(c)) {
                mutex_unlock(&c->write_points_hash_lock);
@@ -1069,7 +1119,7 @@ out:
 /*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
-struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans,
                                unsigned target,
                                unsigned erasure_code,
                                struct write_point_specifier write_point,
@@ -1080,6 +1130,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
                                unsigned flags,
                                struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct write_point *wp;
        struct open_bucket *ob;
        struct open_buckets ptrs;
@@ -1099,7 +1150,7 @@ retry:
        write_points_nr = c->write_points_nr;
        have_cache      = false;
 
-       wp = writepoint_find(c, write_point.v);
+       wp = writepoint_find(trans, write_point.v);
 
        if (wp->data_type == BCH_DATA_user)
                ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
@@ -1109,21 +1160,22 @@ retry:
                have_cache = true;
 
        if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
-               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+               ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
                                              ob_flags, cl);
        } else {
-               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+               ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
                                              ob_flags, NULL);
-               if (!ret)
+               if (!ret ||
+                   bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto alloc_done;
 
-               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+               ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                              0, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
@@ -1180,6 +1232,32 @@ err:
        return ERR_PTR(ret);
 }
 
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+                               unsigned target,
+                               unsigned erasure_code,
+                               struct write_point_specifier write_point,
+                               struct bch_devs_list *devs_have,
+                               unsigned nr_replicas,
+                               unsigned nr_replicas_required,
+                               enum alloc_reserve reserve,
+                               unsigned flags,
+                               struct closure *cl)
+{
+       struct write_point *wp;
+
+       bch2_trans_do(c, NULL, NULL, 0,
+                     PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target,
+                                                       erasure_code,
+                                                       write_point,
+                                                       devs_have,
+                                                       nr_replicas,
+                                                       nr_replicas_required,
+                                                       reserve,
+                                                       flags, cl)));
+       return wp;
+
+}
+
 struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
index 8bc78877f0fc88b352101a5925b1638caef9b413..6de63a351fa881f2547bb01e0f5ee593bfbb8410 100644 (file)
@@ -136,6 +136,14 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
                      unsigned, unsigned *, bool *, enum alloc_reserve,
                      unsigned, struct closure *);
 
+struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *,
+                                            unsigned, unsigned,
+                                            struct write_point_specifier,
+                                            struct bch_devs_list *,
+                                            unsigned, unsigned,
+                                            enum alloc_reserve,
+                                            unsigned,
+                                            struct closure *);
 struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
                                             unsigned, unsigned,
                                             struct write_point_specifier,
index 5a46b25b0587e9257df68427e6bd148411518e40..029b1ec14283e1f6950a856d89d59acf491edcea 100644 (file)
@@ -492,7 +492,7 @@ static void backpointer_not_found(struct btree_trans *trans,
        prt_printf(&buf, "\n  ");
        bch2_bkey_val_to_text(&buf, c, k);
        if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
-               bch_err(c, "%s", buf.buf);
+               bch_err_ratelimited(c, "%s", buf.buf);
        else
                bch2_trans_inconsistent(trans, "%s", buf.buf);
 
@@ -526,9 +526,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
        if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
                return k;
 
-       backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
-
        bch2_trans_iter_exit(trans, iter);
+
+       if (bp.level) {
+               /*
+                * If a backpointer for a btree node wasn't found, it may be
+                * because it was overwritten by a new btree node that hasn't
+                * been written out yet - backpointer_get_node() checks for
+                * this:
+                */
+               bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+               bch2_trans_iter_exit(trans, iter);
+               return bkey_s_c_null;
+       }
+
+       backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
        return bkey_s_c_null;
 }
 
@@ -540,7 +552,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree *b;
-       struct bkey_s_c k;
 
        BUG_ON(!bp.level);
 
@@ -551,22 +562,24 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
                                  bp.level - 1,
                                  0);
        b = bch2_btree_iter_peek_node(iter);
-       if (IS_ERR(b)) {
-               bch2_trans_iter_exit(trans, iter);
-               return b;
-       }
+       if (IS_ERR(b))
+               goto err;
 
        if (extent_matches_bp(c, bp.btree_id, bp.level,
                              bkey_i_to_s_c(&b->key),
                              bucket, bp))
                return b;
 
-       if (!btree_node_will_make_reachable(b))
-               backpointer_not_found(trans, bucket, bp_offset,
-                                     bp, k, "btree node");
-
+       if (btree_node_will_make_reachable(b)) {
+               b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+       } else {
+               backpointer_not_found(trans, bucket, bp_offset, bp,
+                                     bkey_i_to_s_c(&b->key), "btree node");
+               b = NULL;
+       }
+err:
        bch2_trans_iter_exit(trans, iter);
-       return NULL;
+       return b;
 }
 
 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
@@ -829,6 +842,8 @@ static int check_one_backpointer(struct btree_trans *trans,
 
        k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
        ret = bkey_err(k);
+       if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+               return 0;
        if (ret)
                return ret;
 
index 8ffdb4dee47aef18d40f3de255cb80df0054073d..a5bf808763e0e0a46bf567bc072a765ed06d4788 100644 (file)
@@ -319,8 +319,6 @@ BCH_DEBUG_PARAMS_DEBUG()
 #undef BCH_DEBUG_PARAM
 #endif
 
-#define BCH_LOCK_TIME_NR 128
-
 #define BCH_TIME_STATS()                       \
        x(btree_node_mem_alloc)                 \
        x(btree_node_split)                     \
@@ -531,9 +529,13 @@ struct btree_debug {
        unsigned                id;
 };
 
-struct lock_held_stats {
-       struct time_stats       times[BCH_LOCK_TIME_NR];
-       const char              *names[BCH_LOCK_TIME_NR];
+#define BCH_TRANSACTIONS_NR 128
+
+struct btree_transaction_stats {
+       struct mutex            lock;
+       struct time_stats       lock_hold_times;
+       unsigned                nr_max_paths;
+       char                    *max_paths_text;
 };
 
 struct bch_fs_pcpu {
@@ -930,7 +932,8 @@ struct bch_fs {
 
        struct time_stats       times[BCH_TIME_STAT_NR];
 
-       struct lock_held_stats lock_held_stats;
+       const char              *btree_transaction_fns[BCH_TRANSACTIONS_NR];
+       struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
 };
 
 static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
index cc068963516405846bd717a9dc6c288fbfbd633a..d348175edad402c15f3dff9b78b7e70e7a185af2 100644 (file)
@@ -19,33 +19,49 @@ const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
 struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
                              const struct bkey_packed *);
 
-void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits)
+void bch2_bkey_packed_to_binary_text(struct printbuf *out,
+                                    const struct bkey_format *f,
+                                    const struct bkey_packed *k)
 {
-       unsigned bit = high_bit_offset, done = 0;
+       const u64 *p = high_word(f, k);
+       unsigned word_bits = 64 - high_bit_offset;
+       unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
+       u64 v = *p & (~0ULL >> high_bit_offset);
+
+       if (!nr_key_bits) {
+               prt_str(out, "(empty)");
+               return;
+       }
 
        while (1) {
-               while (bit < 64) {
-                       if (done && !(done % 8))
-                               *out++ = ' ';
-                       *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0';
-                       bit++;
-                       done++;
-                       if (done == nr_bits) {
-                               *out++ = '\0';
-                               return;
-                       }
+               unsigned next_key_bits = nr_key_bits;
+
+               if (nr_key_bits < 64) {
+                       v >>= 64 - nr_key_bits;
+                       next_key_bits = 0;
+               } else {
+                       next_key_bits -= 64;
                }
 
+               bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
+
+               if (!next_key_bits)
+                       break;
+
+               prt_char(out, ' ');
+
                p = next_word(p);
-               bit = 0;
+               v = *p;
+               word_bits = 64;
+               nr_key_bits = next_key_bits;
        }
 }
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 
 static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
-                                const struct bkey *unpacked,
-                                const struct bkey_format *format)
+                                 const struct bkey *unpacked,
+                                 const struct bkey_format *format)
 {
        struct bkey tmp;
 
@@ -57,23 +73,35 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
        tmp = __bch2_bkey_unpack_key(format, packed);
 
        if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
-               struct printbuf buf1 = PRINTBUF;
-               struct printbuf buf2 = PRINTBUF;
-               char buf3[160], buf4[160];
+               struct printbuf buf = PRINTBUF;
 
-               bch2_bkey_to_text(&buf1, unpacked);
-               bch2_bkey_to_text(&buf2, &tmp);
-               bch2_to_binary(buf3, (void *) unpacked, 80);
-               bch2_to_binary(buf4, high_word(format, packed), 80);
-
-               panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n",
+               prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
                      format->key_u64s,
                      format->bits_per_field[0],
                      format->bits_per_field[1],
                      format->bits_per_field[2],
                      format->bits_per_field[3],
-                     format->bits_per_field[4],
-                     buf1.buf, buf2.buf, buf3, buf4);
+                     format->bits_per_field[4]);
+
+               prt_printf(&buf, "compiled unpack: ");
+               bch2_bkey_to_text(&buf, unpacked);
+               prt_newline(&buf);
+
+               prt_printf(&buf, "c unpack:        ");
+               bch2_bkey_to_text(&buf, &tmp);
+               prt_newline(&buf);
+
+               prt_printf(&buf, "compiled unpack: ");
+               bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+                                               (struct bkey_packed *) unpacked);
+               prt_newline(&buf);
+
+               prt_printf(&buf, "c unpack:        ");
+               bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+                                               (struct bkey_packed *) &tmp);
+               prt_newline(&buf);
+
+               panic("%s", buf.buf);
        }
 }
 
index 7dee3d8e0a3d169160fab7018c6fe1ef55660eb5..df9fb859d1db6834057ef46e7573ddac031b1aee 100644 (file)
@@ -12,7 +12,9 @@
 #define HAVE_BCACHEFS_COMPILED_UNPACK  1
 #endif
 
-void bch2_to_binary(char *, const u64 *, unsigned);
+void bch2_bkey_packed_to_binary_text(struct printbuf *,
+                                    const struct bkey_format *,
+                                    const struct bkey_packed *);
 
 /* bkey with split value, const */
 struct bkey_s_c {
@@ -42,12 +44,15 @@ static inline size_t bkey_val_bytes(const struct bkey *k)
 
 static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
 {
-       k->u64s = BKEY_U64s + val_u64s;
+       unsigned u64s = BKEY_U64s + val_u64s;
+
+       BUG_ON(u64s > U8_MAX);
+       k->u64s = u64s;
 }
 
 static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
 {
-       k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
+       set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
 }
 
 #define bkey_val_end(_k)       ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
index ae731b3a390840f4d61019ac44d258a9767dae56..8aad87ead0826a69e3ac725161d4282f785fde1b 100644 (file)
@@ -616,7 +616,6 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
                                          (u64 *) vstruct_end(i) - (u64 *) k);
                        i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift);
                        set_btree_bset_end(b, t);
-                       bch2_bset_set_no_aux_tree(b, t);
                }
 
                for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
@@ -626,10 +625,14 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
                if (k != vstruct_last(i)) {
                        i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start);
                        set_btree_bset_end(b, t);
-                       bch2_bset_set_no_aux_tree(b, t);
                }
        }
 
+       /*
+        * Always rebuild search trees: eytzinger search tree nodes directly
+        * depend on the values of min/max key:
+        */
+       bch2_bset_set_no_aux_tree(b, b->set);
        bch2_btree_build_aux_trees(b);
 
        for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
@@ -778,8 +781,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
 }
 
 static int validate_bset_keys(struct bch_fs *c, struct btree *b,
-                        struct bset *i, unsigned *whiteout_u64s,
-                        int write, bool have_retry)
+                        struct bset *i, int write, bool have_retry)
 {
        unsigned version = le16_to_cpu(i->version);
        struct bkey_packed *k, *prev = NULL;
@@ -915,7 +917,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
        }
 
        while (b->written < (ptr_written ?: btree_sectors(c))) {
-               unsigned sectors, whiteout_u64s = 0;
+               unsigned sectors;
                struct nonce nonce;
                struct bch_csum csum;
                bool first = !b->written;
@@ -984,8 +986,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                if (!b->written)
                        btree_node_set_format(b, b->data->format);
 
-               ret = validate_bset_keys(c, b, i, &whiteout_u64s,
-                                   READ, have_retry);
+               ret = validate_bset_keys(c, b, i, READ, have_retry);
                if (ret)
                        goto fsck_err;
 
@@ -1011,11 +1012,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                if (blacklisted && !first)
                        continue;
 
-               sort_iter_add(iter, i->start,
-                             vstruct_idx(i, whiteout_u64s));
-
                sort_iter_add(iter,
-                             vstruct_idx(i, whiteout_u64s),
+                             vstruct_idx(i, 0),
                              vstruct_last(i));
 
                nonblacklisted_written = b->written;
@@ -1745,7 +1743,6 @@ static void btree_node_write_endio(struct bio *bio)
 static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
                                   struct bset *i, unsigned sectors)
 {
-       unsigned whiteout_u64s = 0;
        struct printbuf buf = PRINTBUF;
        int ret;
 
@@ -1758,7 +1755,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
        if (ret)
                return ret;
 
-       ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
+       ret = validate_bset_keys(c, b, i, WRITE, false) ?:
                validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false);
        if (ret) {
                bch2_inconsistent_error(c);
index 04a613187b60bef13d27d670db90ba781f77755c..1d4b9fdec1a03b794bc2bcc6262c963cfce9d184 100644 (file)
@@ -1418,16 +1418,16 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
        if (unlikely(ret))
                goto err;
 
-       mark_btree_node_locked(trans, path, level, lock_type);
-       btree_path_level_init(trans, path, b);
-
        if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
            unlikely(b != btree_node_mem_ptr(tmp.k)))
                btree_node_mem_ptr_set(trans, path, level + 1, b);
 
        if (btree_node_read_locked(path, level + 1))
                btree_node_unlock(trans, path, level + 1);
+
+       mark_btree_node_locked(trans, path, level, lock_type);
        path->level = level;
+       btree_path_level_init(trans, path, b);
 
        bch2_btree_path_verify_locks(path);
 err:
@@ -1872,42 +1872,69 @@ void bch2_dump_trans_updates(struct btree_trans *trans)
        printbuf_exit(&buf);
 }
 
-noinline __cold
-void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
+{
+       prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
+                  path->idx, path->ref, path->intent_ref,
+                  path->preserve ? 'P' : ' ',
+                  path->should_be_locked ? 'S' : ' ',
+                  bch2_btree_ids[path->btree_id],
+                  path->level);
+       bch2_bpos_to_text(out, path->pos);
+
+       prt_printf(out, " locks %u", path->nodes_locked);
+#ifdef CONFIG_BCACHEFS_DEBUG
+       prt_printf(out, " %pS", (void *) path->ip_allocated);
+#endif
+       prt_newline(out);
+}
+
+void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
 {
        struct btree_path *path;
-       struct printbuf buf = PRINTBUF;
        unsigned idx;
 
-       trans_for_each_path_inorder(trans, path, idx) {
-               printbuf_reset(&buf);
+       trans_for_each_path_inorder(trans, path, idx)
+               bch2_btree_path_to_text(out, path);
+}
 
-               bch2_bpos_to_text(&buf, path->pos);
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+       struct printbuf buf = PRINTBUF;
 
-               printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n",
-                      path->idx, path->ref, path->intent_ref,
-                      path->preserve ? 'P' : ' ',
-                      path->should_be_locked ? 'S' : ' ',
-                      bch2_btree_ids[path->btree_id],
-                      path->level,
-                      buf.buf,
-                      path->nodes_locked,
-#ifdef CONFIG_BCACHEFS_DEBUG
-                      (void *) path->ip_allocated
-#else
-                      NULL
-#endif
-                      );
-       }
+       bch2_trans_paths_to_text(&buf, trans);
 
+       printk(KERN_ERR "%s", buf.buf);
        printbuf_exit(&buf);
 
        bch2_dump_trans_updates(trans);
 }
 
+noinline
+static void bch2_trans_update_max_paths(struct btree_trans *trans)
+{
+       struct btree_transaction_stats *s = btree_trans_stats(trans);
+       struct printbuf buf = PRINTBUF;
+
+       bch2_trans_paths_to_text(&buf, trans);
+
+       if (!buf.allocation_failure) {
+               mutex_lock(&s->lock);
+               if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
+                       s->nr_max_paths = hweight64(trans->paths_allocated);
+                       swap(s->max_paths_text, buf.buf);
+               }
+               mutex_unlock(&s->lock);
+       }
+
+       printbuf_exit(&buf);
+}
+
 static struct btree_path *btree_path_alloc(struct btree_trans *trans,
                                           struct btree_path *pos)
 {
+       struct btree_transaction_stats *s = btree_trans_stats(trans);
        struct btree_path *path;
        unsigned idx;
 
@@ -1920,6 +1947,9 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans,
        idx = __ffs64(~trans->paths_allocated);
        trans->paths_allocated |= 1ULL << idx;
 
+       if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths))
+               bch2_trans_update_max_paths(trans);
+
        path = &trans->paths[idx];
 
        path->idx               = idx;
@@ -2013,12 +2043,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
 
        struct bkey_s_c k;
 
+       EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+       EBUG_ON(!btree_node_locked(path, path->level));
+
        if (!path->cached) {
                struct btree_path_level *l = path_l(path);
                struct bkey_packed *_k;
 
-               EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
-
                _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
                k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
 
@@ -2033,7 +2064,6 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
                        (path->btree_id != ck->key.btree_id ||
                         bkey_cmp(path->pos, ck->key.pos)));
                EBUG_ON(!ck || !ck->valid);
-               EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
 
                *u = ck->k->k;
                k = bkey_i_to_s_c(ck->k);
@@ -2288,7 +2318,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
  * bkey_s_c_null:
  */
 static noinline
-struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
 {
        struct btree_trans *trans = iter->trans;
        struct bch_fs *c = trans->c;
@@ -2317,6 +2347,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
        return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
 }
 
+static noinline
+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+{
+       struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos);
+       int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_);
+
+       return err ? bkey_s_c_err(err) : ret;
+}
+
 static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
 {
        struct btree_trans *trans = iter->trans;
@@ -2347,15 +2386,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
                if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
                    k.k &&
                    (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
-                       ret = bkey_err(k2);
+                       k = k2;
+                       ret = bkey_err(k);
                        if (ret) {
-                               k = k2;
                                bch2_btree_iter_set_pos(iter, iter->pos);
                                goto out;
                        }
-
-                       k = k2;
-                       iter->k = *k.k;
                }
 
                if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
@@ -2803,8 +2839,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                                        btree_iter_ip_allocated(iter));
 
        ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-       if (unlikely(ret))
-               return bkey_s_c_err(ret);
+       if (unlikely(ret)) {
+               k = bkey_s_c_err(ret);
+               goto out_no_locked;
+       }
 
        if ((iter->flags & BTREE_ITER_CACHED) ||
            !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
@@ -2828,13 +2866,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                }
 
                if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
-                   (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
-                       if (bkey_err(k)) {
-                               goto out_no_locked;
-                       } else {
+                   (k = __btree_trans_peek_key_cache(iter, iter->pos)).k) {
+                       if (!bkey_err(k))
                                iter->k = *k.k;
-                               goto out;
-                       }
+                       /* We're not returning a key from iter->path: */
+                       goto out_no_locked;
                }
 
                k = bch2_btree_path_peek_slot(iter->path, &iter->k);
@@ -2862,11 +2898,14 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                        struct bpos pos = iter->pos;
 
                        k = bch2_btree_iter_peek(iter);
-                       iter->pos = pos;
+                       if (unlikely(bkey_err(k)))
+                               bch2_btree_iter_set_pos(iter, pos);
+                       else
+                               iter->pos = pos;
                }
 
                if (unlikely(bkey_err(k)))
-                       return k;
+                       goto out_no_locked;
 
                next = k.k ? bkey_start_pos(k.k) : POS_MAX;
 
@@ -3195,6 +3234,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 
        bch2_trans_reset_updates(trans);
 
+       trans->restart_count++;
        trans->mem_top                  = 0;
 
        if (trans->fs_usage_deltas) {
@@ -3245,10 +3285,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 
 void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
 {
-       bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans,
-               "trans->restart_count %u, should be %u, last restarted by %ps\n",
-               trans->restart_count, restart_count,
-               (void *) trans->last_restarted_ip);
+       if (trans_was_restarted(trans, restart_count))
+               panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
+                     trans->restart_count, restart_count,
+                     (void *) trans->last_restarted_ip);
 }
 
 static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@@ -3269,6 +3309,22 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
        trans->updates          = p; p += updates_bytes;
 }
 
+static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
+                                       const char *fn)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
+               if (!c->btree_transaction_fns[i] ||
+                   c->btree_transaction_fns[i] == fn) {
+                       c->btree_transaction_fns[i] = fn;
+                       return i;
+               }
+
+       pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
+       return i;
+}
+
 void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
                       unsigned expected_nr_iters,
                       size_t expected_mem_bytes,
@@ -3284,15 +3340,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        trans->fn               = fn;
        trans->last_begin_time  = ktime_get_ns();
        trans->task             = current;
-
-       while (c->lock_held_stats.names[trans->lock_name_idx] != fn
-              && c->lock_held_stats.names[trans->lock_name_idx] != 0)
-               trans->lock_name_idx++;
-
-       if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
-               pr_warn_once("lock_times array not big enough!");
-       else
-               c->lock_held_stats.names[trans->lock_name_idx] = fn;
+       trans->fn_idx           = bch2_trans_get_fn_idx(trans, c, fn);
 
        bch2_trans_alloc_paths(trans, c);
 
@@ -3463,9 +3511,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
 
 int bch2_fs_btree_iter_init(struct bch_fs *c)
 {
-       unsigned nr = BTREE_ITER_MAX;
+       unsigned i, nr = BTREE_ITER_MAX;
        int ret;
 
+       for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++)
+               mutex_init(&c->btree_transaction_stats[i].lock);
+
        INIT_LIST_HEAD(&c->btree_trans_list);
        mutex_init(&c->btree_trans_lock);
 
index f38fd25b49929484e54763dbae8215232d069646..6ad28ff684c82277f692b1563706ef687cfefb67 100644 (file)
@@ -182,7 +182,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er
        BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
 
        trans->restarted = err;
-       trans->restart_count++;
        return -err;
 }
 
@@ -368,7 +367,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *
 
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
-       if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) {
+       if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) {
                trace_trans_restart_too_many_iters(trans, _THIS_IP_);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
        }
@@ -392,13 +391,17 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 
 #define lockrestart_do(_trans, _do)                                    \
 ({                                                                     \
+       u32 _restart_count;                                             \
        int _ret;                                                       \
                                                                        \
        do {                                                            \
-               bch2_trans_begin(_trans);                               \
+               _restart_count = bch2_trans_begin(_trans);              \
                _ret = (_do);                                           \
        } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart));  \
                                                                        \
+       if (!_ret)                                                      \
+               bch2_trans_verify_not_restarted(_trans, _restart_count);\
+                                                                       \
        _ret;                                                           \
 })
 
@@ -439,7 +442,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
                             (_start), (_flags));                       \
                                                                        \
        while (1) {                                                     \
-               bch2_trans_begin(_trans);                               \
+               u32 _restart_count = bch2_trans_begin(_trans);          \
                (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags));   \
                if (!(_k).k) {                                          \
                        _ret = 0;                                       \
@@ -451,6 +454,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
                        continue;                                       \
                if (_ret)                                               \
                        break;                                          \
+               bch2_trans_verify_not_restarted(_trans, _restart_count);\
                if (!bch2_btree_iter_advance(&(_iter)))                 \
                        break;                                          \
        }                                                               \
@@ -468,7 +472,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
                             (_start), (_flags));                       \
                                                                        \
        while (1) {                                                     \
-               bch2_trans_begin(_trans);                               \
+               u32 _restart_count = bch2_trans_begin(_trans);          \
                (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
                if (!(_k).k) {                                          \
                        _ret = 0;                                       \
@@ -480,6 +484,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
                        continue;                                       \
                if (_ret)                                               \
                        break;                                          \
+               bch2_trans_verify_not_restarted(_trans, _restart_count);\
                if (!bch2_btree_iter_rewind(&(_iter)))                  \
                        break;                                          \
        }                                                               \
@@ -535,6 +540,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 /* new multiple iterator interface: */
 
 void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
+void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
+void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
 void bch2_dump_trans_updates(struct btree_trans *);
 void bch2_dump_trans_paths_updates(struct btree_trans *);
 void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
index fa90581f7ade7d6869e5bc8244e489f9d819bfaf..38b16f95566077cee28b8a10dfc7a4f1035c891c 100644 (file)
@@ -631,11 +631,22 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 void bch2_btree_key_cache_drop(struct btree_trans *trans,
                               struct btree_path *path)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_cached *ck = (void *) path->l[0].b;
 
-       ck->valid = false;
+       BUG_ON(!ck->valid);
 
-       BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
+       /*
+        * We just did an update to the btree, bypassing the key cache: the key
+        * cache key is now stale and must be dropped, even if dirty:
+        */
+       if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+               clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+               atomic_long_dec(&c->btree_key_cache.nr_dirty);
+               bch2_journal_pin_drop(&c->journal, &ck->journal);
+       }
+
+       ck->valid = false;
 }
 
 static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
index c3f3cb8733f249c0f2831c145a6f8e16e643fe8e..205c6b599ba0474aee60fdd9cd78be464975169b 100644 (file)
@@ -115,6 +115,26 @@ btree_lock_want(struct btree_path *path, int level)
        return BTREE_NODE_UNLOCKED;
 }
 
+static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
+{
+       return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
+               ? &trans->c->btree_transaction_stats[trans->fn_idx]
+               : NULL;
+}
+
+static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
+                                             struct btree_path *path, unsigned level)
+{
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+       struct btree_transaction_stats *s = btree_trans_stats(trans);
+
+       if (s)
+               __bch2_time_stats_update(&s->lock_hold_times,
+                                        path->l[level].lock_taken_time,
+                                        ktime_get_ns());
+#endif
+}
+
 static inline void btree_node_unlock(struct btree_trans *trans,
                                     struct btree_path *path, unsigned level)
 {
@@ -124,15 +144,7 @@ static inline void btree_node_unlock(struct btree_trans *trans,
 
        if (lock_type != BTREE_NODE_UNLOCKED) {
                six_unlock_type(&path->l[level].b->c.lock, lock_type);
-#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-               if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
-                       struct bch_fs *c = trans->c;
-
-                       __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
-                                              path->l[level].lock_taken_time,
-                                                ktime_get_ns());
-               }
-#endif
+               btree_trans_lock_hold_time_update(trans, path, level);
        }
        mark_btree_node_unlocked(path, level);
 }
index 1ff999179b4e1ca2e651168aee383f4019662c81..21d76181777f30fa4057c4cd261941f35d4ca6e9 100644 (file)
@@ -392,6 +392,7 @@ struct btree_trans {
        struct task_struct      *task;
        int                     srcu_idx;
 
+       u8                      fn_idx;
        u8                      nr_sorted;
        u8                      nr_updates;
        u8                      traverse_all_idx;
@@ -432,7 +433,6 @@ struct btree_trans {
        unsigned                journal_u64s;
        unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
-       int                      lock_name_idx;
 };
 
 #define BTREE_FLAGS()                                                  \
index e4138614e0810558c9cc66d3dc85e231878ce797..0409737f757a288654fad85bb1a382f28ad16613 100644 (file)
@@ -178,12 +178,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
        six_unlock_intent(&b->c.lock);
 }
 
-static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
+static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
                                             struct disk_reservation *res,
                                             struct closure *cl,
                                             bool interior_node,
                                             unsigned flags)
 {
+       struct bch_fs *c = trans->c;
        struct write_point *wp;
        struct btree *b;
        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
@@ -213,7 +214,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
        mutex_unlock(&c->btree_reserve_cache_lock);
 
 retry:
-       wp = bch2_alloc_sectors_start(c,
+       wp = bch2_alloc_sectors_start_trans(trans,
                                      c->opts.metadata_target ?:
                                      c->opts.foreground_target,
                                      0,
@@ -412,18 +413,16 @@ static void bch2_btree_reserve_put(struct btree_update *as)
        }
 }
 
-static int bch2_btree_reserve_get(struct btree_update *as,
+static int bch2_btree_reserve_get(struct btree_trans *trans,
+                                 struct btree_update *as,
                                  unsigned nr_nodes[2],
-                                 unsigned flags)
+                                 unsigned flags,
+                                 struct closure *cl)
 {
        struct bch_fs *c = as->c;
-       struct closure cl;
        struct btree *b;
        unsigned interior;
-       int ret;
-
-       closure_init_stack(&cl);
-retry:
+       int ret = 0;
 
        BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
 
@@ -434,18 +433,17 @@ retry:
         * BTREE_INSERT_NOWAIT only applies to btree node allocation, not
         * blocking on this lock:
         */
-       ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+       ret = bch2_btree_cache_cannibalize_lock(c, cl);
        if (ret)
-               goto err;
+               return ret;
 
        for (interior = 0; interior < 2; interior++) {
                struct prealloc_nodes *p = as->prealloc_nodes + interior;
 
                while (p->nr < nr_nodes[interior]) {
-                       b = __bch2_btree_node_alloc(c, &as->disk_res,
-                                                   flags & BTREE_INSERT_NOWAIT
-                                                   ? NULL : &cl,
-                                                   interior, flags);
+                       b = __bch2_btree_node_alloc(trans, &as->disk_res,
+                                       flags & BTREE_INSERT_NOWAIT ? NULL : cl,
+                                       interior, flags);
                        if (IS_ERR(b)) {
                                ret = PTR_ERR(b);
                                goto err;
@@ -454,18 +452,8 @@ retry:
                        p->b[p->nr++] = b;
                }
        }
-
-       bch2_btree_cache_cannibalize_unlock(c);
-       closure_sync(&cl);
-       return 0;
 err:
        bch2_btree_cache_cannibalize_unlock(c);
-       closure_sync(&cl);
-
-       if (ret == -EAGAIN)
-               goto retry;
-
-       trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl);
        return ret;
 }
 
@@ -980,6 +968,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        unsigned update_level = level;
        int journal_flags = flags & JOURNAL_WATERMARK_MASK;
        int ret = 0;
+       u32 restart_count = trans->restart_count;
 
        BUG_ON(!path->should_be_locked);
 
@@ -1053,16 +1042,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        if (ret)
                goto err;
 
-       bch2_trans_unlock(trans);
-
        ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
                                      BTREE_UPDATE_JOURNAL_RES,
-                                     journal_flags);
+                                     journal_flags|JOURNAL_RES_GET_NONBLOCK);
        if (ret) {
-               bch2_btree_update_free(as);
-               trace_trans_restart_journal_preres_get(trans, _RET_IP_);
-               ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-               return ERR_PTR(ret);
+               bch2_trans_unlock(trans);
+
+               ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+                                             BTREE_UPDATE_JOURNAL_RES,
+                                             journal_flags);
+               if (ret) {
+                       trace_trans_restart_journal_preres_get(trans, _RET_IP_);
+                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
+                       goto err;
+               }
+
+               ret = bch2_trans_relock(trans);
+               if (ret)
+                       goto err;
        }
 
        ret = bch2_disk_reservation_get(c, &as->disk_res,
@@ -1072,14 +1069,32 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        if (ret)
                goto err;
 
-       ret = bch2_btree_reserve_get(as, nr_nodes, flags);
-       if (ret)
+       ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
+       if (ret == -EAGAIN ||
+           ret == -ENOMEM) {
+               struct closure cl;
+
+               closure_init_stack(&cl);
+
+               bch2_trans_unlock(trans);
+
+               do {
+                       ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
+                       closure_sync(&cl);
+               } while (ret == -EAGAIN);
+       }
+
+       if (ret) {
+               trace_btree_reserve_get_fail(trans->fn, _RET_IP_,
+                                            nr_nodes[0] + nr_nodes[1]);
                goto err;
+       }
 
        ret = bch2_trans_relock(trans);
        if (ret)
                goto err;
 
+       bch2_trans_verify_not_restarted(trans, restart_count);
        return as;
 err:
        bch2_btree_update_free(as);
index cd37a1016e259b1f0e040520e8b9d7e71449f317..f35e714e6697d6412020958ca716ea9e936b7ecd 100644 (file)
@@ -199,7 +199,7 @@ struct dump_iter {
        ssize_t                 ret;    /* bytes read so far */
 };
 
-static int flush_buf(struct dump_iter *i)
+static ssize_t flush_buf(struct dump_iter *i)
 {
        if (i->buf.pos) {
                size_t bytes = min_t(size_t, i->buf.pos, i->size);
@@ -215,7 +215,7 @@ static int flush_buf(struct dump_iter *i)
                memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
        }
 
-       return 0;
+       return i->size ? 0 : i->ret;
 }
 
 static int bch2_dump_open(struct inode *inode, struct file *file)
@@ -253,7 +253,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       int err;
+       ssize_t ret;
 
        i->ubuf = buf;
        i->size = size;
@@ -261,14 +261,11 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       err = for_each_btree_key2(&trans, iter, i->id, i->from,
+       ret = for_each_btree_key2(&trans, iter, i->id, i->from,
                                  BTREE_ITER_PREFETCH|
                                  BTREE_ITER_ALL_SNAPSHOTS, k, ({
-               err = flush_buf(i);
-               if (err)
-                       break;
-
-               if (!i->size)
+               ret = flush_buf(i);
+               if (ret)
                        break;
 
                bch2_bkey_val_to_text(&i->buf, i->c, k);
@@ -277,12 +274,12 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        }));
        i->from = iter.pos;
 
-       if (!err)
-               err = flush_buf(i);
+       if (!ret)
+               ret = flush_buf(i);
 
        bch2_trans_exit(&trans);
 
-       return err ?: i->ret;
+       return ret ?: i->ret;
 }
 
 static const struct file_operations btree_debug_ops = {
@@ -299,43 +296,39 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
        struct btree_trans trans;
        struct btree_iter iter;
        struct btree *b;
-       int err;
+       ssize_t ret;
 
        i->ubuf = buf;
        i->size = size;
        i->ret  = 0;
 
-       err = flush_buf(i);
-       if (err)
-               return err;
+       ret = flush_buf(i);
+       if (ret)
+               return ret;
 
-       if (!i->size || !bpos_cmp(SPOS_MAX, i->from))
+       if (!bpos_cmp(SPOS_MAX, i->from))
                return i->ret;
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) {
-               bch2_btree_node_to_text(&i->buf, i->c, b);
-               err = flush_buf(i);
-               if (err)
+       for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
+               ret = flush_buf(i);
+               if (ret)
                        break;
 
-               /*
-                * can't easily correctly restart a btree node traversal across
-                * all nodes, meh
-                */
+               bch2_btree_node_to_text(&i->buf, i->c, b);
                i->from = bpos_cmp(SPOS_MAX, b->key.k.p)
                        ? bpos_successor(b->key.k.p)
                        : b->key.k.p;
-
-               if (!i->size)
-                       break;
        }
        bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
 
-       return err < 0 ? err : i->ret;
+       if (!ret)
+               ret = flush_buf(i);
+
+       return ret ?: i->ret;
 }
 
 static const struct file_operations btree_format_debug_ops = {
@@ -352,33 +345,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       int err;
+       ssize_t ret;
 
        i->ubuf = buf;
        i->size = size;
        i->ret  = 0;
 
-       err = flush_buf(i);
-       if (err)
-               return err;
-
-       if (!i->size)
-               return i->ret;
+       ret = flush_buf(i);
+       if (ret)
+               return ret;
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       err = for_each_btree_key2(&trans, iter, i->id, i->from,
+       ret = for_each_btree_key2(&trans, iter, i->id, i->from,
                                  BTREE_ITER_PREFETCH|
                                  BTREE_ITER_ALL_SNAPSHOTS, k, ({
                struct btree_path_level *l = &iter.path->l[0];
                struct bkey_packed *_k =
                        bch2_btree_node_iter_peek(&l->iter, l->b);
 
-               err = flush_buf(i);
-               if (err)
-                       break;
-
-               if (!i->size)
+               ret = flush_buf(i);
+               if (ret)
                        break;
 
                if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) {
@@ -391,12 +378,12 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        }));
        i->from = iter.pos;
 
-       if (!err)
-               err = flush_buf(i);
-
        bch2_trans_exit(&trans);
 
-       return err ?: i->ret;
+       if (!ret)
+               ret = flush_buf(i);
+
+       return ret ?: i->ret;
 }
 
 static const struct file_operations bfloat_failed_debug_ops = {
@@ -409,7 +396,8 @@ static const struct file_operations bfloat_failed_debug_ops = {
 static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
                                           struct btree *b)
 {
-       out->tabstops[0] = 32;
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 32);
 
        prt_printf(out, "%px btree=%s l=%u ",
               b,
@@ -466,7 +454,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
        struct dump_iter *i = file->private_data;
        struct bch_fs *c = i->c;
        bool done = false;
-       int err;
+       ssize_t ret = 0;
 
        i->ubuf = buf;
        i->size = size;
@@ -477,12 +465,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
                struct rhash_head *pos;
                struct btree *b;
 
-               err = flush_buf(i);
-               if (err)
-                       return err;
-
-               if (!i->size)
-                       break;
+               ret = flush_buf(i);
+               if (ret)
+                       return ret;
 
                rcu_read_lock();
                i->buf.atomic++;
@@ -500,9 +485,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
        } while (!done);
 
        if (i->buf.allocation_failure)
-               return -ENOMEM;
+               ret = -ENOMEM;
 
-       return i->ret;
+       if (!ret)
+               ret = flush_buf(i);
+
+       return ret ?: i->ret;
 }
 
 static const struct file_operations cached_btree_nodes_ops = {
@@ -538,7 +526,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
        struct dump_iter *i = file->private_data;
        struct bch_fs *c = i->c;
        struct btree_trans *trans;
-       int err;
+       ssize_t ret = 0;
 
        i->ubuf = buf;
        i->size = size;
@@ -549,12 +537,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
                if (trans->task->pid <= i->iter)
                        continue;
 
-               err = flush_buf(i);
-               if (err)
-                       return err;
-
-               if (!i->size)
-                       break;
+               ret = flush_buf(i);
+               if (ret)
+                       return ret;
 
                bch2_btree_trans_to_text(&i->buf, trans);
 
@@ -570,9 +555,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
        mutex_unlock(&c->btree_trans_lock);
 
        if (i->buf.allocation_failure)
-               return -ENOMEM;
+               ret = -ENOMEM;
 
-       return i->ret;
+       if (!ret)
+               ret = flush_buf(i);
+
+       return ret ?: i->ret;
 }
 
 static const struct file_operations btree_transactions_ops = {
@@ -651,14 +639,16 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
                                      size_t size, loff_t *ppos)
 {
        struct dump_iter        *i = file->private_data;
-       struct lock_held_stats *lhs = &i->c->lock_held_stats;
+       struct bch_fs *c = i->c;
        int err;
 
        i->ubuf = buf;
        i->size = size;
        i->ret  = 0;
 
-       while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) {
+       while (1) {
+               struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
+
                err = flush_buf(i);
                if (err)
                        return err;
@@ -666,11 +656,37 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
                if (!i->size)
                        break;
 
-               prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+               if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) ||
+                   !c->btree_transaction_fns[i->iter])
+                       break;
+
+               prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]);
                prt_newline(&i->buf);
-               printbuf_indent_add(&i->buf, 8);
-               bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
-               printbuf_indent_sub(&i->buf, 8);
+               printbuf_indent_add(&i->buf, 2);
+
+               mutex_lock(&s->lock);
+
+               if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+                       prt_printf(&i->buf, "Lock hold times:");
+                       prt_newline(&i->buf);
+
+                       printbuf_indent_add(&i->buf, 2);
+                       bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
+                       printbuf_indent_sub(&i->buf, 2);
+               }
+
+               if (s->max_paths_text) {
+                       prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
+                       prt_newline(&i->buf);
+
+                       printbuf_indent_add(&i->buf, 2);
+                       prt_str_indented(&i->buf, s->max_paths_text);
+                       printbuf_indent_sub(&i->buf, 2);
+               }
+
+               mutex_unlock(&s->lock);
+
+               printbuf_indent_sub(&i->buf, 2);
                prt_newline(&i->buf);
                i->iter++;
        }
@@ -716,10 +732,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
                            c->btree_debug, &journal_pins_ops);
 
-       if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
-               debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
-                               c, &lock_held_stats_op);
-       }
+       debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
+                           c, &lock_held_stats_op);
 
        c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
        if (IS_ERR_OR_NULL(c->btree_debug_dir))
index 15a1be2fcc84e68df6205bc8560fffda72e2a4d1..232f7c7999f6591d1278b3241c22fe3032481d57 100644 (file)
@@ -37,6 +37,7 @@
        x(no_btree_node,        no_btree_node_down)                     \
        x(no_btree_node,        no_btree_node_init)                     \
        x(no_btree_node,        no_btree_node_cached)                   \
+       x(0,                    backpointer_to_overwritten_btree_node)  \
        x(0,                    lock_fail_node_reused)                  \
        x(0,                    lock_fail_root_changed)                 \
        x(0,                    journal_reclaim_would_deadlock)         \
index c93e177a314ffc38abe016fc085b3a23db02a467..1a841146e379fb129ed2ce5de1c55ea147aa88a8 100644 (file)
@@ -290,7 +290,7 @@ err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
-       return ret;
+       return ret ?: -BCH_ERR_transaction_restart_nested;
 }
 
 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
@@ -914,7 +914,7 @@ static int check_inode(struct btree_trans *trans,
                bch2_fs_lazy_rw(c);
 
                ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
-               if (ret)
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        bch_err(c, "error in fsck: error while deleting inode: %s",
                                bch2_err_str(ret));
                return ret;
@@ -1149,13 +1149,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
                }
        }
 fsck_err:
-       if (ret) {
+       if (ret)
                bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
-               return ret;
-       }
-       if (trans_was_restarted(trans, restart_count))
-               return -BCH_ERR_transaction_restart_nested;
-       return 0;
+       if (!ret && trans_was_restarted(trans, restart_count))
+               ret = -BCH_ERR_transaction_restart_nested;
+       return ret;
 }
 
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
index d77092aa069ee66826c30da8c1d5316cac857b06..3f1cf1ac921a064741beecce7e9daa6136fb7dec 100644 (file)
@@ -1255,8 +1255,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        u64 seq;
        unsigned i;
 
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 24);
        out->atomic++;
-       out->tabstops[0] = 24;
 
        rcu_read_lock();
        s = READ_ONCE(j->reservations);
index 2fc247451390d72cd9be8062e5f4397b0e0c6149..224700675604bef24126e28602299adad7aecdf0 100644 (file)
@@ -636,6 +636,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                        b = bch2_backpointer_get_node(&trans, &iter,
                                                bucket, bp_offset, bp);
                        ret = PTR_ERR_OR_ZERO(b);
+                       if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+                               continue;
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret)
index ecc64dd92b05074b958bf9fe33255b12703b6929..17b289b051f290bbce8fb6053ce20d07375f0218 100644 (file)
@@ -268,7 +268,8 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
        struct bch_fs_rebalance *r = &c->rebalance;
        struct rebalance_work w = rebalance_work(c);
 
-       out->tabstops[0] = 20;
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 20);
 
        prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx);
        prt_tab(out);
index 24244bc3d2fbff6703dcf5b4e27d513ebb7082dc..fb3f8e4074c7a65d38939402a80a91af6db18ba4 100644 (file)
@@ -278,8 +278,8 @@ int bch2_fs_check_snapshots(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
-                       POS(BCACHEFS_ROOT_INO, 0),
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_snapshots, POS_MIN,
                        BTREE_ITER_PREFETCH, k,
                        NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
                check_snapshot(&trans, &iter, k));
index 55f8c65ad725e42b3cc757b2d9fdad25f1326067..ade09bdfc4d9a1746de63c612e0c6b807f882f16 100644 (file)
@@ -1427,8 +1427,8 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
        const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
                ? bch2_sb_field_ops[type] : NULL;
 
-       if (!out->tabstops[0])
-               out->tabstops[0] = 32;
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 32);
 
        if (ops)
                prt_printf(out, "%s", bch2_sb_fields[type]);
@@ -1476,8 +1476,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
        u64 fields_have = 0;
        unsigned nr_devices = 0;
 
-       if (!out->tabstops[0])
-               out->tabstops[0] = 32;
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 32);
 
        mi = bch2_sb_get_members(sb);
        if (mi) {
index 2c650055f53064e24f7b48c8cd1e64c4e856bf8c..2dfed1ffadd7a8f16f85d0c839aa6aa4b9cfd476 100644 (file)
@@ -560,7 +560,8 @@ SHOW(bch2_fs_counters)
        u64 counter = 0;
        u64 counter_since_mount = 0;
 
-       out->tabstops[0] = 32;
+       printbuf_tabstop_push(out, 32);
+
        #define x(t, ...) \
                if (attr == &sysfs_##t) {                                       \
                        counter             = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
index ee2c7d9e7050082b432ebdc81c1a80e39fd904ff..42da6623d815f6812d91b17f7e661046bab8884d 100644 (file)
@@ -268,6 +268,12 @@ static void bch2_quantiles_update(struct quantiles *q, u64 v)
        }
 }
 
+void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
+{
+       while (nr_bits)
+               prt_char(out, '0' + ((v >> --nr_bits) & 1));
+}
+
 /* time stats: */
 
 static void bch2_time_stats_update_one(struct time_stats *stats,
@@ -526,7 +532,8 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd)
 
 void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
 {
-       out->tabstops[0] = 20;
+       if (!out->nr_tabstops)
+               printbuf_tabstop_push(out, 20);
 
        prt_printf(out, "rate:");
        prt_tab(out);
index 1fe66fd91ccc724203d29b83d6787c94f9e1b5b6..ab7e43d4bf8bbddf955b77693ca0fb7a3794351c 100644 (file)
@@ -353,6 +353,8 @@ bool bch2_is_zero(const void *, size_t);
 
 u64 bch2_read_flag_list(char *, const char * const[]);
 
+void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
+
 #define NR_QUANTILES   15
 #define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)
 #define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
index 3fc9ff47409ff6917f391d0b9597b19b495f25e7..5cf79d43f5a4eee2244c9282ca87fffca5ad904e 100644 (file)
@@ -2,20 +2,13 @@
 /* Copyright (C) 2022 Kent Overstreet */
 
 #include <linux/err.h>
-#include <linux/math64.h>
-#include <linux/printbuf.h>
-#include <linux/slab.h>
-
-#ifdef __KERNEL__
 #include <linux/export.h>
 #include <linux/kernel.h>
-#else
-#ifndef EXPORT_SYMBOL
-#define EXPORT_SYMBOL(x)
-#endif
-#endif
+#include <linux/printbuf.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
 
-static inline size_t printbuf_linelen(struct printbuf *buf)
+static inline unsigned printbuf_linelen(struct printbuf *buf)
 {
        return buf->pos - buf->last_newline;
 }
@@ -35,6 +28,11 @@ int printbuf_make_room(struct printbuf *out, unsigned extra)
                return 0;
 
        new_size = roundup_pow_of_two(out->size + extra);
+
+       /*
+        * Note: output buffer must be freeable with kfree(), it's not required
+        * that the user use printbuf_exit().
+        */
        buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT);
 
        if (!buf) {
@@ -78,25 +76,43 @@ void printbuf_exit(struct printbuf *buf)
 }
 EXPORT_SYMBOL(printbuf_exit);
 
-void prt_newline(struct printbuf *buf)
+void printbuf_tabstops_reset(struct printbuf *buf)
 {
-       unsigned i;
-
-       printbuf_make_room(buf, 1 + buf->indent);
-
-       __prt_char(buf, '\n');
+       buf->nr_tabstops = 0;
+}
+EXPORT_SYMBOL(printbuf_tabstops_reset);
 
-       buf->last_newline       = buf->pos;
+void printbuf_tabstop_pop(struct printbuf *buf)
+{
+       if (buf->nr_tabstops)
+               --buf->nr_tabstops;
+}
+EXPORT_SYMBOL(printbuf_tabstop_pop);
 
-       for (i = 0; i < buf->indent; i++)
-               __prt_char(buf, ' ');
+/*
+ * printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces from previous tabpstop
+ *
+ * In the future this function may allocate memory if setting more than
+ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
+ * of line.
+ */
+int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
+{
+       unsigned prev_tabstop = buf->nr_tabstops
+               ? buf->_tabstops[buf->nr_tabstops - 1]
+               : 0;
 
-       printbuf_nul_terminate(buf);
+       if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
+               return -EINVAL;
 
-       buf->last_field         = buf->pos;
-       buf->tabstop = 0;
+       buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces;
+       buf->has_indent_or_tabstops = true;
+       return 0;
 }
-EXPORT_SYMBOL(prt_newline);
+EXPORT_SYMBOL(printbuf_tabstop_push);
 
 /**
  * printbuf_indent_add - add to the current indent level
@@ -113,8 +129,9 @@ void printbuf_indent_add(struct printbuf *buf, unsigned spaces)
                spaces = 0;
 
        buf->indent += spaces;
-       while (spaces--)
-               prt_char(buf, ' ');
+       prt_chars(buf, ' ', spaces);
+
+       buf->has_indent_or_tabstops = true;
 }
 EXPORT_SYMBOL(printbuf_indent_add);
 
@@ -137,168 +154,162 @@ void printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
                printbuf_nul_terminate(buf);
        }
        buf->indent -= spaces;
+
+       if (!buf->indent && !buf->nr_tabstops)
+               buf->has_indent_or_tabstops = false;
 }
 EXPORT_SYMBOL(printbuf_indent_sub);
 
-/**
- * prt_tab - Advance printbuf to the next tabstop
- *
- * @buf: printbuf to control
- *
- * Advance output to the next tabstop by printing spaces.
+void prt_newline(struct printbuf *buf)
+{
+       unsigned i;
+
+       printbuf_make_room(buf, 1 + buf->indent);
+
+       __prt_char(buf, '\n');
+
+       buf->last_newline       = buf->pos;
+
+       for (i = 0; i < buf->indent; i++)
+               __prt_char(buf, ' ');
+
+       printbuf_nul_terminate(buf);
+
+       buf->last_field         = buf->pos;
+       buf->cur_tabstop        = 0;
+}
+EXPORT_SYMBOL(prt_newline);
+
+/*
+ * Returns spaces from start of line, if set, or 0 if unset:
  */
-void prt_tab(struct printbuf *out)
+static inline unsigned cur_tabstop(struct printbuf *buf)
 {
-       int spaces = max_t(int, 0, out->tabstops[out->tabstop] - printbuf_linelen(out));
+       return buf->cur_tabstop < buf->nr_tabstops
+               ? buf->_tabstops[buf->cur_tabstop]
+               : 0;
+}
 
-       BUG_ON(out->tabstop > ARRAY_SIZE(out->tabstops));
+static void __prt_tab(struct printbuf *out)
+{
+       int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out));
 
        prt_chars(out, ' ', spaces);
 
        out->last_field = out->pos;
-       out->tabstop++;
+       out->cur_tabstop++;
 }
-EXPORT_SYMBOL(prt_tab);
 
 /**
- * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
- * previous output
+ * prt_tab - Advance printbuf to the next tabstop
  *
  * @buf: printbuf to control
  *
- * Advance output to the next tabstop by inserting spaces immediately after the
- * previous tabstop, right justifying previously outputted text.
+ * Advance output to the next tabstop by printing spaces.
  */
-void prt_tab_rjust(struct printbuf *buf)
+void prt_tab(struct printbuf *out)
 {
-       BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops));
+       if (WARN_ON(!cur_tabstop(out)))
+               return;
+
+       __prt_tab(out);
+}
+EXPORT_SYMBOL(prt_tab);
 
-       if (printbuf_linelen(buf) < buf->tabstops[buf->tabstop]) {
-               unsigned move = buf->pos - buf->last_field;
-               unsigned shift = buf->tabstops[buf->tabstop] -
-                       printbuf_linelen(buf);
+static void __prt_tab_rjust(struct printbuf *buf)
+{
+       unsigned move = buf->pos - buf->last_field;
+       int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
 
-               printbuf_make_room(buf, shift);
+       if (pad > 0) {
+               printbuf_make_room(buf, pad);
 
-               if (buf->last_field + shift < buf->size)
-                       memmove(buf->buf + buf->last_field + shift,
+               if (buf->last_field + pad < buf->size)
+                       memmove(buf->buf + buf->last_field + pad,
                                buf->buf + buf->last_field,
-                               min(move, buf->size - 1 - buf->last_field - shift));
+                               min(move, buf->size - 1 - buf->last_field - pad));
 
                if (buf->last_field < buf->size)
                        memset(buf->buf + buf->last_field, ' ',
-                              min(shift, buf->size - buf->last_field));
+                              min((unsigned) pad, buf->size - buf->last_field));
 
-               buf->pos += shift;
+               buf->pos += pad;
                printbuf_nul_terminate(buf);
        }
 
        buf->last_field = buf->pos;
-       buf->tabstop++;
+       buf->cur_tabstop++;
 }
-EXPORT_SYMBOL(prt_tab_rjust);
 
-enum string_size_units {
-       STRING_UNITS_10,        /* use powers of 10^3 (standard SI) */
-       STRING_UNITS_2,         /* use binary powers of 2^10 */
-};
-static int string_get_size(u64 size, u64 blk_size,
-                          const enum string_size_units units,
-                          char *buf, int len)
+/**
+ * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
+ * previous output
+ *
+ * @buf: printbuf to control
+ *
+ * Advance output to the next tabstop by inserting spaces immediately after the
+ * previous tabstop, right justifying previously outputted text.
+ */
+void prt_tab_rjust(struct printbuf *buf)
 {
-       static const char *const units_10[] = {
-               "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
-       };
-       static const char *const units_2[] = {
-               "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
-       };
-       static const char *const *const units_str[] = {
-               [STRING_UNITS_10] = units_10,
-               [STRING_UNITS_2] = units_2,
-       };
-       static const unsigned int divisor[] = {
-               [STRING_UNITS_10] = 1000,
-               [STRING_UNITS_2] = 1024,
-       };
-       static const unsigned int rounding[] = { 500, 50, 5 };
-       int i = 0, j;
-       u32 remainder = 0, sf_cap;
-       char tmp[13];
-       const char *unit;
-
-       tmp[0] = '\0';
-
-       if (blk_size == 0)
-               size = 0;
-       if (size == 0)
-               goto out;
-
-       /* This is Napier's algorithm.  Reduce the original block size to
-        *
-        * coefficient * divisor[units]^i
-        *
-        * we do the reduction so both coefficients are just under 32 bits so
-        * that multiplying them together won't overflow 64 bits and we keep
-        * as much precision as possible in the numbers.
-        *
-        * Note: it's safe to throw away the remainders here because all the
-        * precision is in the coefficients.
-        */
-       while (blk_size >> 32) {
-               do_div(blk_size, divisor[units]);
-               i++;
-       }
-
-       while (size >> 32) {
-               do_div(size, divisor[units]);
-               i++;
-       }
+       if (WARN_ON(!cur_tabstop(buf)))
+               return;
 
-       /* now perform the actual multiplication keeping i as the sum of the
-        * two logarithms */
-       size *= blk_size;
-
-       /* and logarithmically reduce it until it's just under the divisor */
-       while (size >= divisor[units]) {
-               remainder = do_div(size, divisor[units]);
-               i++;
-       }
+       __prt_tab_rjust(buf);
+}
+EXPORT_SYMBOL(prt_tab_rjust);
 
-       /* work out in j how many digits of precision we need from the
-        * remainder */
-       sf_cap = size;
-       for (j = 0; sf_cap*10 < 1000; j++)
-               sf_cap *= 10;
-
-       if (units == STRING_UNITS_2) {
-               /* express the remainder as a decimal.  It's currently the
-                * numerator of a fraction whose denominator is
-                * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
-               remainder *= 1000;
-               remainder >>= 10;
-       }
+/**
+ * prt_bytes_indented - Print an array of chars, handling embedded control characters
+ *
+ * @out: printbuf to output to
+ * @str: string to print
+ * @count: number of bytes to print
+ *
+ * The following control characters are handled as follows:
+ *   \n: prt_newline   newline that obeys current indent level
+ *   \t: prt_tab       advance to next tabstop
+ *   \r: prt_tab_rjust advance to next tabstop, with right justification
+ */
+void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
+{
+       const char *unprinted_start = str;
+       const char *end = str + count;
 
-       /* add a 5 to the digit below what will be printed to ensure
-        * an arithmetical round up and carry it through to size */
-       remainder += rounding[j];
-       if (remainder >= 1000) {
-               remainder -= 1000;
-               size += 1;
+       if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
+               prt_bytes(out, str, count);
+               return;
        }
 
-       if (j) {
-               snprintf(tmp, sizeof(tmp), ".%03u", remainder);
-               tmp[j+1] = '\0';
+       while (str != end) {
+               switch (*str) {
+               case '\n':
+                       prt_bytes(out, unprinted_start, str - unprinted_start);
+                       unprinted_start = str + 1;
+                       prt_newline(out);
+                       break;
+               case '\t':
+                       if (likely(cur_tabstop(out))) {
+                               prt_bytes(out, unprinted_start, str - unprinted_start);
+                               unprinted_start = str + 1;
+                               __prt_tab(out);
+                       }
+                       break;
+               case '\r':
+                       if (likely(cur_tabstop(out))) {
+                               prt_bytes(out, unprinted_start, str - unprinted_start);
+                               unprinted_start = str + 1;
+                               __prt_tab_rjust(out);
+                       }
+                       break;
+               }
+
+               str++;
        }
 
- out:
-       if (i >= ARRAY_SIZE(units_2))
-               unit = "UNK";
-       else
-               unit = units_str[units][i];
-
-       return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+       prt_bytes(out, unprinted_start, str - unprinted_start);
 }
+EXPORT_SYMBOL(prt_bytes_indented);
 
 /**
  * prt_human_readable_u64 - Print out a u64 in human readable units
diff --git a/linux/string_helpers.c b/linux/string_helpers.c
new file mode 100644 (file)
index 0000000..3d720bc
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ * Copyright (C) 2013, Intel Corporation
+ */
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/printbuf.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size:      The size to be converted in blocks
+ * @blk_size:  Size of the block (use 1 for size in bytes)
+ * @units:     units to use (powers of 1000 or 1024)
+ * @buf:       buffer to format to
+ * @len:       length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units.  @buf should have room for
+ * at least 9 bytes and will always be zero terminated.
+ *
+ */
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
+                   char *buf, int len)
+{
+       static const char *const units_10[] = {
+               "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+       };
+       static const char *const units_2[] = {
+               "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+       };
+       static const char *const *const units_str[] = {
+               [STRING_UNITS_10] = units_10,
+               [STRING_UNITS_2] = units_2,
+       };
+       static const unsigned int divisor[] = {
+               [STRING_UNITS_10] = 1000,
+               [STRING_UNITS_2] = 1024,
+       };
+       static const unsigned int rounding[] = { 500, 50, 5 };
+       int i = 0, j;
+       u32 remainder = 0, sf_cap;
+       char tmp[8];
+       const char *unit;
+
+       tmp[0] = '\0';
+
+       if (blk_size == 0)
+               size = 0;
+       if (size == 0)
+               goto out;
+
+       /* This is Napier's algorithm.  Reduce the original block size to
+        *
+        * coefficient * divisor[units]^i
+        *
+        * we do the reduction so both coefficients are just under 32 bits so
+        * that multiplying them together won't overflow 64 bits and we keep
+        * as much precision as possible in the numbers.
+        *
+        * Note: it's safe to throw away the remainders here because all the
+        * precision is in the coefficients.
+        */
+       while (blk_size >> 32) {
+               do_div(blk_size, divisor[units]);
+               i++;
+       }
+
+       while (size >> 32) {
+               do_div(size, divisor[units]);
+               i++;
+       }
+
+       /* now perform the actual multiplication keeping i as the sum of the
+        * two logarithms */
+       size *= blk_size;
+
+       /* and logarithmically reduce it until it's just under the divisor */
+       while (size >= divisor[units]) {
+               remainder = do_div(size, divisor[units]);
+               i++;
+       }
+
+       /* work out in j how many digits of precision we need from the
+        * remainder */
+       sf_cap = size;
+       for (j = 0; sf_cap*10 < 1000; j++)
+               sf_cap *= 10;
+
+       if (units == STRING_UNITS_2) {
+               /* express the remainder as a decimal.  It's currently the
+                * numerator of a fraction whose denominator is
+                * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+               remainder *= 1000;
+               remainder >>= 10;
+       }
+
+       /* add a 5 to the digit below what will be printed to ensure
+        * an arithmetical round up and carry it through to size */
+       remainder += rounding[j];
+       if (remainder >= 1000) {
+               remainder -= 1000;
+               size += 1;
+       }
+
+       if (j) {
+               snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+               tmp[j+1] = '\0';
+       }
+
+ out:
+       if (i >= ARRAY_SIZE(units_2))
+               unit = "UNK";
+       else
+               unit = units_str[units][i];
+
+       return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+}
+EXPORT_SYMBOL(string_get_size);