git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 6d44812757dd bcachefs: BCH_IOCTL_FSCK_ONLINE
author    Kent Overstreet <kent.overstreet@linux.dev>
          Mon, 4 Dec 2023 21:03:52 +0000 (16:03 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Thu, 7 Dec 2023 16:57:36 +0000 (11:57 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
30 files changed:
.bcachefs_revision
Makefile
libbcachefs/bcachefs.h
libbcachefs/bcachefs_ioctl.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_io.c
libbcachefs/btree_io.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.c
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/chardev.c
libbcachefs/compress.c
libbcachefs/data_update.c
libbcachefs/errcode.h
libbcachefs/fs-ioctl.c
libbcachefs/opts.c
libbcachefs/opts.h
libbcachefs/recovery.c
libbcachefs/reflink.c
libbcachefs/reflink.h
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/trace.h

index b0f07a3d4ff618ca813c07de78436c16668dbf00..393a80ed20d02101ddf3db1e5ab0d07693c0087b 100644 (file)
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-71a5b27e017df6ebae391da58857b22fdc406276
+6d44812757ddf81fad087d6abe662355e6712e02
index 4947814be90356c60e8727e8de7e9e86d98e254c..a7926f46dec03dedae85f20fc11bdbca32368188 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -268,7 +268,7 @@ update-bcachefs-sources:
        git add include/linux/kmemleak.h
        cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/
        git add linux/int_sqrt.c
-       rm libbcachefs/mean_and_variance_test.c
+       git rm libbcachefs/mean_and_variance_test.c
 #      cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/
 #      git add linux/mean_and_variance.c
 #      cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/
index bb2a0cc43f83d2a4c9ffb5d4d7819804104982bd..66de8c0c33cc52b8b34debd96d5f952b747892e6 100644 (file)
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -264,36 +264,54 @@ do {                                                                      \
 
 #define bch2_fmt(_c, fmt)              bch2_log_msg(_c, fmt "\n")
 
+void __bch2_print(struct bch_fs *c, const char *fmt, ...);
+
+#define maybe_dev_to_fs(_c)    _Generic((_c),                          \
+       struct bch_dev *:       ((struct bch_dev *) (_c))->fs,          \
+       struct bch_fs *:        (_c))
+
+#define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)
+
+#define bch2_print_ratelimited(_c, ...)                                        \
+do {                                                                   \
+       static DEFINE_RATELIMIT_STATE(_rs,                              \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST);         \
+                                                                       \
+       if (__ratelimit(&_rs))                                          \
+               bch2_print(_c, __VA_ARGS__);                            \
+} while (0)
+
 #define bch_info(c, fmt, ...) \
-       printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_notice(c, fmt, ...) \
-       printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_warn(c, fmt, ...) \
-       printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_warn_ratelimited(c, fmt, ...) \
-       printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
 
 #define bch_err(c, fmt, ...) \
-       printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err_dev(ca, fmt, ...) \
-       printk(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
 #define bch_err_dev_offset(ca, _offset, fmt, ...) \
-       printk(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
 #define bch_err_inum(c, _inum, fmt, ...) \
-       printk(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
 #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
-       printk(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+       bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
 
 #define bch_err_ratelimited(c, fmt, ...) \
-       printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err_dev_ratelimited(ca, fmt, ...) \
-       printk_ratelimited(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
 #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
-       printk_ratelimited(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
 #define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
-       printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
 #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
-       printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+       bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
 
 #define bch_err_fn(_c, _ret)                                           \
 do {                                                                   \
@@ -446,6 +464,12 @@ enum bch_time_stats {
 
 struct btree;
 
+struct log_output {
+       spinlock_t              lock;
+       wait_queue_head_t       wait;
+       struct printbuf         buf;
+};
+
 enum gc_phase {
        GC_PHASE_NOT_RUNNING,
        GC_PHASE_START,
@@ -700,6 +724,7 @@ struct bch_fs {
        struct super_block      *vfs_sb;
        dev_t                   dev;
        char                    name[40];
+       struct log_output       *output;
 
        /* ro/rw, add/remove/resize devices: */
        struct rw_semaphore     state_lock;
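
[Editor's note: the hunks above reroute the bch_* logging macros from printk() to bch2_print(), which takes the filesystem as its first argument so output can later be redirected per-fs via the new struct log_output pointer in struct bch_fs. Because some call sites hold a struct bch_dev * rather than a struct bch_fs *, maybe_dev_to_fs() dispatches on the argument's static type with C11 _Generic. A minimal standalone sketch of that dispatch, with stand-in type definitions:]

```c
#include <stdio.h>

/* Stand-in types; the real ones live in bcachefs.h: */
struct bch_fs { const char *name; };
struct bch_dev { struct bch_fs *fs; };

/* Same shape as the macro added above: select an expression by type */
#define maybe_dev_to_fs(_c)	_Generic((_c),				\
	struct bch_dev *:	((struct bch_dev *) (_c))->fs,		\
	struct bch_fs *:	(_c))

int main(void)
{
	struct bch_fs fs = { .name = "fs0" };
	struct bch_dev dev = { .fs = &fs };

	/* Both calls resolve to the same struct bch_fs *: */
	printf("%s %s\n", maybe_dev_to_fs(&fs)->name,
			  maybe_dev_to_fs(&dev)->name);
	return 0;
}
```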
index 43822c17297c697c070717aa34458a88e830a676..2ac6272c8ef52e2211816e15817a07c43d90a4f3 100644 (file)
--- a/libbcachefs/bcachefs_ioctl.h
+++ b/libbcachefs/bcachefs_ioctl.h
@@ -83,6 +83,10 @@ struct bch_ioctl_incremental {
 
 #define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc,     18, struct bch_ioctl_dev_usage_v2)
 
+#define BCH_IOCTL_FSCK_OFFLINE         _IOW(0xbc,      19,  struct bch_ioctl_fsck_offline)
+
+#define BCH_IOCTL_FSCK_ONLINE          _IOW(0xbc,      20,  struct bch_ioctl_fsck_online)
+
 /* ioctl below act on a particular file, not the filesystem as a whole: */
 
 #define BCHFS_IOC_REINHERIT_ATTRS      _IOR(0xbc, 64, const char __user *)
@@ -386,4 +390,24 @@ struct bch_ioctl_subvolume {
 #define BCH_SUBVOL_SNAPSHOT_CREATE     (1U << 0)
 #define BCH_SUBVOL_SNAPSHOT_RO         (1U << 1)
 
+/*
+ * BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command,
+ * but with the kernel's implementation of fsck:
+ */
+struct bch_ioctl_fsck_offline {
+       __u64                   flags;
+       __u64                   opts;           /* string */
+       __u64                   nr_devs;
+       __u64                   devs[0];
+};
+
+/*
+ * BCH_IOCTL_FSCK_ONLINE: run fsck from the 'bcachefs fsck' userspace command,
+ * but with the kernel's implementation of fsck:
+ */
+struct bch_ioctl_fsck_online {
+       __u64                   flags;
+       __u64                   opts;           /* string */
+};
+
 #endif /* _BCACHEFS_IOCTL_H */
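
[Editor's note: both new ioctls return a file descriptor on success, and fsck log output is then read(2) from it (see the chardev.c changes below). A hypothetical userspace sketch of invoking BCH_IOCTL_FSCK_OFFLINE — the control-device path and header name are assumptions, since the diff only defines the ABI:]

```c
#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "bcachefs_ioctl.h"	/* assumed header exporting the ABI above */

/* Start an offline fsck; returns the fd to read log output from, or -1. */
static int fsck_offline(const char **devs, size_t nr_devs, const char *opts)
{
	struct bch_ioctl_fsck_offline *arg;
	int ctl, fd = -1;

	/* devs[] is a flexible array of userspace string pointers: */
	arg = calloc(1, sizeof(*arg) + nr_devs * sizeof(arg->devs[0]));
	if (!arg)
		return -1;

	arg->opts	= (__u64)(unsigned long) opts;	/* option string */
	arg->nr_devs	= nr_devs;
	for (size_t i = 0; i < nr_devs; i++)
		arg->devs[i] = (__u64)(unsigned long) devs[i];

	ctl = open("/dev/bcachefs-ctl", O_RDWR);	/* assumed ctl node */
	if (ctl >= 0) {
		fd = ioctl(ctl, BCH_IOCTL_FSCK_OFFLINE, arg);
		close(ctl);
	}
	free(arg);
	return fd;
}
```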
index 47e7770d05831757d45c4aee9331f52a10c06e76..9574c8c4d70864b8617df8d68dff6d8e47bdf855 100644 (file)
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -9,6 +9,7 @@
 #include "debug.h"
 #include "errcode.h"
 #include "error.h"
+#include "journal.h"
 #include "trace.h"
 
 #include <linux/prefetch.h>
@@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
                BUG_ON(btree_node_read_in_flight(b) ||
                       btree_node_write_in_flight(b));
 
-               if (btree_node_dirty(b))
-                       bch2_btree_complete_write(c, b, btree_current_write(b));
-               clear_btree_node_dirty_acct(c, b);
-
                btree_node_data_free(c, b);
        }
 
-       BUG_ON(atomic_read(&c->btree_cache.dirty));
+       BUG_ON(!bch2_journal_error(&c->journal) &&
+              atomic_read(&c->btree_cache.dirty));
 
        list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
 
@@ -502,19 +500,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
  * cannibalize_bucket() will take. This means every time we unlock the root of
  * the btree, we need to release this lock if we have it held.
  */
-void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
 
        if (bc->alloc_lock == current) {
-               trace_and_count(c, btree_cache_cannibalize_unlock, c);
+               trace_and_count(c, btree_cache_cannibalize_unlock, trans);
                bc->alloc_lock = NULL;
                closure_wake_up(&bc->alloc_wait);
        }
 }
 
-int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct task_struct *old;
 
@@ -523,7 +523,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
                goto success;
 
        if (!cl) {
-               trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+               trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
                return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
        }
 
@@ -537,11 +537,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
                goto success;
        }
 
-       trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+       trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
        return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
 
 success:
-       trace_and_count(c, btree_cache_cannibalize_lock, c);
+       trace_and_count(c, btree_cache_cannibalize_lock, trans);
        return 0;
 }
 
@@ -675,7 +675,7 @@ err:
 
                mutex_unlock(&bc->lock);
 
-               trace_and_count(c, btree_cache_cannibalize, c);
+               trace_and_count(c, btree_cache_cannibalize, trans);
                goto out;
        }
 
@@ -751,7 +751,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        if (path && sync)
                bch2_trans_unlock_noassert(trans);
 
-       bch2_btree_node_read(c, b, sync);
+       bch2_btree_node_read(trans, b, sync);
 
        if (!sync)
                return NULL;
@@ -1041,7 +1041,7 @@ retry:
                        goto retry;
 
                if (IS_ERR(b) &&
-                   !bch2_btree_cache_cannibalize_lock(c, NULL))
+                   !bch2_btree_cache_cannibalize_lock(trans, NULL))
                        goto retry;
 
                if (IS_ERR(b))
@@ -1089,7 +1089,7 @@ lock_node:
        EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
        btree_check_header(c, b);
 out:
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
        return b;
 }
 
index cfb80b201d61be9240ed659baa57a693d12b796a..4e1af58820522fc8feec3caf9afc34d12f76c772 100644 (file)
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -17,8 +17,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
 int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
                                unsigned, enum btree_id);
 
-void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
-int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
 
 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
 struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
index 3c663c596b464ae3c8b3db68d2fc44a44bf19b9a..a6ac68fe90fb5f496cc12fb4ae76d4db84c326a1 100644 (file)
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1575,16 +1575,17 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
        return 0;
 }
 
-void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
+void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
                          bool sync)
 {
+       struct bch_fs *c = trans->c;
        struct extent_ptr_decoded pick;
        struct btree_read_bio *rb;
        struct bch_dev *ca;
        struct bio *bio;
        int ret;
 
-       trace_and_count(c, btree_node_read, c, b);
+       trace_and_count(c, btree_node_read, trans, b);
 
        if (bch2_verify_all_btree_replicas &&
            !btree_node_read_all_replicas(c, b, sync))
@@ -1663,12 +1664,12 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
        closure_init_stack(&cl);
 
        do {
-               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
                closure_sync(&cl);
        } while (ret);
 
        b = bch2_btree_node_mem_alloc(trans, level != 0);
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
 
        BUG_ON(IS_ERR(b));
 
@@ -1677,7 +1678,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
 
        set_btree_node_read_in_flight(b);
 
-       bch2_btree_node_read(c, b, true);
+       bch2_btree_node_read(trans, b, true);
 
        if (btree_node_read_error(b)) {
                bch2_btree_node_hash_remove(&c->btree_cache, b);
@@ -1704,8 +1705,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
        return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
 }
 
-void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
-                             struct btree_write *w)
+static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
+                                     struct btree_write *w)
 {
        unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
 
index 7e03dd76fb380498a42bcdef91857727403a4d8a..e251cb6b965ff0a8bdc4aa0684dfdcaa315c32d6 100644 (file)
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -130,13 +130,10 @@ void bch2_btree_init_next(struct btree_trans *, struct btree *);
 
 int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
                              struct btree *, bool, bool *);
-void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
+void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
 int bch2_btree_root_read(struct bch_fs *, enum btree_id,
                         const struct bkey_i *, unsigned);
 
-void bch2_btree_complete_write(struct bch_fs *, struct btree *,
-                             struct btree_write *);
-
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 enum btree_write_flags {
index 4d673d470724ef8b38484c468cfac54cd559d13e..929f33dff5d7bb6ce99a1f3e91172ec320813de2 100644 (file)
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -977,7 +977,7 @@ retry_all:
                closure_init_stack(&cl);
 
                do {
-                       ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+                       ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
                        closure_sync(&cl);
                } while (ret);
        }
@@ -1013,7 +1013,7 @@ retry_all:
         * then failed to relock a path - that's fine.
         */
 err:
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
 
        trans->in_traverse_all = false;
 
@@ -1298,7 +1298,7 @@ static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path
 {
        __bch2_btree_path_unlock(trans, path);
        btree_path_list_remove(trans, path);
-       trans->paths_allocated &= ~(1ULL << path->idx);
+       __clear_bit(path->idx, trans->paths_allocated);
 }
 
 void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
@@ -1471,6 +1471,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
 {
        struct btree_transaction_stats *s = btree_trans_stats(trans);
        struct printbuf buf = PRINTBUF;
+       size_t nr = bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX);
 
        if (!s)
                return;
@@ -1479,9 +1480,8 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
 
        if (!buf.allocation_failure) {
                mutex_lock(&s->lock);
-               if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
-                       s->nr_max_paths = trans->nr_max_paths =
-                               hweight64(trans->paths_allocated);
+               if (nr > s->nr_max_paths) {
+                       s->nr_max_paths = nr;
                        swap(s->max_paths_text, buf.buf);
                }
                mutex_unlock(&s->lock);
@@ -1489,7 +1489,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
 
        printbuf_exit(&buf);
 
-       trans->nr_max_paths = hweight64(trans->paths_allocated);
+       trans->nr_max_paths = nr;
 }
 
 noinline __cold
@@ -1518,13 +1518,12 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
                                                  struct btree_path *pos)
 {
        struct btree_path *path;
-       unsigned idx;
+       size_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX);
 
-       if (unlikely(trans->paths_allocated ==
-                    ~((~0ULL << 1) << (BTREE_ITER_MAX - 1))))
+       if (unlikely(idx == BTREE_ITER_MAX))
                btree_path_overflow(trans);
 
-       idx = __ffs64(~trans->paths_allocated);
+       BUG_ON(idx > BTREE_ITER_MAX);
 
        /*
         * Do this before marking the new path as allocated, since it won't be
@@ -1533,7 +1532,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
        if (unlikely(idx > trans->nr_max_paths))
                bch2_trans_update_max_paths(trans);
 
-       trans->paths_allocated |= 1ULL << idx;
+       __set_bit(idx, trans->paths_allocated);
 
        path = &trans->paths[idx];
        path->idx               = idx;
@@ -2503,7 +2502,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
        struct btree_path *path;
        unsigned i;
 
-       BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
+       BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX));
 
        trans_for_each_path(trans, path) {
                BUG_ON(path->sorted_idx >= trans->nr_sorted);
@@ -2513,7 +2512,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
        for (i = 0; i < trans->nr_sorted; i++) {
                unsigned idx = trans->sorted[i];
 
-               EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
+               BUG_ON(!test_bit(idx, trans->paths_allocated));
                BUG_ON(trans->paths[idx].sorted_idx != i);
        }
 }
index 75beb183183d7e849b10b25c36d0dcd20d1c0112..ea4fc8a2f79ab7e11b8252920c82bbacf944ee44 100644 (file)
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -66,17 +66,10 @@ static inline void btree_trans_sort_paths(struct btree_trans *trans)
 static inline struct btree_path *
 __trans_next_path(struct btree_trans *trans, unsigned idx)
 {
-       u64 l;
-
+       idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, idx);
        if (idx == BTREE_ITER_MAX)
                return NULL;
-
-       l = trans->paths_allocated >> idx;
-       if (!l)
-               return NULL;
-
-       idx += __ffs64(l);
-       EBUG_ON(idx >= BTREE_ITER_MAX);
+       EBUG_ON(idx > BTREE_ITER_MAX);
        EBUG_ON(trans->paths[idx].idx != idx);
        return &trans->paths[idx];
 }
@@ -92,17 +85,11 @@ __trans_next_path(struct btree_trans *trans, unsigned idx)
 static inline struct btree_path *
 __trans_next_path_safe(struct btree_trans *trans, unsigned *idx)
 {
-       u64 l;
-
+       *idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, *idx);
        if (*idx == BTREE_ITER_MAX)
                return NULL;
 
-       l = trans->paths_allocated >> *idx;
-       if (!l)
-               return NULL;
-
-       *idx += __ffs64(l);
-       EBUG_ON(*idx >= BTREE_ITER_MAX);
+       EBUG_ON(*idx > BTREE_ITER_MAX);
        return &trans->paths[*idx];
 }
 
@@ -631,7 +618,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
 
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
-       if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8)
+       if (bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) > BTREE_ITER_MAX - 8)
                return __bch2_btree_trans_too_many_iters(trans);
 
        return 0;
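
[Editor's note: taken together with the btree_types.h hunk below, these changes turn trans->paths_allocated from a single u64 into a proper bitmap, so path bookkeeping no longer hard-caps BTREE_ITER_MAX at 64: hweight64() becomes bitmap_weight(), __ffs64(~x) becomes find_first_zero_bit(), and the shift-based iteration becomes find_next_bit(). A standalone model of the allocate/free pattern, with the kernel bit helpers open-coded:]

```c
#include <limits.h>
#include <stddef.h>

#define BTREE_ITER_MAX	64	/* the former implicit u64 limit */
#define BITS_PER_LONG	(CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)];

/* find_first_zero_bit() + __set_bit(), open-coded: */
static size_t path_alloc(void)
{
	for (size_t i = 0; i < BTREE_ITER_MAX; i++) {
		unsigned long mask = 1UL << (i % BITS_PER_LONG);

		if (!(paths_allocated[i / BITS_PER_LONG] & mask)) {
			paths_allocated[i / BITS_PER_LONG] |= mask;
			return i;
		}
	}
	return BTREE_ITER_MAX;	/* caller overflows, cf. btree_path_overflow() */
}

/* __clear_bit(): */
static void path_free(size_t idx)
{
	paths_allocated[idx / BITS_PER_LONG] &= ~(1UL << (idx % BITS_PER_LONG));
}
```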
index c5e8a46129c55dd1d1b82ba6b945c628c5f3c001..b39b28b4ae73772a72778069d9c572b69375d129 100644 (file)
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -997,8 +997,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        list_for_each_entry_safe(ck, n, &items, list) {
                cond_resched();
 
-               bch2_journal_pin_drop(&c->journal, &ck->journal);
-
                list_del(&ck->list);
                kfree(ck->k);
                six_lock_exit(&ck->c.lock);
index 11b0a2c8cd691b21afccdcc38486aa060351f62a..a49f1dd1d223613c5f9c59b28a6a53f1410a5143 100644 (file)
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -263,7 +263,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
        int ret = 0;
 
        EBUG_ON(level >= BTREE_MAX_DEPTH);
-       EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
+       EBUG_ON(!test_bit(path->idx, trans->paths_allocated));
 
        if (likely(six_trylock_type(&b->lock, type)) ||
            btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) ||
index ca7526603d0693fd02a7fa18682354bb67e09221..78d9f585db4545312739490725ef71ee19af3b46 100644 (file)
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -414,7 +414,7 @@ struct btree_trans {
        unsigned                extra_journal_res;
        unsigned                nr_max_paths;
 
-       u64                     paths_allocated;
+       unsigned long           paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)];
 
        unsigned                mem_top;
        unsigned                mem_max;
index ba42f578f8107591ec46f2c6c90d0b562179956b..254794c1d9552624ebd90a8f039d091c64911b29 100644 (file)
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -531,6 +531,19 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
        return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
 }
 
+static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
+                                                 enum btree_id btree,
+                                                 struct bkey_i *k)
+{
+       struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
+       int ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       bkey_copy(n, k);
+       return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
 int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
                                            enum btree_id btree,
                                            struct bkey_i *k)
@@ -541,6 +554,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
        EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
        EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
 
+       if (unlikely(trans->journal_replay_not_finished))
+               return bch2_btree_insert_clone_trans(trans, btree, k);
+
        trans_for_each_wb_update(trans, i) {
                if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
                        bkey_copy(&i->k, k);
index c9f07ca49b9b0b23b56b0fd4a21921fea4e9e50d..970faec13bc74a2e86c31e862f048f3197681619 100644 (file)
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -163,9 +163,11 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
 
 /* Btree node freeing/allocation: */
 
-static void __btree_node_free(struct bch_fs *c, struct btree *b)
+static void __btree_node_free(struct btree_trans *trans, struct btree *b)
 {
-       trace_and_count(c, btree_node_free, c, b);
+       struct bch_fs *c = trans->c;
+
+       trace_and_count(c, btree_node_free, trans, b);
 
        BUG_ON(btree_node_write_blocked(b));
        BUG_ON(btree_node_dirty(b));
@@ -191,7 +193,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 
        bch2_btree_node_lock_write_nofail(trans, path, &b->c);
        bch2_btree_node_hash_remove(&c->btree_cache, b);
-       __btree_node_free(c, b);
+       __btree_node_free(trans, b);
        six_unlock_write(&b->c.lock);
        mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
 
@@ -362,7 +364,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as,
        ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
        BUG_ON(ret);
 
-       trace_and_count(c, btree_node_alloc, c, b);
+       trace_and_count(c, btree_node_alloc, trans, b);
        bch2_increment_clock(c, btree_sectors(c), WRITE);
        return b;
 }
@@ -452,7 +454,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
 
                        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
                        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
-                       __btree_node_free(c, b);
+                       __btree_node_free(trans, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
                }
@@ -465,7 +467,6 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
                                  unsigned flags,
                                  struct closure *cl)
 {
-       struct bch_fs *c = as->c;
        struct btree *b;
        unsigned interior;
        int ret = 0;
@@ -476,7 +477,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
         * Protects reaping from the btree node cache and using the btree node
         * open bucket reserve:
         */
-       ret = bch2_btree_cache_cannibalize_lock(c, cl);
+       ret = bch2_btree_cache_cannibalize_lock(trans, cl);
        if (ret)
                return ret;
 
@@ -495,7 +496,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
                }
        }
 err:
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
        return ret;
 }
 
@@ -1067,6 +1068,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
+       if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+           watermark < c->journal.watermark) {
+               struct journal_res res = { 0 };
+
+               ret = drop_locks_do(trans,
+                       bch2_journal_res_get(&c->journal, &res, 1,
+                                            watermark|JOURNAL_RES_GET_CHECK));
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+
        while (1) {
                nr_nodes[!!update_level] += 1 + split;
                update_level++;
@@ -1211,7 +1223,7 @@ static void bch2_btree_set_root(struct btree_update *as,
        struct bch_fs *c = as->c;
        struct btree *old;
 
-       trace_and_count(c, btree_node_set_root, c, b);
+       trace_and_count(c, btree_node_set_root, trans, b);
 
        old = btree_node_root(c, b);
 
@@ -1465,7 +1477,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
        if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
                struct btree *n[2];
 
-               trace_and_count(c, btree_node_split, c, b);
+               trace_and_count(c, btree_node_split, trans, b);
 
                n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
                n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
@@ -1523,7 +1535,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
                        btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
                }
        } else {
-               trace_and_count(c, btree_node_compact, c, b);
+               trace_and_count(c, btree_node_compact, trans, b);
 
                n1 = bch2_btree_node_alloc_replacement(as, trans, b);
 
@@ -1843,7 +1855,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       trace_and_count(c, btree_node_merge, c, b);
+       trace_and_count(c, btree_node_merge, trans, b);
 
        bch2_btree_interior_update_will_free_node(as, b);
        bch2_btree_interior_update_will_free_node(as, m);
@@ -1946,7 +1958,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
        bch2_btree_path_level_init(trans, new_path, n);
 
-       trace_and_count(c, btree_node_rewrite, c, b);
+       trace_and_count(c, btree_node_rewrite, trans, b);
 
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
@@ -2228,7 +2240,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
         * btree_iter_traverse():
         */
        if (btree_ptr_hash_val(new_key) != b->hash_val) {
-               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
                if (ret) {
                        ret = drop_locks_do(trans, (closure_sync(&cl), 0));
                        if (ret)
@@ -2252,7 +2264,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
                six_unlock_intent(&new_hash->c.lock);
        }
        closure_sync(&cl);
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
        return ret;
 }
 
@@ -2313,12 +2325,12 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
        closure_init_stack(&cl);
 
        do {
-               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
                closure_sync(&cl);
        } while (ret);
 
        b = bch2_btree_node_mem_alloc(trans, false);
-       bch2_btree_cache_cannibalize_unlock(c);
+       bch2_btree_cache_cannibalize_unlock(trans);
 
        set_btree_node_fake(b);
        set_btree_node_need_rewrite(b);
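
[Editor's note: one behavioral addition above is easy to miss: bch2_btree_update_start() now does a check-only journal reservation (JOURNAL_RES_GET_CHECK) before starting an interior update whose watermark is below the journal's current watermark, so low-priority splits and merges block until journal reclaim frees space instead of starting work they cannot commit. The reservation is attempted via drop_locks_do(), roughly this idiom — a simplified model, not the macro's definition in this tree:]

```c
/*
 * Sketch: run a potentially-blocking operation with btree locks dropped,
 * then relock; a failed relock surfaces as a transaction-restart error.
 */
#define drop_locks_do_sketch(_trans, _do)				\
({									\
	bch2_trans_unlock(_trans);					\
	(_do) ?: bch2_trans_relock(_trans);				\
})
```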
index 312bd0c86623402d6be837df1fe8298a71dfd5c6..27c743882b63b423216c06e37f482296c915913f 100644 (file)
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1164,107 +1164,6 @@ int bch2_mark_reservation(struct btree_trans *trans,
        return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags);
 }
 
-static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
-                                struct bkey_s_c_reflink_p p,
-                                u64 start, u64 end,
-                                u64 *idx, unsigned flags, size_t r_idx)
-{
-       struct bch_fs *c = trans->c;
-       struct reflink_gc *r;
-       int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-       u64 next_idx = end;
-       s64 ret = 0;
-       struct printbuf buf = PRINTBUF;
-
-       if (r_idx >= c->reflink_gc_nr)
-               goto not_found;
-
-       r = genradix_ptr(&c->reflink_gc_table, r_idx);
-       next_idx = min(next_idx, r->offset - r->size);
-       if (*idx < next_idx)
-               goto not_found;
-
-       BUG_ON((s64) r->refcount + add < 0);
-
-       r->refcount += add;
-       *idx = r->offset;
-       return 0;
-not_found:
-       if (fsck_err(c, reflink_p_to_missing_reflink_v,
-                    "pointer to missing indirect extent\n"
-                    "  %s\n"
-                    "  missing range %llu-%llu",
-                    (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
-                    *idx, next_idx)) {
-               struct bkey_i_error *new;
-
-               new = bch2_trans_kmalloc(trans, sizeof(*new));
-               ret = PTR_ERR_OR_ZERO(new);
-               if (ret)
-                       goto err;
-
-               bkey_init(&new->k);
-               new->k.type     = KEY_TYPE_error;
-               new->k.p                = bkey_start_pos(p.k);
-               new->k.p.offset += *idx - start;
-               bch2_key_resize(&new->k, next_idx - *idx);
-               ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
-                                         BTREE_TRIGGER_NORUN);
-       }
-
-       *idx = next_idx;
-err:
-fsck_err:
-       printbuf_exit(&buf);
-       return ret;
-}
-
-static int __mark_reflink_p(struct btree_trans *trans,
-                           enum btree_id btree_id, unsigned level,
-                           struct bkey_s_c k, unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       struct reflink_gc *ref;
-       size_t l, r, m;
-       u64 idx = le64_to_cpu(p.v->idx), start = idx;
-       u64 end = le64_to_cpu(p.v->idx) + p.k->size;
-       int ret = 0;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) {
-               idx -= le32_to_cpu(p.v->front_pad);
-               end += le32_to_cpu(p.v->back_pad);
-       }
-
-       l = 0;
-       r = c->reflink_gc_nr;
-       while (l < r) {
-               m = l + (r - l) / 2;
-
-               ref = genradix_ptr(&c->reflink_gc_table, m);
-               if (ref->offset <= idx)
-                       l = m + 1;
-               else
-                       r = m;
-       }
-
-       while (idx < end && !ret)
-               ret = __bch2_mark_reflink_p(trans, p, start, end,
-                                           &idx, flags, l++);
-
-       return ret;
-}
-
-int bch2_mark_reflink_p(struct btree_trans *trans,
-                       enum btree_id btree_id, unsigned level,
-                       struct bkey_s_c old, struct bkey_s_c new,
-                       unsigned flags)
-{
-       return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
-}
-
 void bch2_trans_fs_usage_revert(struct btree_trans *trans,
                                struct replicas_delta_list *deltas)
 {
@@ -1732,105 +1631,6 @@ int bch2_trans_mark_reservation(struct btree_trans *trans,
        return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags);
 }
 
-static int trans_mark_reflink_p_segment(struct btree_trans *trans,
-                       struct bkey_s_c_reflink_p p,
-                       u64 *idx, unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct btree_iter iter;
-       struct bkey_i *k;
-       __le64 *refcount;
-       int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-       struct printbuf buf = PRINTBUF;
-       int ret;
-
-       k = bch2_bkey_get_mut_noupdate(trans, &iter,
-                       BTREE_ID_reflink, POS(0, *idx),
-                       BTREE_ITER_WITH_UPDATES);
-       ret = PTR_ERR_OR_ZERO(k);
-       if (ret)
-               goto err;
-
-       refcount = bkey_refcount(k);
-       if (!refcount) {
-               bch2_bkey_val_to_text(&buf, c, p.s_c);
-               bch2_trans_inconsistent(trans,
-                       "nonexistent indirect extent at %llu while marking\n  %s",
-                       *idx, buf.buf);
-               ret = -EIO;
-               goto err;
-       }
-
-       if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
-               bch2_bkey_val_to_text(&buf, c, p.s_c);
-               bch2_trans_inconsistent(trans,
-                       "indirect extent refcount underflow at %llu while marking\n  %s",
-                       *idx, buf.buf);
-               ret = -EIO;
-               goto err;
-       }
-
-       if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
-               u64 pad;
-
-               pad = max_t(s64, le32_to_cpu(v->front_pad),
-                           le64_to_cpu(v->idx) - bkey_start_offset(&k->k));
-               BUG_ON(pad > U32_MAX);
-               v->front_pad = cpu_to_le32(pad);
-
-               pad = max_t(s64, le32_to_cpu(v->back_pad),
-                           k->k.p.offset - p.k->size - le64_to_cpu(v->idx));
-               BUG_ON(pad > U32_MAX);
-               v->back_pad = cpu_to_le32(pad);
-       }
-
-       le64_add_cpu(refcount, add);
-
-       bch2_btree_iter_set_pos_to_extent_start(&iter);
-       ret = bch2_trans_update(trans, &iter, k, 0);
-       if (ret)
-               goto err;
-
-       *idx = k->k.p.offset;
-err:
-       bch2_trans_iter_exit(trans, &iter);
-       printbuf_exit(&buf);
-       return ret;
-}
-
-static int __trans_mark_reflink_p(struct btree_trans *trans,
-                               enum btree_id btree_id, unsigned level,
-                               struct bkey_s_c k, unsigned flags)
-{
-       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       u64 idx, end_idx;
-       int ret = 0;
-
-       idx     = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
-       end_idx = le64_to_cpu(p.v->idx) + p.k->size +
-               le32_to_cpu(p.v->back_pad);
-
-       while (idx < end_idx && !ret)
-               ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
-       return ret;
-}
-
-int bch2_trans_mark_reflink_p(struct btree_trans *trans,
-                             enum btree_id btree_id, unsigned level,
-                             struct bkey_s_c old,
-                             struct bkey_i *new,
-                             unsigned flags)
-{
-       if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
-
-               v->front_pad = v->back_pad = 0;
-       }
-
-       return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
-}
-
 static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    struct bch_dev *ca, size_t b,
                                    enum bch_data_type type,
index bc088673009bfce4f9af14506d9f16382338a4f4..379101d7e585b414de3757fc161a4c4f407b14a7 100644 (file)
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -335,14 +335,10 @@ int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned,
                     struct bkey_s_c, struct bkey_s_c, unsigned);
 int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned,
                          struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
-                       struct bkey_s_c, struct bkey_s_c, unsigned);
 
 int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-
 #define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
 ({                                                                                             \
        int ret = 0;                                                                            \
index ba0436ae6b05833869fe9989585ffe74f13f82e4..a042e07cefd91e09bac1b275dcfaf72b4fb9f233 100644 (file)
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -29,6 +29,63 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long
        return copy_to_user(to, from, n) ? -EFAULT : 0;
 }
 
+struct thread_with_file {
+       struct task_struct      *task;
+       int                     ret;
+};
+
+static void thread_with_file_exit(struct thread_with_file *thr)
+{
+       if (thr->task) {
+               kthread_stop(thr->task);
+               put_task_struct(thr->task);
+       }
+}
+
+static int run_thread_with_file(struct thread_with_file *thr,
+                               const struct file_operations *fops,
+                               int (*fn)(void *), const char *fmt, ...)
+{
+       va_list args;
+       struct file *file = NULL;
+       int ret, fd = -1;
+       struct printbuf name = PRINTBUF;
+       unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
+
+       va_start(args, fmt);
+       prt_vprintf(&name, fmt, args);
+       va_end(args);
+
+       thr->ret = 0;
+       thr->task = kthread_create(fn, thr, name.buf);
+       ret = PTR_ERR_OR_ZERO(thr->task);
+       if (ret)
+               goto err;
+
+       ret = get_unused_fd_flags(fd_flags);
+       if (ret < 0)
+               goto err_stop_task;
+       fd = ret;
+
+       file = anon_inode_getfile(name.buf, fops, thr, fd_flags);
+       ret = PTR_ERR_OR_ZERO(file);
+       if (ret)
+               goto err_put_fd;
+
+       fd_install(fd, file);
+       get_task_struct(thr->task);
+       wake_up_process(thr->task);
+       printbuf_exit(&name);
+       return fd;
+err_put_fd:
+       put_unused_fd(fd);
+err_stop_task:
+       kthread_stop(thr->task);
+err:
+       printbuf_exit(&name);
+       return ret;
+}
+
 /* returns with ref on ca->ref */
 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
                                          unsigned flags)
@@ -138,8 +195,177 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
 }
 #endif
 
+struct fsck_thread {
+       struct thread_with_file thr;
+       struct printbuf         buf;
+       struct bch_fs           *c;
+       char                    **devs;
+       size_t                  nr_devs;
+       struct bch_opts         opts;
+
+       struct log_output       output;
+       DARRAY(char)            output2;
+};
+
+static void bch2_fsck_thread_free(struct fsck_thread *thr)
+{
+       thread_with_file_exit(&thr->thr);
+       if (thr->devs)
+               for (size_t i = 0; i < thr->nr_devs; i++)
+                       kfree(thr->devs[i]);
+       darray_exit(&thr->output2);
+       printbuf_exit(&thr->output.buf);
+       kfree(thr->devs);
+       kfree(thr);
+}
+
+static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
+{
+       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
+
+       bch2_fsck_thread_free(thr);
+       return 0;
+}
+
+static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
+                                    size_t len, loff_t *ppos)
+{
+       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
+       size_t copied = 0, b;
+       int ret = 0;
+
+       ret = wait_event_interruptible(thr->output.wait,
+                       thr->output.buf.pos || thr->output2.nr);
+       if (ret)
+               return ret;
+
+       while (len) {
+               ret = darray_make_room(&thr->output2, thr->output.buf.pos);
+               if (ret)
+                       break;
+
+               spin_lock_irq(&thr->output.lock);
+               b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
+
+               memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
+               memmove(thr->output.buf.buf,
+                       thr->output.buf.buf + b,
+                       thr->output.buf.pos - b);
+
+               thr->output2.nr += b;
+               thr->output.buf.pos -= b;
+               spin_unlock_irq(&thr->output.lock);
+
+               b = min(len, thr->output2.nr);
+               if (!b)
+                       break;
+
+               b -= copy_to_user(buf, thr->output2.data, b);
+               if (!b) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               copied  += b;
+               buf     += b;
+               len     -= b;
+
+               memmove(thr->output2.data,
+                       thr->output2.data + b,
+                       thr->output2.nr - b);
+               thr->output2.nr -= b;
+       }
+
+       return copied ?: ret;
+}
+
+static const struct file_operations fsck_thread_ops = {
+       .release        = bch2_fsck_thread_release,
+       .read           = bch2_fsck_thread_read,
+       .llseek         = no_llseek,
+};
+
+static int bch2_fsck_offline_thread_fn(void *arg)
+{
+       struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+       struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
+
+       thr->thr.ret = PTR_ERR_OR_ZERO(c);
+       if (!thr->thr.ret)
+               bch2_fs_stop(c);
+       return 0;
+}
+
+static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
+{
+       struct bch_ioctl_fsck_offline arg;
+       struct fsck_thread *thr = NULL;
+       u64 *devs = NULL;
+       long ret = 0;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
+               return -EFAULT;
+
+       if (arg.flags)
+               return -EINVAL;
+
+       if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
+           !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
+           !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       thr->nr_devs = arg.nr_devs;
+       thr->output.buf = PRINTBUF;
+       thr->output.buf.atomic++;
+       spin_lock_init(&thr->output.lock);
+       init_waitqueue_head(&thr->output.wait);
+       darray_init(&thr->output2);
+
+       if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       for (size_t i = 0; i < arg.nr_devs; i++) {
+               thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
+               ret = PTR_ERR_OR_ZERO(thr->devs[i]);
+               if (ret)
+                       goto err;
+       }
+
+       if (arg.opts) {
+               char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
+
+               ret =   PTR_ERR_OR_ZERO(optstr) ?:
+                       bch2_parse_mount_opts(NULL, &thr->opts, optstr);
+               kfree(optstr);
+
+               if (ret)
+                       goto err;
+       }
+
+       opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
+
+       ret = run_thread_with_file(&thr->thr,
+                                  &fsck_thread_ops,
+                                  bch2_fsck_offline_thread_fn,
+                                  "bch-fsck");
+err:
+       if (ret < 0) {
+               if (thr)
+                       bch2_fsck_thread_free(thr);
+               pr_err("ret %s", bch2_err_str(ret));
+       }
+       kfree(devs);
+       return ret;
+}
+
 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
 {
+       long ret;
+
        switch (cmd) {
 #if 0
        case BCH_IOCTL_ASSEMBLE:
@@ -147,9 +373,18 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
        case BCH_IOCTL_INCREMENTAL:
                return bch2_ioctl_incremental(arg);
 #endif
+       case BCH_IOCTL_FSCK_OFFLINE: {
+               ret = bch2_ioctl_fsck_offline(arg);
+               break;
+       }
        default:
-               return -ENOTTY;
+               ret = -ENOTTY;
+               break;
        }
+
+       if (ret < 0)
+               ret = bch2_err_class(ret);
+       return ret;
 }
 
 static long bch2_ioctl_query_uuid(struct bch_fs *c,
@@ -299,31 +534,27 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c,
 }
 
 struct bch_data_ctx {
+       struct thread_with_file         thr;
+
        struct bch_fs                   *c;
        struct bch_ioctl_data           arg;
        struct bch_move_stats           stats;
-
-       int                             ret;
-
-       struct task_struct              *thread;
 };
 
 static int bch2_data_thread(void *arg)
 {
-       struct bch_data_ctx *ctx = arg;
-
-       ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
+       struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
 
+       ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
        ctx->stats.data_type = U8_MAX;
        return 0;
 }
 
 static int bch2_data_job_release(struct inode *inode, struct file *file)
 {
-       struct bch_data_ctx *ctx = file->private_data;
+       struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
 
-       kthread_stop(ctx->thread);
-       put_task_struct(ctx->thread);
+       thread_with_file_exit(&ctx->thr);
        kfree(ctx);
        return 0;
 }
@@ -331,7 +562,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file)
 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
                                  size_t len, loff_t *ppos)
 {
-       struct bch_data_ctx *ctx = file->private_data;
+       struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
        struct bch_fs *c = ctx->c;
        struct bch_ioctl_data_event e = {
                .type                   = BCH_DATA_EVENT_PROGRESS,
@@ -357,10 +588,8 @@ static const struct file_operations bcachefs_data_ops = {
 static long bch2_ioctl_data(struct bch_fs *c,
                            struct bch_ioctl_data arg)
 {
-       struct bch_data_ctx *ctx = NULL;
-       struct file *file = NULL;
-       unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
-       int ret, fd = -1;
+       struct bch_data_ctx *ctx;
+       int ret;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -375,36 +604,12 @@ static long bch2_ioctl_data(struct bch_fs *c,
        ctx->c = c;
        ctx->arg = arg;
 
-       ctx->thread = kthread_create(bch2_data_thread, ctx,
-                                    "bch-data/%s", c->name);
-       if (IS_ERR(ctx->thread)) {
-               ret = PTR_ERR(ctx->thread);
-               goto err;
-       }
-
-       ret = get_unused_fd_flags(flags);
+       ret = run_thread_with_file(&ctx->thr,
+                                  &bcachefs_data_ops,
+                                  bch2_data_thread,
+                                  "bch-data/%s", c->name);
        if (ret < 0)
-               goto err;
-       fd = ret;
-
-       file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags);
-       if (IS_ERR(file)) {
-               ret = PTR_ERR(file);
-               goto err;
-       }
-
-       fd_install(fd, file);
-
-       get_task_struct(ctx->thread);
-       wake_up_process(ctx->thread);
-
-       return fd;
-err:
-       if (fd >= 0)
-               put_unused_fd(fd);
-       if (!IS_ERR_OR_NULL(ctx->thread))
-               kthread_stop(ctx->thread);
-       kfree(ctx);
+               kfree(ctx);
        return ret;
 }
 
@@ -690,6 +895,50 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
        return ret;
 }
 
+static int bch2_fsck_online_thread_fn(void *arg)
+{
+       struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+       struct bch_fs *c = thr->c;
+#if 0
+       struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
+
+       thr->thr.ret = PTR_ERR_OR_ZERO(c);
+       if (!thr->thr.ret)
+               bch2_fs_stop(c);
+#endif
+       return 0;
+}
+
+static long bch2_ioctl_fsck_online(struct bch_fs *c,
+                                  struct bch_ioctl_fsck_online arg)
+{
+       struct fsck_thread *thr = NULL;
+       long ret = 0;
+
+       if (arg.flags)
+               return -EINVAL;
+
+       thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+       if (!thr)
+               return -ENOMEM;
+
+       thr->c = c;
+       thr->output.buf = PRINTBUF;
+       thr->output.buf.atomic++;
+       spin_lock_init(&thr->output.lock);
+       init_waitqueue_head(&thr->output.wait);
+       darray_init(&thr->output2);
+
+       ret = run_thread_with_file(&thr->thr,
+                                  &fsck_thread_ops,
+                                  bch2_fsck_online_thread_fn,
+                                  "bch-fsck");
+       bch_err_fn(c, ret);
+       if (ret < 0)
+               bch2_fsck_thread_free(thr);
+       return ret;
+}
+
 #define BCH_IOCTL(_name, _argtype)                                     \
 do {                                                                   \
        _argtype i;                                                     \
@@ -745,7 +994,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
                BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
        case BCH_IOCTL_DISK_RESIZE_JOURNAL:
                BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
-
+       case BCH_IOCTL_FSCK_ONLINE:
+               BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
        default:
                return -ENOTTY;
        }
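
[Editor's note: the chardev.c refactor above extracts the kthread-plus-anon-inode boilerplate that bch2_ioctl_data() used to open-code into run_thread_with_file(): a context struct embeds thread_with_file as a member, the file_operations callbacks recover the context with container_of() on file->private_data, and the caller gets back an installed fd whose lifetime controls the thread. A kernel-side sketch of the calling convention — my_job and friends are hypothetical names:]

```c
struct my_job {
	struct thread_with_file	thr;	/* recovered by container_of() below */
	/* ... job-specific state ... */
};

static int my_job_fn(void *arg)
{
	struct my_job *job = container_of(arg, struct my_job, thr);

	job->thr.ret = 0;	/* do the work, stash the result */
	return 0;
}

static int my_job_release(struct inode *inode, struct file *file)
{
	struct my_job *job = container_of(file->private_data,
					  struct my_job, thr);

	thread_with_file_exit(&job->thr);	/* stops + puts the kthread */
	kfree(job);
	return 0;
}

static const struct file_operations my_job_ops = {
	.release	= my_job_release,
	.llseek		= no_llseek,
};

/* Returns an installed fd on success; the kthread is already running. */
static long start_my_job(struct my_job *job)
{
	return run_thread_with_file(&job->thr, &my_job_ops, my_job_fn,
				    "bch-myjob");
}
```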
index 51af8ea230edbf997756e51ac37cfe3cfc158341..33df8cf86bd8f83bbf42d45944d0632da404fd71 100644 (file)
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -572,10 +572,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
        ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
                                                 c->opts.encoded_extent_max);
 
-       /*
-        * ZSTD is lying: if we allocate the size of the workspace it says it
-        * requires, it returns memory allocation errors
-        */
        c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);
 
        struct {
index 0d58a8727a7b25f6c3ecbb194f14e6612be096dd..22d4bb78f87426d5514ddce4333ffc38df6dd780 100644 (file)
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -485,7 +485,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
         * we aren't using the extent overwrite path to delete, we're
         * just using the normal key deletion path:
         */
-       if (bkey_deleted(&n->k))
+       if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS))
                n->k.size = 0;
 
        return bch2_trans_relock(trans) ?:
@@ -605,7 +605,7 @@ int bch2_data_update_init(struct btree_trans *trans,
                m->data_opts.rewrite_ptrs = 0;
                /* if iter == NULL, it's just a promote */
                if (iter)
-                       ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+                       ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
                goto done;
        }
 
index e3e2be7922b926bc8c1407db7638daa1103f7365..87c13f137db6ad7c766883c62aba74a601d79c7c 100644 (file)
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
        x(EINVAL,                       insufficient_devices_to_start)          \
        x(EINVAL,                       invalid)                                \
        x(EINVAL,                       internal_fsck_err)                      \
+       x(EINVAL,                       opt_parse_error)                        \
        x(EROFS,                        erofs_trans_commit)                     \
        x(EROFS,                        erofs_no_writes)                        \
        x(EROFS,                        erofs_journal_err)                      \
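
[Editor's note: the new opt_parse_error entry (used by opts.c below) follows the bcachefs private-errcode scheme: each x(class, name) line mints a distinct negative code outside the errno range so callers can tell exactly which check failed, and bch2_err_class() collapses it back to the parent errno (here EINVAL) before it escapes to userspace — which is why bch2_global_ioctl() above now filters its return value through bch2_err_class(). A simplified standalone model of the mechanism (the real table supports nested classes):]

```c
#include <errno.h>

#define ERRCODES()				\
	x(EINVAL, opt_parse_error)		\
	x(EROFS,  erofs_trans_commit)

#define ERR_START 2048	/* private codes live above the errno range */

enum {
	ERR_start = ERR_START - 1,
#define x(class, name) ERR_##name,
	ERRCODES()
#undef x
};

/* Map each private code back to its parent errno class: */
static const int err_class_table[] = {
#define x(class, name) [ERR_##name - ERR_START] = class,
	ERRCODES()
#undef x
};

/* Positive codes here for simplicity; the kernel negates them. */
static int err_class(int err)
{
	return err >= ERR_START ? err_class_table[err - ERR_START] : err;
}
```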
index 5a39bcb597a33d42826a16a98da394de3fe23660..561fc1da4e3a6b4a63a250d7aeb54db411cd40ac 100644 (file)
--- a/libbcachefs/fs-ioctl.c
+++ b/libbcachefs/fs-ioctl.c
@@ -287,34 +287,26 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
 
        bch_notice(c, "shutdown by ioctl type %u", flags);
 
-       down_write(&c->vfs_sb->s_umount);
-
        switch (flags) {
        case FSOP_GOING_FLAGS_DEFAULT:
                ret = freeze_bdev(c->vfs_sb->s_bdev);
                if (ret)
-                       goto err;
-
+                       break;
                bch2_journal_flush(&c->journal);
-               c->vfs_sb->s_flags |= SB_RDONLY;
                bch2_fs_emergency_read_only(c);
                thaw_bdev(c->vfs_sb->s_bdev);
                break;
-
        case FSOP_GOING_FLAGS_LOGFLUSH:
                bch2_journal_flush(&c->journal);
                fallthrough;
-
        case FSOP_GOING_FLAGS_NOLOGFLUSH:
-               c->vfs_sb->s_flags |= SB_RDONLY;
                bch2_fs_emergency_read_only(c);
                break;
        default:
                ret = -EINVAL;
                break;
        }
-err:
-       up_write(&c->vfs_sb->s_umount);
+
        return ret;
 }
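
A hedged userspace sketch of the shutdown path above, assuming the
XFS-compatible FS_IOC_GOINGDOWN command and the FSOP_GOING_FLAGS_* values from
fs-ioctl.h:

	__u32 flags = FSOP_GOING_FLAGS_LOGFLUSH;	/* flush journal, then emergency RO */
	int ret = ioctl(fd, FS_IOC_GOINGDOWN, &flags);

Note the rewrite also stops taking s_umount and setting SB_RDONLY by hand; that
work is left to freeze_bdev() and bch2_fs_emergency_read_only().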
 
@@ -341,6 +333,10 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
             (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
                return -EINVAL;
 
+       if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
+           !arg.src_ptr)
+               return -EOPNOTSUPP;
+
        if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
                create_flags |= BCH_CREATE_SNAPSHOT;
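
A hypothetical userspace sketch of the case this now rejects: snapshot
creation must name an explicit source subvolume (struct bch_ioctl_subvolume
layout per bcachefs_ioctl.h; the paths and mode here are illustrative):

	struct bch_ioctl_subvolume s = {
		.flags   = BCH_SUBVOL_SNAPSHOT_CREATE,
		.dirfd   = AT_FDCWD,
		.mode    = 0777,
		.dst_ptr = (unsigned long) "./snap",
		.src_ptr = (unsigned long) "./src_subvol",	/* 0 now fails with -EOPNOTSUPP */
	};
	int ret = ioctl(fs_fd, BCH_IOCTL_SUBVOLUME_CREATE, &s);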
 
index 8dd4046cca41ef23b061f4aeac1892f82a504d65..8e6f230eac38155bf5d048367d6ebde35a4a15bd 100644 (file)
@@ -279,14 +279,14 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
                if (err)
                        prt_printf(err, "%s: not a multiple of 512",
                               opt->attr.name);
-               return -EINVAL;
+               return -BCH_ERR_opt_parse_error;
        }
 
        if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) {
                if (err)
                        prt_printf(err, "%s: must be a power of two",
                               opt->attr.name);
-               return -EINVAL;
+               return -BCH_ERR_opt_parse_error;
        }
 
        if (opt->fn.validate)
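
Callers still see -EINVAL at the syscall boundary: the new private code
unwraps to its errno class via bch2_err_class(), while staying distinct for
internal reporting. A small usage sketch:

	struct printbuf err = PRINTBUF;
	int ret = bch2_opt_validate(opt, v, &err);
	if (ret)	/* -BCH_ERR_opt_parse_error maps back to -EINVAL */
		ret = bch2_err_class(ret);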
index 8526f177450a56900c907a2e4cba3950fe5f9e00..91026dfb8c83483f918f1881a46d4409e2134ef8 100644 (file)
@@ -419,6 +419,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Allocate the buckets_nouse bitmap")            \
+       x(log_output,                   u64,                            \
+         0,                                                            \
+         OPT_UINT(0, S64_MAX),                                         \
+         BCH2_NO_SB_OPT,               0,                              \
+         NULL,         "Destination buffer for redirected log messages") \
        x(project,                      u8,                             \
          OPT_INODE,                                                    \
          OPT_BOOL(),                                                   \
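
A hedged sketch of how an in-kernel caller would be expected to pass a log
buffer through this option; the allocation and field initialization below are
assumptions for illustration, not code from this commit:

	struct log_output *out = kzalloc(sizeof(*out), GFP_KERNEL);

	spin_lock_init(&out->lock);
	init_waitqueue_head(&out->wait);
	out->buf = PRINTBUF;

	opt_set(opts, log_output, (u64)(unsigned long) out);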
index 98f1454c23fb6c6307682082c27ecf072d392798..69b49845a730e612062a1e3b8aa3ed210480a466 100644 (file)
@@ -171,10 +171,12 @@ static int bch2_journal_replay(struct bch_fs *c)
 
                struct journal_key *k = keys->d + i;
 
-               ret = commit_do(trans, NULL, NULL,
-                               BCH_TRANS_COMMIT_no_enospc|
-                               BCH_TRANS_COMMIT_journal_reclaim|
-                               (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
+               /* Skip fastpath if we're low on space in the journal */
+               ret = c->journal.watermark ? -1 :
+                       commit_do(trans, NULL, NULL,
+                                 BCH_TRANS_COMMIT_no_enospc|
+                                 BCH_TRANS_COMMIT_journal_reclaim|
+                                 (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
                             bch2_journal_replay_key(trans, k));
                BUG_ON(!ret && !k->overwritten);
                if (ret) {
@@ -657,13 +659,13 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
                struct recovery_pass_fn *p = recovery_pass_fns + pass;
 
                if (!(p->when & PASS_SILENT))
-                       printk(KERN_INFO bch2_log_msg(c, "%s..."),
-                              bch2_recovery_passes[pass]);
+                       bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+                                  bch2_recovery_passes[pass]);
                ret = p->fn(c);
                if (ret)
                        return ret;
                if (!(p->when & PASS_SILENT))
-                       printk(KERN_CONT " done\n");
+                       bch2_print(c, KERN_CONT " done\n");
 
                c->recovery_passes_complete |= BIT_ULL(pass);
        }
index 07ddf3e85ee454577f4ef6354e25ab7a89671161..1d56470e1849e24cf76e09215e4c31fcfac2d5ad 100644 (file)
@@ -3,6 +3,7 @@
 #include "bkey_buf.h"
 #include "btree_update.h"
 #include "buckets.h"
+#include "error.h"
 #include "extents.h"
 #include "inode.h"
 #include "io_misc.h"
@@ -73,6 +74,206 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
        return true;
 }
 
+static int trans_mark_reflink_p_segment(struct btree_trans *trans,
+                       struct bkey_s_c_reflink_p p,
+                       u64 *idx, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_i *k;
+       __le64 *refcount;
+       int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+       struct printbuf buf = PRINTBUF;
+       int ret;
+
+       k = bch2_bkey_get_mut_noupdate(trans, &iter,
+                       BTREE_ID_reflink, POS(0, *idx),
+                       BTREE_ITER_WITH_UPDATES);
+       ret = PTR_ERR_OR_ZERO(k);
+       if (ret)
+               goto err;
+
+       refcount = bkey_refcount(k);
+       if (!refcount) {
+               bch2_bkey_val_to_text(&buf, c, p.s_c);
+               bch2_trans_inconsistent(trans,
+                       "nonexistent indirect extent at %llu while marking\n  %s",
+                       *idx, buf.buf);
+               ret = -EIO;
+               goto err;
+       }
+
+       if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+               bch2_bkey_val_to_text(&buf, c, p.s_c);
+               bch2_trans_inconsistent(trans,
+                       "indirect extent refcount underflow at %llu while marking\n  %s",
+                       *idx, buf.buf);
+               ret = -EIO;
+               goto err;
+       }
+
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+               u64 pad;
+
+               pad = max_t(s64, le32_to_cpu(v->front_pad),
+                           le64_to_cpu(v->idx) - bkey_start_offset(&k->k));
+               BUG_ON(pad > U32_MAX);
+               v->front_pad = cpu_to_le32(pad);
+
+               pad = max_t(s64, le32_to_cpu(v->back_pad),
+                           k->k.p.offset - p.k->size - le64_to_cpu(v->idx));
+               BUG_ON(pad > U32_MAX);
+               v->back_pad = cpu_to_le32(pad);
+       }
+
+       le64_add_cpu(refcount, add);
+
+       bch2_btree_iter_set_pos_to_extent_start(&iter);
+       ret = bch2_trans_update(trans, &iter, k, 0);
+       if (ret)
+               goto err;
+
+       *idx = k->k.p.offset;
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+static int __trans_mark_reflink_p(struct btree_trans *trans,
+                               enum btree_id btree_id, unsigned level,
+                               struct bkey_s_c k, unsigned flags)
+{
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+       u64 idx, end_idx;
+       int ret = 0;
+
+       idx     = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       end_idx = le64_to_cpu(p.v->idx) + p.k->size +
+               le32_to_cpu(p.v->back_pad);
+
+       while (idx < end_idx && !ret)
+               ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
+       return ret;
+}
+
+int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                             enum btree_id btree_id, unsigned level,
+                             struct bkey_s_c old,
+                             struct bkey_i *new,
+                             unsigned flags)
+{
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
+
+               v->front_pad = v->back_pad = 0;
+       }
+
+       return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
+}
+
+static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
+                                struct bkey_s_c_reflink_p p,
+                                u64 start, u64 end,
+                                u64 *idx, unsigned flags, size_t r_idx)
+{
+       struct bch_fs *c = trans->c;
+       struct reflink_gc *r;
+       int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+       u64 next_idx = end;
+       s64 ret = 0;
+       struct printbuf buf = PRINTBUF;
+
+       if (r_idx >= c->reflink_gc_nr)
+               goto not_found;
+
+       r = genradix_ptr(&c->reflink_gc_table, r_idx);
+       next_idx = min(next_idx, r->offset - r->size);
+       if (*idx < next_idx)
+               goto not_found;
+
+       BUG_ON((s64) r->refcount + add < 0);
+
+       r->refcount += add;
+       *idx = r->offset;
+       return 0;
+not_found:
+       if (fsck_err(c, reflink_p_to_missing_reflink_v,
+                    "pointer to missing indirect extent\n"
+                    "  %s\n"
+                    "  missing range %llu-%llu",
+                    (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
+                    *idx, next_idx)) {
+               struct bkey_i_error *new;
+
+               new = bch2_trans_kmalloc(trans, sizeof(*new));
+               ret = PTR_ERR_OR_ZERO(new);
+               if (ret)
+                       goto err;
+
+               bkey_init(&new->k);
+               new->k.type     = KEY_TYPE_error;
+               new->k.p                = bkey_start_pos(p.k);
+               new->k.p.offset += *idx - start;
+               bch2_key_resize(&new->k, next_idx - *idx);
+               ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
+                                         BTREE_TRIGGER_NORUN);
+       }
+
+       *idx = next_idx;
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
+static int __mark_reflink_p(struct btree_trans *trans,
+                           enum btree_id btree_id, unsigned level,
+                           struct bkey_s_c k, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+       struct reflink_gc *ref;
+       size_t l, r, m;
+       u64 idx = le64_to_cpu(p.v->idx), start = idx;
+       u64 end = le64_to_cpu(p.v->idx) + p.k->size;
+       int ret = 0;
+
+       BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+       if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) {
+               idx -= le32_to_cpu(p.v->front_pad);
+               end += le32_to_cpu(p.v->back_pad);
+       }
+
+       l = 0;
+       r = c->reflink_gc_nr;
+       while (l < r) {
+               m = l + (r - l) / 2;
+
+               ref = genradix_ptr(&c->reflink_gc_table, m);
+               if (ref->offset <= idx)
+                       l = m + 1;
+               else
+                       r = m;
+       }
+
+       while (idx < end && !ret)
+               ret = __bch2_mark_reflink_p(trans, p, start, end,
+                                           &idx, flags, l++);
+
+       return ret;
+}
+
+int bch2_mark_reflink_p(struct btree_trans *trans,
+                       enum btree_id btree_id, unsigned level,
+                       struct bkey_s_c old, struct bkey_s_c new,
+                       unsigned flags)
+{
+       return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
+}
+
 /* indirect extents */
 
 int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -121,6 +322,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans,
 {
        check_indirect_extent_deleting(new, &flags);
 
+       if (old.k->type == KEY_TYPE_reflink_v &&
+           new->k.type == KEY_TYPE_reflink_v &&
+           old.k->u64s == new->k.u64s &&
+           !memcmp(bkey_s_c_to_reflink_v(old).v->start,
+                   bkey_i_to_reflink_v(new)->v.start,
+                   bkey_val_bytes(&new->k) - 8))
+               return 0;
+
        return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
 }
 
index 8ccf3f9c4939eed45d9d9dc231bf5632506de836..6cc9c4a77265988081c21aa006ee63a64bfff521 100644 (file)
@@ -9,6 +9,10 @@ int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c,
 void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+                             struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+                       struct bkey_s_c, struct bkey_s_c, unsigned);
 
 #define bch2_bkey_ops_reflink_p ((struct bkey_ops) {           \
        .key_invalid    = bch2_reflink_p_invalid,               \
index e7f186b45df103ad51662a1497efb92184a3ba61..3abccdbfacbe3e2873eb3225cb61b17030ff5bb8 100644 (file)
@@ -80,6 +80,25 @@ const char * const bch2_fs_flag_strs[] = {
        NULL
 };
 
+void __bch2_print(struct bch_fs *c, const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       if (likely(!c->output)) {
+               vprintk(fmt, args);
+       } else {
+               unsigned long flags;
+
+               spin_lock_irqsave(&c->output->lock, flags);
+               prt_vprintf(&c->output->buf, fmt, args);
+               spin_unlock_irqrestore(&c->output->lock, flags);
+
+               wake_up(&c->output->wait);
+       }
+       va_end(args);
+}
+
 #define KTYPE(type)                                                    \
 static const struct attribute_group type ## _group = {                 \
        .attrs = type ## _files                                         \
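
__bch2_print() above targets c->output when it is set; a hedged sketch of the
structure this implies, with field names inferred from the lock, printbuf, and
wakeup calls rather than quoted from a header:

	struct log_output {
		spinlock_t		lock;	/* serializes writers */
		wait_queue_head_t	wait;	/* readers woken per message */
		struct printbuf		buf;	/* accumulated log text */
	};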
@@ -703,6 +722,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                goto out;
        }
 
+       c->output = (void *)(unsigned long) opts.log_output;
+
        __module_get(THIS_MODULE);
 
        closure_init(&c->cl, NULL);
index 4a7c93bcf2f4929ae2d172a46a4313f07a854a6e..1b82a3a93d14cb082f546b96c772462ee772a5b4 100644 (file)
@@ -278,8 +278,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
                if (!btree_type_has_ptrs(id))
                        continue;
 
-               for_each_btree_key(trans, iter, id, POS_MIN,
-                                  BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               ret = for_each_btree_key2(trans, iter, id, POS_MIN,
+                                         BTREE_ITER_ALL_SNAPSHOTS, k, ({
                        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
                        struct bch_extent_crc_unpacked crc;
                        const union bch_extent_entry *entry;
@@ -305,8 +305,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
                                s[t].sectors_compressed += k.k->size;
                                s[t].sectors_uncompressed += k.k->size;
                        }
-               }
-               bch2_trans_iter_exit(trans, &iter);
+                       0;
+               }));
        }
 
        bch2_trans_put(trans);
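
A sketch of the for_each_btree_key2() pattern adopted above: the last
statement-expression is the loop body's result, so the trailing "0;" means
keep iterating, and the macro tears down the iterator itself on break or error
(hence the dropped bch2_trans_iter_exit()):

	u64 nr = 0;	/* hypothetical per-key accounting */

	ret = for_each_btree_key2(trans, iter, BTREE_ID_extents, POS_MIN,
				  BTREE_ITER_ALL_SNAPSHOTS, k, ({
		nr++;
		0;	/* nonzero would stop the walk and become ret */
	}));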
index 6e2ad6f3db980d7d116c2800a2bdc37acc172e24..cfa7ee780fd4cab217b8749443e544b716318f77 100644 (file)
@@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(trans_str,
                  __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str))
 );
 
-DECLARE_EVENT_CLASS(btree_node,
+DECLARE_EVENT_CLASS(btree_node_nofs,
        TP_PROTO(struct bch_fs *c, struct btree *b),
        TP_ARGS(c, b),
 
@@ -97,6 +97,33 @@ DECLARE_EVENT_CLASS(btree_node,
                  __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
 );
 
+DECLARE_EVENT_CLASS(btree_node,
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __array(char,           trans_fn, 32            )
+               __field(u8,             level                   )
+               __field(u8,             btree_id                )
+               TRACE_BPOS_entries(pos)
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = trans->c->dev;
+               strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+               __entry->level          = b->c.level;
+               __entry->btree_id       = b->c.btree_id;
+               TRACE_BPOS_assign(pos, b->key.k.p);
+       ),
+
+       TP_printk("%d,%d %s %u %s %llu:%llu:%u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn,
+                 __entry->level,
+                 bch2_btree_id_str(__entry->btree_id),
+                 __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
+);
+
 DECLARE_EVENT_CLASS(bch_fs,
        TP_PROTO(struct bch_fs *c),
        TP_ARGS(c),
@@ -112,6 +139,23 @@ DECLARE_EVENT_CLASS(bch_fs,
        TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
 );
 
+DECLARE_EVENT_CLASS(btree_trans,
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __array(char,           trans_fn, 32            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = trans->c->dev;
+               strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+       ),
+
+       TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn)
+);
+
 DECLARE_EVENT_CLASS(bio,
        TP_PROTO(struct bio *bio),
        TP_ARGS(bio),
@@ -330,36 +374,36 @@ TRACE_EVENT(btree_cache_scan,
                  __entry->nr_to_scan, __entry->can_free, __entry->ret)
 );
 
-DEFINE_EVENT(btree_node, btree_cache_reap,
+DEFINE_EVENT(btree_node_nofs, btree_cache_reap,
        TP_PROTO(struct bch_fs *c, struct btree *b),
        TP_ARGS(c, b)
 );
 
-DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail,
-       TP_PROTO(struct bch_fs *c),
-       TP_ARGS(c)
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail,
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans)
 );
 
-DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock,
-       TP_PROTO(struct bch_fs *c),
-       TP_ARGS(c)
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock,
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans)
 );
 
-DEFINE_EVENT(bch_fs, btree_cache_cannibalize,
-       TP_PROTO(struct bch_fs *c),
-       TP_ARGS(c)
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize,
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans)
 );
 
-DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock,
-       TP_PROTO(struct bch_fs *c),
-       TP_ARGS(c)
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock,
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans)
 );
 
 /* Btree */
 
 DEFINE_EVENT(btree_node, btree_node_read,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 TRACE_EVENT(btree_node_write,
@@ -383,13 +427,13 @@ TRACE_EVENT(btree_node_write,
 );
 
 DEFINE_EVENT(btree_node, btree_node_alloc,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 DEFINE_EVENT(btree_node, btree_node_free,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 TRACE_EVENT(btree_reserve_get_fail,
@@ -421,28 +465,28 @@ TRACE_EVENT(btree_reserve_get_fail,
 );
 
 DEFINE_EVENT(btree_node, btree_node_compact,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 DEFINE_EVENT(btree_node, btree_node_merge,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 DEFINE_EVENT(btree_node, btree_node_split,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 DEFINE_EVENT(btree_node, btree_node_rewrite,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
 
 DEFINE_EVENT(btree_node, btree_node_set_root,
-       TP_PROTO(struct bch_fs *c, struct btree *b),
-       TP_ARGS(c, b)
+       TP_PROTO(struct btree_trans *trans, struct btree *b),
+       TP_ARGS(trans, b)
 );
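
Call sites change shape with the classes: events defined on btree_node and
btree_trans now take the transaction, so e.g. (a sketch, assuming the existing
trace_and_count() wrapper):

	trace_and_count(c, btree_node_alloc, trans, b);
	trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);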
 
 TRACE_EVENT(btree_path_relock_fail,